net: unix: af_unix.c — implementation of BSD Unix domain (AF_UNIX) sockets
[linux-2.6-microblaze.git] / net / unix / af_unix.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:        Implementation of BSD Unix domain sockets.
4  *
5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *              Linus Torvalds  :       Assorted bug cures.
9  *              Niibe Yutaka    :       async I/O support.
10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
11  *              Alan Cox        :       Limit size of allocated blocks.
12  *              Alan Cox        :       Fixed the stupid socketpair bug.
13  *              Alan Cox        :       BSD compatibility fine tuning.
14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
15  *              Alan Cox        :       Sorted out a proper draft version of
16  *                                      file descriptor passing hacked up from
17  *                                      Mike Shaver's work.
18  *              Marty Leisner   :       Fixes to fd passing
19  *              Nick Nevin      :       recvmsg bugfix.
20  *              Alan Cox        :       Started proper garbage collector
21  *              Heiko EiBfeldt  :       Missing verify_area check
22  *              Alan Cox        :       Started POSIXisms
23  *              Andreas Schwab  :       Replace inode by dentry for proper
24  *                                      reference counting
25  *              Kirk Petersen   :       Made this a module
26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
27  *                                      Lots of bug fixes.
29  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
29  *                                      by above two patches.
30  *           Andrea Arcangeli   :       If possible we block in connect(2)
31  *                                      if the max backlog of the listen socket
32  *                                      is been reached. This won't break
33  *                                      old apps and it will avoid huge amount
34  *                                      of socks hashed (this for unix_gc()
35  *                                      performances reasons).
36  *                                      Security fix that limits the max
37  *                                      number of socks to 2*max_files and
38  *                                      the number of skb queueable in the
39  *                                      dgram receiver.
40  *              Artur Skawina   :       Hash function optimizations
41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
42  *            Malcolm Beattie   :       Set peercred for socketpair
43  *           Michal Ostrowski   :       Module initialization cleanup.
44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
45  *                                      the core infrastructure is doing that
46  *                                      for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *      [TO FIX]
51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
52  *              other the moment one end closes.
53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *      [NOT TO FIX]
56  *      accept() returns a path name even if the connecting socket has closed
57  *              in the meantime (BSD loses the path and gives up).
58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *      BSD af_unix apparently has connect forgetting to block properly.
62  *              (need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *      Bug fixes and improvements.
66  *              - client shutdown killed server socket.
67  *              - removed all useless cli/sti pairs.
68  *
69  *      Semantic changes/extensions.
70  *              - generic control message passing.
71  *              - SCM_CREDENTIALS control message.
72  *              - "Abstract" (not FS based) socket bindings.
73  *                Abstract names are sequences of bytes (not zero terminated)
74  *                started by 0, so that this name space does not intersect
75  *                with BSD names.
76  */
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116
117 #include "scm.h"
118
/* Global hash table of all AF_UNIX sockets. The lower UNIX_HASH_SIZE
 * buckets hold bound sockets (indexed by name hash), the upper half holds
 * unbound sockets (see unix_sockets_unbound()). Protected by
 * unix_table_lock.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of live AF_UNIX socks; capped at 2 * get_max_files() in unix_create1(). */
static atomic_long_t unix_nr_socks;
124
125
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135
/* True when sk is bound to an abstract (non-filesystem) name: those hash
 * into the lower half of the table.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID in the skb control block. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID stored in the skb into the receiver's scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True when the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* With LSM networking disabled these compile to no-ops / always-equal. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
165
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179
/* Connected peer of sk; NULL while unconnected. Stable under the state lock. */
#define unix_peer(sk) (unix_sk(sk)->peer)
181
/* True when osk's connection points back at sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
186
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191
192 static inline int unix_recvq_full(struct sock const *sk)
193 {
194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196
/* Return s's peer with a reference held, or NULL when unconnected.
 * The per-socket state lock makes the peer read and sock_hold() atomic
 * with respect to concurrent connect/disconnect.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
209
210 static inline void unix_release_addr(struct unix_address *addr)
211 {
212         if (refcount_dec_and_test(&addr->refcnt))
213                 kfree(addr);
214 }
215
216 /*
217  *      Check unix socket name:
218  *              - should be not zero length.
219  *              - if started by not zero, should be NULL terminated (FS object)
220  *              - if started by zero, it is abstract name.
221  */
222
/* Validate and canonicalize a sockaddr_un. Returns the effective address
 * length, or -EINVAL. For filesystem names the buffer is NUL-terminated in
 * place and *hashp stays 0; for abstract names (leading NUL byte) *hashp
 * receives the folded checksum of the raw bytes.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	/* Need more than just the family field, and must fit the struct. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the bytes as given (not NUL-terminated). */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
247
/* Unhash sk from the global table. Caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
252
/* Hash sk onto the given bucket list. Caller holds unix_table_lock;
 * sk must not already be hashed.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
258
/* Locked wrapper: unhash sk under unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
265
/* Locked wrapper: hash sk onto list under unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
272
273 static struct sock *__unix_find_socket_byname(struct net *net,
274                                               struct sockaddr_un *sunname,
275                                               int len, int type, unsigned int hash)
276 {
277         struct sock *s;
278
279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
280                 struct unix_sock *u = unix_sk(s);
281
282                 if (!net_eq(sock_net(s), net))
283                         continue;
284
285                 if (u->addr->len == len &&
286                     !memcmp(u->addr->name, sunname, len))
287                         return s;
288         }
289         return NULL;
290 }
291
/* Locked lookup by bound name; takes a reference on the result before
 * dropping unix_table_lock so it can't be freed under the caller.
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
306
307 static struct sock *unix_find_socket_byinode(struct inode *i)
308 {
309         struct sock *s;
310
311         spin_lock(&unix_table_lock);
312         sk_for_each(s,
313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
314                 struct dentry *dentry = unix_sk(s)->path.dentry;
315
316                 if (dentry && d_backing_inode(dentry) == i) {
317                         sock_hold(s);
318                         goto found;
319                 }
320         }
321         s = NULL;
322 found:
323         spin_unlock(&unix_table_lock);
324         return s;
325 }
326
327 /* Support code for asymmetrically connected dgram sockets
328  *
329  * If a datagram socket is connected to a socket not itself connected
330  * to the first socket (eg, /dev/log), clients may only enqueue more
331  * messages if the present receive queue of the server socket is not
332  * "too large". This means there's a second writeability condition
333  * poll and sendmsg need to test. The dgram recv code will do a wake
334  * up on the peer_wait wait queue of a socket upon reception of a
335  * datagram which needs to be propagated to sleeping would-be writers
336  * since these might not have sent anything so far. This can't be
337  * accomplished via poll_wait because the lifetime of the server
338  * socket might be less than that of its clients if these break their
339  * association with it or if the server socket is closed while clients
340  * are still connected to it and there's no way to inform "a polling
341  * implementation" that it should let go of a certain wait queue
342  *
343  * In order to propagate a wake up, a wait_queue_entry_t of the client
344  * socket is enqueued on the peer_wait queue of the server socket
345  * whose wake function does a wake_up on the ordinary client socket
346  * wait queue. This connection is established whenever a write (or
347  * poll for write) hit the flow control condition and broken when the
348  * association to the server socket is dissolved or after a wake up
349  * was relayed.
350  */
351
/* Wake function installed on a server socket's peer_wait queue (see the
 * comment block above): one-shot — it removes itself from the queue,
 * clears the association, then relays the wakeup to the client socket's
 * own wait queue so blocked writers re-check writability.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
371
372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
373 {
374         struct unix_sock *u, *u_other;
375         int rc;
376
377         u = unix_sk(sk);
378         u_other = unix_sk(other);
379         rc = 0;
380         spin_lock(&u_other->peer_wait.lock);
381
382         if (!u->peer_wake.private) {
383                 u->peer_wake.private = other;
384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
385
386                 rc = 1;
387         }
388
389         spin_unlock(&u_other->peer_wait.lock);
390         return rc;
391 }
392
/* Disarm the wake relay: remove sk's peer_wake entry from other's
 * peer_wait queue if (and only if) it is currently armed against other.
 * Counterpart of unix_dgram_peer_wake_connect().
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
409
/* Disarm the relay and wake local waiters with writability events so any
 * writer blocked on the old peer re-evaluates its situation.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
419
420 /* preconditions:
421  *      - unix_peer(sk) == other
422  *      - association is stable
423  */
/* Returns 1 when the sender should wait for the peer's receive queue to
 * drain (the wake relay is left armed in that case), 0 when it may send
 * now (any relay this call armed is disarmed again).
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
443
444 static int unix_writable(const struct sock *sk)
445 {
446         return sk->sk_state != TCP_LISTEN &&
447                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
448 }
449
/* sk_write_space callback: when enough write memory has been released,
 * wake sleepers waiting for POLLOUT and send SIGIO/async notification.
 * sk_wq is RCU-protected, hence the read-side critical section.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
464
465 /* When dgram socket disconnects (or changes its peer), we clear its receive
466  * queue of packets arrived from previous peer. First, it allows to do
467  * flow control based only on wmem_alloc; second, sk connected to peer
468  * may receive messages only from that peer. */
/* See the comment above: on dgram disconnect/re-connect, drop packets
 * queued from the previous peer and, if that peer was connected back to
 * us, signal ECONNRESET to it since its messages are now lost.
 */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
485
/* Final sk teardown, installed as sk->sk_destruct in unix_create1():
 * purges any remaining queued skbs, releases the bound address and
 * updates the global socket accounting. The WARN_ONs assert the socket
 * is fully detached (no write memory, unhashed, no struct socket).
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
512
/* Tear one socket down: unhash it, orphan it, notify/shut down the peer,
 * flush the receive queue and drop the creation reference.
 * @embrion: non-zero when destroying a never-accepted embryo socket found
 * on a listener's queue; the peer then gets ECONNRESET.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Steal the filesystem binding under the lock; put it later, unlocked. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an unaccepted embryo) means the peer
			 * must see a reset, not a clean EOF.
			 */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's receive queue holds embryo sockets. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
586
587 static void init_peercred(struct sock *sk)
588 {
589         put_pid(sk->sk_peer_pid);
590         if (sk->sk_peer_cred)
591                 put_cred(sk->sk_peer_cred);
592         sk->sk_peer_pid  = get_pid(task_tgid(current));
593         sk->sk_peer_cred = get_current_cred();
594 }
595
596 static void copy_peercred(struct sock *sk, struct sock *peersk)
597 {
598         put_pid(sk->sk_peer_pid);
599         if (sk->sk_peer_cred)
600                 put_cred(sk->sk_peer_cred);
601         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
602         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
603 }
604
605 static int unix_listen(struct socket *sock, int backlog)
606 {
607         int err;
608         struct sock *sk = sock->sk;
609         struct unix_sock *u = unix_sk(sk);
610         struct pid *old_pid = NULL;
611
612         err = -EOPNOTSUPP;
613         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
614                 goto out;       /* Only stream/seqpacket sockets accept */
615         err = -EINVAL;
616         if (!u->addr)
617                 goto out;       /* No listens on an unbound socket */
618         unix_state_lock(sk);
619         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620                 goto out_unlock;
621         if (backlog > sk->sk_max_ack_backlog)
622                 wake_up_interruptible_all(&u->peer_wait);
623         sk->sk_max_ack_backlog  = backlog;
624         sk->sk_state            = TCP_LISTEN;
625         /* set credentials so connect can copy them */
626         init_peercred(sk);
627         err = 0;
628
629 out_unlock:
630         unix_state_unlock(sk);
631         put_pid(old_pid);
632 out:
633         return err;
634 }
635
636 static int unix_release(struct socket *);
637 static int unix_bind(struct socket *, struct sockaddr *, int);
638 static int unix_stream_connect(struct socket *, struct sockaddr *,
639                                int addr_len, int flags);
640 static int unix_socketpair(struct socket *, struct socket *);
641 static int unix_accept(struct socket *, struct socket *, int, bool);
642 static int unix_getname(struct socket *, struct sockaddr *, int);
643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
645                                     poll_table *);
646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
647 #ifdef CONFIG_COMPAT
648 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
649 #endif
650 static int unix_shutdown(struct socket *, int);
651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
654                                     size_t size, int flags);
655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
656                                        struct pipe_inode_info *, size_t size,
657                                        unsigned int flags);
658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
661                               int, int);
662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
664                                   int);
665
666 static int unix_set_peek_off(struct sock *sk, int val)
667 {
668         struct unix_sock *u = unix_sk(sk);
669
670         if (mutex_lock_interruptible(&u->iolock))
671                 return -EINTR;
672
673         sk->sk_peek_off = val;
674         mutex_unlock(&u->iolock);
675
676         return 0;
677 }
678
679 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
680 {
681         struct sock *sk = sock->sk;
682         struct unix_sock *u;
683
684         if (sk) {
685                 u = unix_sk(sock->sk);
686                 seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
687         }
688 }
689
/* proto_ops for SOCK_STREAM AF_UNIX sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
716
/* proto_ops for SOCK_DGRAM AF_UNIX sockets (no accept/listen). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
742
/* proto_ops for SOCK_SEQPACKET AF_UNIX sockets: connection-oriented like
 * stream (same connect/accept/listen), but with dgram-style polling.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
768
/* Shared struct proto for all three AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
774
/* Allocate and initialise a new AF_UNIX sock, enforcing the global
 * 2 * get_max_files() limit. Returns the sock (already inserted into the
 * unbound half of the hash table) or NULL on limit/allocation failure.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Optimistic increment; undone on the failure path below. */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
816
/* socket(2) family handler for PF_UNIX: select the proto_ops matching the
 * requested type (SOCK_RAW is silently treated as SOCK_DGRAM) and
 * allocate the underlying sock.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
848
849 static int unix_release(struct socket *sock)
850 {
851         struct sock *sk = sock->sk;
852
853         if (!sk)
854                 return 0;
855
856         unix_release_sock(sk, 0);
857         sock->sk = NULL;
858
859         return 0;
860 }
861
/*
 * Bind the socket to an autogenerated abstract-namespace name
 * ("\0XXXXX", five hex digits).  Used when a send/connect happens on an
 * unbound SOCK_PASSCRED socket, and for zero-length bind().  Returns 0
 * on success (or if already bound), -EINTR if interrupted waiting for
 * the bind mutex, -ENOMEM, or -ENOSPC when all 2^20 names are in use.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;	/* next candidate name, shared by all sockets */
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;	/* raced with an explicit bind(); keep that address */

	err = -ENOMEM;
	/* sizeof(short) for sun_family + 16 bytes for "\0%05x" and slack */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/* Leading NUL (from kzalloc) marks the abstract namespace. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Publish the address with release semantics so readers that
	 * smp_load_acquire(&u->addr) see its contents fully initialised.
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
922
/*
 * Resolve a sockaddr_un to the sock it designates.  Pathname addresses
 * (sun_path[0] != '\0') are resolved through the VFS and matched by
 * inode; abstract addresses are looked up in the name hash.  On success
 * a referenced sock is returned; on failure NULL is returned and
 * *error is set to a negative errno.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		/* Sending to a socket requires write permission on its node */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		/* Node exists but is bound by a socket of a different type */
		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
977
/*
 * Create the filesystem node for a pathname bind.  On success *res
 * holds references to the new dentry and its mount (caller must
 * path_put() them eventually).  Returns 0 or a negative errno; -EEXIST
 * is translated to -EADDRINUSE by the caller.
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	/* Drops the parent path reference and the i_mutex taken above */
	done_path_create(&path, dentry);
	return err;
}
1006
/*
 * bind(2) for AF_UNIX.  A zero-length name triggers autobind; a
 * pathname address creates a filesystem node and hashes the socket by
 * inode; an abstract address is hashed by name after a uniqueness
 * check.  The filesystem node (if any) is created *before* taking
 * bindlock, since mknod may sleep in the VFS.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		/* Name carries only sun_family: pick one automatically */
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Pathname sockets hash by inode; addr->hash set past the
		 * name table so name lookups never match them.
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Publish with release semantics; pairs with smp_load_acquire()
	 * readers of u->addr (e.g. unix_getname()).
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
1097
/*
 * Take the state locks of two socks in a deadlock-free global order
 * (lower address first).  A NULL or identical second sock locks only
 * the first.  Paired with unix_state_double_unlock().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	first  = (sk1 < sk2) ? sk1 : sk2;
	second = (sk1 < sk2) ? sk2 : sk1;
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1112
/* Release locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1122
/*
 * connect(2) for SOCK_DGRAM: set (or, with AF_UNSPEC, clear) the
 * default peer.  Both socks are locked together so the peer pointer
 * and the target's liveness are checked atomically; a previous peer is
 * disconnected and notified.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* Credential-passing sockets must have an address */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);	/* drop the reference the old peer link held */
	} else {
		unix_peer(sk) = other;	/* takes over the ref from unix_find_other() */
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1202
/*
 * Sleep (up to @timeo jiffies) until @other has room in its receive
 * queue, dies, or shuts down for reading.  Called with other's state
 * lock held; the lock is dropped before sleeping and NOT re-taken.
 * Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Re-check the wake conditions after queueing on peer_wait so a
	 * concurrent wakeup cannot be missed.
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1223
/*
 * connect(2) for SOCK_STREAM/SOCK_SEQPACKET.  A new "embryo" sock is
 * allocated up front, linked back-to-back with the connecting sock,
 * and queued on the listener's receive queue inside a one-byte skb;
 * accept() later dequeues it.  All resources are allocated before any
 * state lock is taken, so the locked sections never sleep.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* Credential-passing sockets must have an address */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Listener backlog is full: fail immediately for
		 * non-blocking sockets, otherwise wait and retry.
		 */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1412
1413 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1414 {
1415         struct sock *ska = socka->sk, *skb = sockb->sk;
1416
1417         /* Join our sockets back to back */
1418         sock_hold(ska);
1419         sock_hold(skb);
1420         unix_peer(ska) = skb;
1421         unix_peer(skb) = ska;
1422         init_peercred(ska);
1423         init_peercred(skb);
1424
1425         if (ska->sk_type != SOCK_DGRAM) {
1426                 ska->sk_state = TCP_ESTABLISHED;
1427                 skb->sk_state = TCP_ESTABLISHED;
1428                 socka->state  = SS_CONNECTED;
1429                 sockb->state  = SS_CONNECTED;
1430         }
1431         return 0;
1432 }
1433
1434 static void unix_sock_inherit_flags(const struct socket *old,
1435                                     struct socket *new)
1436 {
1437         if (test_bit(SOCK_PASSCRED, &old->flags))
1438                 set_bit(SOCK_PASSCRED, &new->flags);
1439         if (test_bit(SOCK_PASSSEC, &old->flags))
1440                 set_bit(SOCK_PASSSEC, &new->flags);
1441 }
1442
/*
 * accept(2): dequeue one embryo sock (queued by unix_stream_connect()
 * inside a one-byte skb) from the listener's receive queue and graft
 * it onto @newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* a backlog slot freed up: let blocked connectors retry */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1486
1487
/*
 * getsockname(2)/getpeername(2): copy the (peer's, if @peer) bound
 * address into @uaddr.  Returns the address length on success (an
 * unbound socket reports just the AF_UNIX family), or -ENOTCONN when
 * the peer is requested but absent.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	/* Pairs with smp_store_release() in unix_bind()/unix_autobind();
	 * guarantees the address contents are fully visible.
	 */
	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1519
1520 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1521 {
1522         int err = 0;
1523
1524         UNIXCB(skb).pid  = get_pid(scm->pid);
1525         UNIXCB(skb).uid = scm->creds.uid;
1526         UNIXCB(skb).gid = scm->creds.gid;
1527         UNIXCB(skb).fp = NULL;
1528         unix_get_secdata(scm, skb);
1529         if (scm->fp && send_fds)
1530                 err = unix_attach_fds(scm, skb);
1531
1532         skb->destructor = unix_destruct_scm;
1533         return err;
1534 }
1535
1536 static bool unix_passcred_enabled(const struct socket *sock,
1537                                   const struct sock *other)
1538 {
1539         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1540                !other->sk_socket ||
1541                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1542 }
1543
1544 /*
1545  * Some apps rely on write() giving SCM_CREDENTIALS
1546  * We include credentials if source or destination socket
1547  * asserted SOCK_PASSCRED.
1548  */
1549 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1550                             const struct sock *other)
1551 {
1552         if (UNIXCB(skb).pid)
1553                 return;
1554         if (unix_passcred_enabled(sock, other)) {
1555                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1556                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1557         }
1558 }
1559
1560 static int maybe_init_creds(struct scm_cookie *scm,
1561                             struct socket *socket,
1562                             const struct sock *other)
1563 {
1564         int err;
1565         struct msghdr msg = { .msg_controllen = 0 };
1566
1567         err = scm_send(socket, &msg, scm, false);
1568         if (err)
1569                 return err;
1570
1571         if (unix_passcred_enabled(socket, other)) {
1572                 scm->pid = get_pid(task_tgid(current));
1573                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1574         }
1575         return err;
1576 }
1577
1578 static bool unix_skb_scm_eq(struct sk_buff *skb,
1579                             struct scm_cookie *scm)
1580 {
1581         const struct unix_skb_parms *u = &UNIXCB(skb);
1582
1583         return u->pid == scm->pid &&
1584                uid_eq(u->uid, scm->creds.uid) &&
1585                gid_eq(u->gid, scm->creds.gid) &&
1586                unix_secdata_eq(scm, skb);
1587 }
1588
1589 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1590 {
1591         struct scm_fp_list *fp = UNIXCB(skb).fp;
1592         struct unix_sock *u = unix_sk(sk);
1593
1594         lockdep_assert_held(&sk->sk_receive_queue.lock);
1595
1596         if (unlikely(fp && fp->count))
1597                 u->scm_stat.nr_fds += fp->count;
1598 }
1599
1600 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1601 {
1602         struct scm_fp_list *fp = UNIXCB(skb).fp;
1603         struct unix_sock *u = unix_sk(sk);
1604
1605         lockdep_assert_held(&sk->sk_receive_queue.lock);
1606
1607         if (unlikely(fp && fp->count))
1608                 u->scm_stat.nr_fds -= fp->count;
1609 }
1610
1611 /*
1612  *      Send AF_UNIX data.
1613  */
1614
1615 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1616                               size_t len)
1617 {
1618         struct sock *sk = sock->sk;
1619         struct net *net = sock_net(sk);
1620         struct unix_sock *u = unix_sk(sk);
1621         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1622         struct sock *other = NULL;
1623         int namelen = 0; /* fake GCC */
1624         int err;
1625         unsigned int hash;
1626         struct sk_buff *skb;
1627         long timeo;
1628         struct scm_cookie scm;
1629         int data_len = 0;
1630         int sk_locked;
1631
1632         wait_for_unix_gc();
1633         err = scm_send(sock, msg, &scm, false);
1634         if (err < 0)
1635                 return err;
1636
1637         err = -EOPNOTSUPP;
1638         if (msg->msg_flags&MSG_OOB)
1639                 goto out;
1640
1641         if (msg->msg_namelen) {
1642                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1643                 if (err < 0)
1644                         goto out;
1645                 namelen = err;
1646         } else {
1647                 sunaddr = NULL;
1648                 err = -ENOTCONN;
1649                 other = unix_peer_get(sk);
1650                 if (!other)
1651                         goto out;
1652         }
1653
1654         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1655             && (err = unix_autobind(sock)) != 0)
1656                 goto out;
1657
1658         err = -EMSGSIZE;
1659         if (len > sk->sk_sndbuf - 32)
1660                 goto out;
1661
1662         if (len > SKB_MAX_ALLOC) {
1663                 data_len = min_t(size_t,
1664                                  len - SKB_MAX_ALLOC,
1665                                  MAX_SKB_FRAGS * PAGE_SIZE);
1666                 data_len = PAGE_ALIGN(data_len);
1667
1668                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1669         }
1670
1671         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1672                                    msg->msg_flags & MSG_DONTWAIT, &err,
1673                                    PAGE_ALLOC_COSTLY_ORDER);
1674         if (skb == NULL)
1675                 goto out;
1676
1677         err = unix_scm_to_skb(&scm, skb, true);
1678         if (err < 0)
1679                 goto out_free;
1680
1681         skb_put(skb, len - data_len);
1682         skb->data_len = data_len;
1683         skb->len = len;
1684         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1685         if (err)
1686                 goto out_free;
1687
1688         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1689
1690 restart:
1691         if (!other) {
1692                 err = -ECONNRESET;
1693                 if (sunaddr == NULL)
1694                         goto out_free;
1695
1696                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1697                                         hash, &err);
1698                 if (other == NULL)
1699                         goto out_free;
1700         }
1701
1702         if (sk_filter(other, skb) < 0) {
1703                 /* Toss the packet but do not return any error to the sender */
1704                 err = len;
1705                 goto out_free;
1706         }
1707
1708         sk_locked = 0;
1709         unix_state_lock(other);
1710 restart_locked:
1711         err = -EPERM;
1712         if (!unix_may_send(sk, other))
1713                 goto out_unlock;
1714
1715         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1716                 /*
1717                  *      Check with 1003.1g - what should
1718                  *      datagram error
1719                  */
1720                 unix_state_unlock(other);
1721                 sock_put(other);
1722
1723                 if (!sk_locked)
1724                         unix_state_lock(sk);
1725
1726                 err = 0;
1727                 if (unix_peer(sk) == other) {
1728                         unix_peer(sk) = NULL;
1729                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1730
1731                         unix_state_unlock(sk);
1732
1733                         unix_dgram_disconnected(sk, other);
1734                         sock_put(other);
1735                         err = -ECONNREFUSED;
1736                 } else {
1737                         unix_state_unlock(sk);
1738                 }
1739
1740                 other = NULL;
1741                 if (err)
1742                         goto out_free;
1743                 goto restart;
1744         }
1745
1746         err = -EPIPE;
1747         if (other->sk_shutdown & RCV_SHUTDOWN)
1748                 goto out_unlock;
1749
1750         if (sk->sk_type != SOCK_SEQPACKET) {
1751                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1752                 if (err)
1753                         goto out_unlock;
1754         }
1755
1756         /* other == sk && unix_peer(other) != sk if
1757          * - unix_peer(sk) == NULL, destination address bound to sk
1758          * - unix_peer(sk) == sk by time of get but disconnected before lock
1759          */
1760         if (other != sk &&
1761             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1762                 if (timeo) {
1763                         timeo = unix_wait_for_peer(other, timeo);
1764
1765                         err = sock_intr_errno(timeo);
1766                         if (signal_pending(current))
1767                                 goto out_free;
1768
1769                         goto restart;
1770                 }
1771
1772                 if (!sk_locked) {
1773                         unix_state_unlock(other);
1774                         unix_state_double_lock(sk, other);
1775                 }
1776
1777                 if (unix_peer(sk) != other ||
1778                     unix_dgram_peer_wake_me(sk, other)) {
1779                         err = -EAGAIN;
1780                         sk_locked = 1;
1781                         goto out_unlock;
1782                 }
1783
1784                 if (!sk_locked) {
1785                         sk_locked = 1;
1786                         goto restart_locked;
1787                 }
1788         }
1789
1790         if (unlikely(sk_locked))
1791                 unix_state_unlock(sk);
1792
1793         if (sock_flag(other, SOCK_RCVTSTAMP))
1794                 __net_timestamp(skb);
1795         maybe_add_creds(skb, sock, other);
1796         spin_lock(&other->sk_receive_queue.lock);
1797         scm_stat_add(other, skb);
1798         __skb_queue_tail(&other->sk_receive_queue, skb);
1799         spin_unlock(&other->sk_receive_queue.lock);
1800         unix_state_unlock(other);
1801         other->sk_data_ready(other);
1802         sock_put(other);
1803         scm_destroy(&scm);
1804         return len;
1805
1806 out_unlock:
1807         if (sk_locked)
1808                 unix_state_unlock(sk);
1809         unix_state_unlock(other);
1810 out_free:
1811         kfree_skb(skb);
1812 out:
1813         if (other)
1814                 sock_put(other);
1815         scm_destroy(&scm);
1816         return err;
1817 }
1818
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 *
 * This is the maximum page-fragment payload carried by a single skb;
 * anything beyond SKB_MAX_HEAD(0) linear space spills into frags.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1823
/* SOCK_STREAM sendmsg: slice the user data into skbs and append them to
 * the connected peer's receive queue.
 *
 * Returns the number of bytes queued, or a negative errno only when
 * nothing at all could be sent (a partial write returns the partial
 * count - see the final return statement).
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;	/* connected peer */
	int err, size;
	struct sk_buff *skb;
	int sent = 0;			/* bytes queued so far */
	struct scm_cookie scm;
	bool fds_sent = false;		/* SCM_RIGHTS go on the first skb only */
	int data_len;			/* bytes carried in page frags */

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	/* Out-of-band data is not supported on AF_UNIX streams. */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address makes no sense on a connected stream. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Whatever does not fit the linear head goes into frags. */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Re-check under the peer's state lock: the peer may have
		 * died or shut down reception while we copied the data.
		 */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		spin_lock(&other->sk_receive_queue.lock);
		scm_stat_add(other, skb);
		__skb_queue_tail(&other->sk_receive_queue, skb);
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Writing to a closed stream raises SIGPIPE unless suppressed. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1925
/* Zero-copy sendpage for SOCK_STREAM: append the page fragment to the
 * skb already at the tail of the peer's receive queue when its
 * credentials match, otherwise allocate and queue a fresh skb.
 *
 * Returns the number of bytes appended (== size) or a negative errno.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;		/* credentials not yet initialised */
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered directly: alloc_skb is only reached by goto from
	 * below with both locks held.  It drops them before the possibly
	 * sleeping allocation, then falls through to retake them.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	/* Peer gone or no longer reading? */
	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Pick the skb to extend: reuse the queue tail if it carries
	 * matching credentials, else use (or go allocate) a fresh one.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes against the sender. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2041
2042 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2043                                   size_t len)
2044 {
2045         int err;
2046         struct sock *sk = sock->sk;
2047
2048         err = sock_error(sk);
2049         if (err)
2050                 return err;
2051
2052         if (sk->sk_state != TCP_ESTABLISHED)
2053                 return -ENOTCONN;
2054
2055         if (msg->msg_namelen)
2056                 msg->msg_namelen = 0;
2057
2058         return unix_dgram_sendmsg(sock, msg, len);
2059 }
2060
2061 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2062                                   size_t size, int flags)
2063 {
2064         struct sock *sk = sock->sk;
2065
2066         if (sk->sk_state != TCP_ESTABLISHED)
2067                 return -ENOTCONN;
2068
2069         return unix_dgram_recvmsg(sock, msg, size, flags);
2070 }
2071
2072 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2073 {
2074         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2075
2076         if (addr) {
2077                 msg->msg_namelen = addr->len;
2078                 memcpy(msg->msg_name, addr->name, addr->len);
2079         }
2080 }
2081
/* Receive a single datagram (SOCK_DGRAM / SOCK_SEQPACKET message).
 *
 * Returns the number of bytes copied (or skb->len - skip when the
 * message was truncated and MSG_TRUNC was requested), or a negative
 * errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under u->iolock; sleep (without the lock) when
	 * the queue is empty and blocking is allowed.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, scm_stat_del,
					      &skip, &err, &last);
		if (skb)
			break;	/* got one; iolock still held */

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* We freed queue space: let a writer blocked on our queue run. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Truncate to the caller's buffer and flag a short copy. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2184
2185 /*
2186  *      Sleep until more data has arrived. But check for races..
2187  */
2188 static long unix_stream_data_wait(struct sock *sk, long timeo,
2189                                   struct sk_buff *last, unsigned int last_len,
2190                                   bool freezable)
2191 {
2192         struct sk_buff *tail;
2193         DEFINE_WAIT(wait);
2194
2195         unix_state_lock(sk);
2196
2197         for (;;) {
2198                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2199
2200                 tail = skb_peek_tail(&sk->sk_receive_queue);
2201                 if (tail != last ||
2202                     (tail && tail->len != last_len) ||
2203                     sk->sk_err ||
2204                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2205                     signal_pending(current) ||
2206                     !timeo)
2207                         break;
2208
2209                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2210                 unix_state_unlock(sk);
2211                 if (freezable)
2212                         timeo = freezable_schedule_timeout(timeo);
2213                 else
2214                         timeo = schedule_timeout(timeo);
2215                 unix_state_lock(sk);
2216
2217                 if (sock_flag(sk, SOCK_DEAD))
2218                         break;
2219
2220                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2221         }
2222
2223         finish_wait(sk_sleep(sk), &wait);
2224         unix_state_unlock(sk);
2225         return timeo;
2226 }
2227
2228 static unsigned int unix_skb_len(const struct sk_buff *skb)
2229 {
2230         return skb->len - UNIXCB(skb).consumed;
2231 }
2232
/* Shared state for the generic stream receive path: recvmsg and splice
 * both funnel through unix_stream_read_generic() with different
 * recv_actor callbacks.
 */
struct unix_stream_read_state {
	/* Consume up to 'chunk' bytes starting 'skip' bytes into the
	 * skb; returns bytes consumed or a negative error.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;		/* recvmsg path only, else NULL */
	struct pipe_inode_info *pipe;	/* splice path only */
	size_t size;			/* bytes requested by the caller */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2243
/* Core receive loop shared by unix_stream_recvmsg() and
 * unix_stream_splice_read().  Walks the receive queue, handing each
 * chunk to state->recv_actor and gluing consecutive skbs from the same
 * writer into a single read.
 *
 * Returns the number of bytes copied, or a negative errno when nothing
 * was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum bytes before we may return (SO_RCVLOWAT / MSG_WAITALL). */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop iolock while sleeping so writers can queue. */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Honour a stored peek offset: skip fully-peeked skbs. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference: the actor may sleep, and a concurrent
		 * reader could unlink and free this skb meanwhile.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				spin_lock(&sk->sk_receive_queue.lock);
				scm_stat_del(sk, skb);
				spin_unlock(&sk->sk_receive_queue.lock);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop after an skb that carried fds so the fds
			 * are delivered with their own message boundary.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2444
2445 static int unix_stream_read_actor(struct sk_buff *skb,
2446                                   int skip, int chunk,
2447                                   struct unix_stream_read_state *state)
2448 {
2449         int ret;
2450
2451         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2452                                     state->msg, chunk);
2453         return ret ?: chunk;
2454 }
2455
2456 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2457                                size_t size, int flags)
2458 {
2459         struct unix_stream_read_state state = {
2460                 .recv_actor = unix_stream_read_actor,
2461                 .socket = sock,
2462                 .msg = msg,
2463                 .size = size,
2464                 .flags = flags
2465         };
2466
2467         return unix_stream_read_generic(&state, true);
2468 }
2469
2470 static int unix_stream_splice_actor(struct sk_buff *skb,
2471                                     int skip, int chunk,
2472                                     struct unix_stream_read_state *state)
2473 {
2474         return skb_splice_bits(skb, state->socket->sk,
2475                                UNIXCB(skb).consumed + skip,
2476                                state->pipe, chunk, state->splice_flags);
2477 }
2478
2479 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2480                                        struct pipe_inode_info *pipe,
2481                                        size_t size, unsigned int flags)
2482 {
2483         struct unix_stream_read_state state = {
2484                 .recv_actor = unix_stream_splice_actor,
2485                 .socket = sock,
2486                 .pipe = pipe,
2487                 .size = size,
2488                 .splice_flags = flags,
2489         };
2490
2491         if (unlikely(*ppos))
2492                 return -ESPIPE;
2493
2494         if (sock->file->f_flags & O_NONBLOCK ||
2495             flags & SPLICE_F_NONBLOCK)
2496                 state.flags = MSG_DONTWAIT;
2497
2498         return unix_stream_read_generic(&state, false);
2499 }
2500
2501 static int unix_shutdown(struct socket *sock, int mode)
2502 {
2503         struct sock *sk = sock->sk;
2504         struct sock *other;
2505
2506         if (mode < SHUT_RD || mode > SHUT_RDWR)
2507                 return -EINVAL;
2508         /* This maps:
2509          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2510          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2511          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2512          */
2513         ++mode;
2514
2515         unix_state_lock(sk);
2516         sk->sk_shutdown |= mode;
2517         other = unix_peer(sk);
2518         if (other)
2519                 sock_hold(other);
2520         unix_state_unlock(sk);
2521         sk->sk_state_change(sk);
2522
2523         if (other &&
2524                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2525
2526                 int peer_mode = 0;
2527
2528                 if (mode&RCV_SHUTDOWN)
2529                         peer_mode |= SEND_SHUTDOWN;
2530                 if (mode&SEND_SHUTDOWN)
2531                         peer_mode |= RCV_SHUTDOWN;
2532                 unix_state_lock(other);
2533                 other->sk_shutdown |= peer_mode;
2534                 unix_state_unlock(other);
2535                 other->sk_state_change(other);
2536                 if (peer_mode == SHUTDOWN_MASK)
2537                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2538                 else if (peer_mode & RCV_SHUTDOWN)
2539                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2540         }
2541         if (other)
2542                 sock_put(other);
2543
2544         return 0;
2545 }
2546
2547 long unix_inq_len(struct sock *sk)
2548 {
2549         struct sk_buff *skb;
2550         long amount = 0;
2551
2552         if (sk->sk_state == TCP_LISTEN)
2553                 return -EINVAL;
2554
2555         spin_lock(&sk->sk_receive_queue.lock);
2556         if (sk->sk_type == SOCK_STREAM ||
2557             sk->sk_type == SOCK_SEQPACKET) {
2558                 skb_queue_walk(&sk->sk_receive_queue, skb)
2559                         amount += unix_skb_len(skb);
2560         } else {
2561                 skb = skb_peek(&sk->sk_receive_queue);
2562                 if (skb)
2563                         amount = skb->len;
2564         }
2565         spin_unlock(&sk->sk_receive_queue.lock);
2566
2567         return amount;
2568 }
2569 EXPORT_SYMBOL_GPL(unix_inq_len);
2570
/* Bytes queued on the send side and not yet consumed by the receiver
 * (SIOCOUTQ).
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2576
2577 static int unix_open_file(struct sock *sk)
2578 {
2579         struct path path;
2580         struct file *f;
2581         int fd;
2582
2583         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2584                 return -EPERM;
2585
2586         if (!smp_load_acquire(&unix_sk(sk)->addr))
2587                 return -ENOENT;
2588
2589         path = unix_sk(sk)->path;
2590         if (!path.dentry)
2591                 return -ENOENT;
2592
2593         path_get(&path);
2594
2595         fd = get_unused_fd_flags(O_CLOEXEC);
2596         if (fd < 0)
2597                 goto out;
2598
2599         f = dentry_open(&path, O_PATH, current_cred());
2600         if (IS_ERR(f)) {
2601                 put_unused_fd(fd);
2602                 fd = PTR_ERR(f);
2603                 goto out;
2604         }
2605
2606         fd_install(fd, f);
2607 out:
2608         path_put(&path);
2609
2610         return fd;
2611 }
2612
2613 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2614 {
2615         struct sock *sk = sock->sk;
2616         long amount = 0;
2617         int err;
2618
2619         switch (cmd) {
2620         case SIOCOUTQ:
2621                 amount = unix_outq_len(sk);
2622                 err = put_user(amount, (int __user *)arg);
2623                 break;
2624         case SIOCINQ:
2625                 amount = unix_inq_len(sk);
2626                 if (amount < 0)
2627                         err = amount;
2628                 else
2629                         err = put_user(amount, (int __user *)arg);
2630                 break;
2631         case SIOCUNIXFILE:
2632                 err = unix_open_file(sk);
2633                 break;
2634         default:
2635                 err = -ENOIOCTLCMD;
2636                 break;
2637         }
2638         return err;
2639 }
2640
#ifdef CONFIG_COMPAT
/*
 * 32-bit compat ioctl: all supported unix ioctls take a pointer argument,
 * so translating it with compat_ptr() and reusing the native handler is
 * sufficient.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2647
/*
 * poll() for connection-oriented (SOCK_STREAM / SOCK_SEQPACKET) unix
 * sockets.
 *
 * Socket state is sampled locklessly; poll tolerates the races because
 * any state change triggers a wakeup and re-evaluation.
 */
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	/* Both directions shut down -> full hangup. */
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	/* Receive side shut down still counts as "readable" (EOF). */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
2682
/*
 * poll() for datagram (and seqpacket) unix sockets.
 *
 * Differs from unix_poll() in that writability must also consider the
 * connected peer's receive queue: a sender blocks when the peer is full,
 * so it must register on the peer's wake list to be woken when the queue
 * drains.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/*
		 * Even when our own send allocation has room, a connected
		 * peer with a full receive queue blocks us; registering on
		 * the peer's wake list (unix_dgram_peer_wake_me) ensures a
		 * wakeup when it drains.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		/* Arm async space notification for SIGIO/edge-triggered poll. */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2740
2741 #ifdef CONFIG_PROC_FS
2742
/*
 * /proc/net/unix iteration state is packed into the seq_file position:
 * the high bits of *pos hold the hash-bucket index, the low BUCKET_SPACE
 * bits hold a 1-based offset within that bucket's chain.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2748
2749 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2750 {
2751         unsigned long offset = get_offset(*pos);
2752         unsigned long bucket = get_bucket(*pos);
2753         struct sock *sk;
2754         unsigned long count = 0;
2755
2756         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2757                 if (sock_net(sk) != seq_file_net(seq))
2758                         continue;
2759                 if (++count == offset)
2760                         break;
2761         }
2762
2763         return sk;
2764 }
2765
/*
 * Advance to the next socket visible in this netns, walking hash buckets
 * in order.  @sk is the current position; SEQ_START_TOKEN (or NULL via
 * unix_seq_start()) means "resume from the bucket/offset encoded in *pos".
 *
 * Note: the next_bucket label deliberately sits *inside* the do-while so
 * that exhausting the current chain jumps straight to the bucket
 * increment before retrying.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	/* First try the remainder of the chain we are standing on. */
	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* Move the cookie to the first slot of the next bucket. */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2792
/*
 * Begin a /proc/net/unix walk: take unix_table_lock (held until
 * unix_seq_stop()) and translate *pos into a socket.  Position 0 yields
 * the header line via SEQ_START_TOKEN.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	/* Bucket part of the resume cookie ran off the table: iteration done. */
	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
2806
2807 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2808 {
2809         ++*pos;
2810         return unix_next_socket(seq, v, pos);
2811 }
2812
/* End a /proc/net/unix walk: drop the lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2818
/*
 * Emit one line of /proc/net/unix: either the column header (for
 * SEQ_START_TOKEN) or one socket's state plus its bound address, if any.
 * Abstract addresses are shown with a leading '@', and embedded NUL
 * bytes in the name are rendered as '@' as well.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/*
		 * Protocol column is hard-wired to 0 for PF_UNIX.  The state
		 * column distinguishes sockets with/without an attached
		 * struct socket via the sk_socket test.
		 */
		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* ->addr is stable: we hold unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Length of sun_path, excluding the sun_family field. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* pathname: drop the trailing NUL */
			else {
				/* Abstract name: leading NUL is printed as '@'. */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2863
/* seq_file operations backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif
2871
/* Family handler invoked by socket(AF_UNIX, ...) to create unix sockets. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2877
2878
2879 static int __net_init unix_net_init(struct net *net)
2880 {
2881         int error = -ENOMEM;
2882
2883         net->unx.sysctl_max_dgram_qlen = 10;
2884         if (unix_sysctl_register(net))
2885                 goto out;
2886
2887 #ifdef CONFIG_PROC_FS
2888         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2889                         sizeof(struct seq_net_private))) {
2890                 unix_sysctl_unregister(net);
2891                 goto out;
2892         }
2893 #endif
2894         error = 0;
2895 out:
2896         return error;
2897 }
2898
/* Per-netns teardown: drop the sysctl table and the /proc/net/unix entry. */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2904
/* Hooks run for every network namespace as it is created/destroyed. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2909
2910 static int __init af_unix_init(void)
2911 {
2912         int rc = -1;
2913
2914         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2915
2916         rc = proto_register(&unix_proto, 1);
2917         if (rc != 0) {
2918                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2919                 goto out;
2920         }
2921
2922         sock_register(&unix_family_ops);
2923         register_pernet_subsys(&unix_net_ops);
2924 out:
2925         return rc;
2926 }
2927
/*
 * Module unload: unregister the address family first so no new PF_UNIX
 * sockets can be created, then drop the proto and the per-netns state.
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2934
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use a
 * UNIX socket.  But later than subsys_initcall() because we depend on
 * stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);