aa6e2530e1ec62853844ec70be95d6652319cdf5
[linux-2.6-microblaze.git] / net / unix / af_unix.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:        Implementation of BSD Unix domain sockets.
4  *
5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *              Linus Torvalds  :       Assorted bug cures.
9  *              Niibe Yutaka    :       async I/O support.
10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
11  *              Alan Cox        :       Limit size of allocated blocks.
12  *              Alan Cox        :       Fixed the stupid socketpair bug.
13  *              Alan Cox        :       BSD compatibility fine tuning.
14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
15  *              Alan Cox        :       Sorted out a proper draft version of
16  *                                      file descriptor passing hacked up from
17  *                                      Mike Shaver's work.
18  *              Marty Leisner   :       Fixes to fd passing
19  *              Nick Nevin      :       recvmsg bugfix.
20  *              Alan Cox        :       Started proper garbage collector
21  *              Heiko EiBfeldt  :       Missing verify_area check
22  *              Alan Cox        :       Started POSIXisms
23  *              Andreas Schwab  :       Replace inode by dentry for proper
24  *                                      reference counting
25  *              Kirk Petersen   :       Made this a module
26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
27  *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
29  *                                      by above two patches.
30  *           Andrea Arcangeli   :       If possible we block in connect(2)
31  *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
33  *                                      old apps and it will avoid huge amount
34  *                                      of socks hashed (this for unix_gc()
35  *                                      performances reasons).
36  *                                      Security fix that limits the max
37  *                                      number of socks to 2*max_files and
38  *                                      the number of skb queueable in the
39  *                                      dgram receiver.
40  *              Artur Skawina   :       Hash function optimizations
41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
42  *            Malcolm Beattie   :       Set peercred for socketpair
43  *           Michal Ostrowski   :       Module initialization cleanup.
44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
45  *                                      the core infrastructure is doing that
46  *                                      for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *      [TO FIX]
51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
52  *              other the moment one end closes.
53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *      [NOT TO FIX]
56  *      accept() returns a path name even if the connecting socket has closed
57  *              in the meantime (BSD loses the path and gives up).
58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *      BSD af_unix apparently has connect forgetting to block properly.
62  *              (need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *      Bug fixes and improvements.
66  *              - client shutdown killed server socket.
67  *              - removed all useless cli/sti pairs.
68  *
69  *      Semantic changes/extensions.
70  *              - generic control message passing.
71  *              - SCM_CREDENTIALS control message.
72  *              - "Abstract" (not FS based) socket bindings.
73  *                Abstract names are sequences of bytes (not zero terminated)
74  *                started by 0, so that this name space does not intersect
75  *                with BSD names.
76  */
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116
117 #include "scm.h"
118
/* Global hash table of all AF_UNIX sockets.  The first UNIX_HASH_SIZE
 * buckets hold bound sockets (hashed by address); the second half holds
 * unbound sockets (see unix_sockets_unbound()).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Protects unix_socket_table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of live AF_UNIX sockets; capped in unix_create1(). */
static atomic_long_t unix_nr_socks;
124
125
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135
/* True if the socket is bound to an abstract (non-filesystem) name;
 * abstract addresses hash into the first half of the table.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID from the scm cookie into the skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID carried by the skb back into the receiver's scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True if the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* No-op stubs when LSM networking support is compiled out. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
165
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179
/* Peer pointer of a unix socket. */
#define unix_peer(sk) (unix_sk(sk)->peer)

/* True if @osk's peer pointer refers back to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

/* @sk may send to @osk when @osk is unconnected or connected to @sk. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

/* Receive queue length over the backlog limit?  (plain accessors —
 * presumably callers hold a lock that stabilizes the queue; verify at
 * call sites)
 */
static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

/* As above, but using lockless/annotated accessors so it is safe to
 * call without holding any lock.
 */
static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}
202
/* Return @s's peer with a reference held, or NULL if unconnected.
 * The peer pointer is sampled under the socket state lock so the
 * reference is taken on a stable target.  Caller must sock_put() it.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *ret;

	unix_state_lock(s);
	ret = unix_peer(s);
	if (ret)
		sock_hold(ret);
	unix_state_unlock(s);

	return ret;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
215
216 static inline void unix_release_addr(struct unix_address *addr)
217 {
218         if (refcount_dec_and_test(&addr->refcnt))
219                 kfree(addr);
220 }
221
/*
 *	Check unix socket name:
 *		- should be not zero length.
 *		- if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 */

/* Validate and canonicalize @sunaddr (@len bytes) for bind/connect.
 * Returns the canonical address length (sun_family plus name) or
 * -EINVAL on malformed input.  For abstract names (leading NUL byte)
 * *hashp is set to the name's hash; for filesystem names *hashp stays 0
 * and the path is NUL-terminated in place (see the comment below).
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	/* must carry more than just sun_family and fit in sockaddr_un */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* abstract name: hash the whole (binary) name */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
253
/* Unlink @sk from its hash bucket.  Caller must hold unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

/* Add an unhashed @sk to bucket @list.  Caller must hold unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
278
/* Look up a bound socket in @net whose address matches @sunname
 * (@len bytes) in bucket [hash ^ type].  Caller must hold
 * unix_table_lock; no reference is taken on the result.
 * Returns NULL when no match is found.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}
297
298 static inline struct sock *unix_find_socket_byname(struct net *net,
299                                                    struct sockaddr_un *sunname,
300                                                    int len, int type,
301                                                    unsigned int hash)
302 {
303         struct sock *s;
304
305         spin_lock(&unix_table_lock);
306         s = __unix_find_socket_byname(net, sunname, len, type, hash);
307         if (s)
308                 sock_hold(s);
309         spin_unlock(&unix_table_lock);
310         return s;
311 }
312
/* Find the socket bound to filesystem inode @i, if any.  Returns the
 * socket with a reference held, or NULL.  Buckets are indexed by inode
 * number, so a short walk compares backing inodes.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
332
333 /* Support code for asymmetrically connected dgram sockets
334  *
335  * If a datagram socket is connected to a socket not itself connected
336  * to the first socket (eg, /dev/log), clients may only enqueue more
337  * messages if the present receive queue of the server socket is not
338  * "too large". This means there's a second writeability condition
339  * poll and sendmsg need to test. The dgram recv code will do a wake
340  * up on the peer_wait wait queue of a socket upon reception of a
341  * datagram which needs to be propagated to sleeping would-be writers
342  * since these might not have sent anything so far. This can't be
343  * accomplished via poll_wait because the lifetime of the server
344  * socket might be less than that of its clients if these break their
345  * association with it or if the server socket is closed while clients
346  * are still connected to it and there's no way to inform "a polling
347  * implementation" that it should let go of a certain wait queue
348  *
349  * In order to propagate a wake up, a wait_queue_entry_t of the client
350  * socket is enqueued on the peer_wait queue of the server socket
351  * whose wake function does a wake_up on the ordinary client socket
352  * wait queue. This connection is established whenever a write (or
353  * poll for write) hit the flow control condition and broken when the
354  * association to the server socket is dissolved or after a wake up
355  * was relayed.
356  */
357
/* Wake function installed on a peer's peer_wait queue (see the comment
 * block above).  It detaches this entry from the peer's queue, clears
 * the association marker and relays the wakeup to the client socket's
 * own wait queue.  Runs with the peer's peer_wait.lock held by the
 * wake_up machinery, so peer_wake.private is stable here.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
377
/* Arm a wakeup relay: enqueue @sk's peer_wake entry on @other's
 * peer_wait queue so a future dgram reception on @other wakes writers
 * sleeping on @sk.  peer_wake.private doubles as the "already armed"
 * marker.  Returns 1 if the entry was newly queued, 0 if it was armed
 * already.
 */
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}
398
/* Disarm the wakeup relay armed by unix_dgram_peer_wake_connect():
 * remove @sk's entry from @other's peer_wait queue, but only if it is
 * still associated with @other (the relay callback may have detached it
 * concurrently under the same peer_wait.lock).
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
415
416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417                                                    struct sock *other)
418 {
419         unix_dgram_peer_wake_disconnect(sk, other);
420         wake_up_interruptible_poll(sk_sleep(sk),
421                                    EPOLLOUT |
422                                    EPOLLWRNORM |
423                                    EPOLLWRBAND);
424 }
425
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 if the caller should sleep waiting for POLLOUT (other's
 * queue is full and other is alive) with the wakeup relay left armed;
 * returns 0, with the relay disarmed again, if the caller may proceed.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
449
450 static int unix_writable(const struct sock *sk)
451 {
452         return sk->sk_state != TCP_LISTEN &&
453                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454 }
455
/* sk->sk_write_space callback: once the socket is writable again, wake
 * POLLOUT sleepers and deliver SIGIO to async waiters.  The wait queue
 * is accessed under RCU because the socket may be detaching.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
470
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* the purge freed queue space; let blocked writers retry */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
491
/* sk->sk_destruct callback, run when the last reference to @sk is
 * dropped.  Frees any still-queued skbs and the bound address, and
 * updates global socket accounting.  The WARN_ONs assert that the
 * socket is already unhashed, detached and has no outstanding write
 * memory.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
518
/* Tear down @sk on release.  @embrion is non-zero when @sk is an embryo
 * socket still sitting on a listener's accept queue; its stream peer
 * then gets ECONNRESET.  Shuts the socket down, notifies the peer,
 * flushes the receive queue (recursively releasing any queued embryos
 * of a listener) and drops the final reference.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* take the filesystem path out under the lock; dropped later */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* a listener's receive queue holds embryo sockets */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
592
593 static void init_peercred(struct sock *sk)
594 {
595         put_pid(sk->sk_peer_pid);
596         if (sk->sk_peer_cred)
597                 put_cred(sk->sk_peer_cred);
598         sk->sk_peer_pid  = get_pid(task_tgid(current));
599         sk->sk_peer_cred = get_current_cred();
600 }
601
602 static void copy_peercred(struct sock *sk, struct sock *peersk)
603 {
604         put_pid(sk->sk_peer_pid);
605         if (sk->sk_peer_cred)
606                 put_cred(sk->sk_peer_cred);
607         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
608         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609 }
610
/* listen(2) handler.  Only bound stream/seqpacket sockets may listen.
 * Returns 0 on success or a negative errno (-EOPNOTSUPP for dgram,
 * -EINVAL when unbound or in a bad state).
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;	/* never reassigned; put_pid(NULL) is a no-op */

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* a larger backlog may unblock sleepers on peer_wait */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}
641
642 static int unix_release(struct socket *);
643 static int unix_bind(struct socket *, struct sockaddr *, int);
644 static int unix_stream_connect(struct socket *, struct sockaddr *,
645                                int addr_len, int flags);
646 static int unix_socketpair(struct socket *, struct socket *);
647 static int unix_accept(struct socket *, struct socket *, int, bool);
648 static int unix_getname(struct socket *, struct sockaddr *, int);
649 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
650 static __poll_t unix_dgram_poll(struct file *, struct socket *,
651                                     poll_table *);
652 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
653 #ifdef CONFIG_COMPAT
654 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
655 #endif
656 static int unix_shutdown(struct socket *, int);
657 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
658 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
659 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
660                                     size_t size, int flags);
661 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
662                                        struct pipe_inode_info *, size_t size,
663                                        unsigned int flags);
664 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
665 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
666 static int unix_dgram_connect(struct socket *, struct sockaddr *,
667                               int, int);
668 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
669 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
670                                   int);
671
672 static int unix_set_peek_off(struct sock *sk, int val)
673 {
674         struct unix_sock *u = unix_sk(sk);
675
676         if (mutex_lock_interruptible(&u->iolock))
677                 return -EINTR;
678
679         sk->sk_peek_off = val;
680         mutex_unlock(&u->iolock);
681
682         return 0;
683 }
684
#ifdef CONFIG_PROCFS
/* /proc/<pid>/fdinfo hook: report how many file descriptors are queued
 * in SCM_RIGHTS messages pending on this socket.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif
699
/* proto_ops for SOCK_STREAM AF_UNIX sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
726
/* proto_ops for SOCK_DGRAM AF_UNIX sockets (accept/listen unsupported). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
752
/* proto_ops for SOCK_SEQPACKET AF_UNIX sockets: connection-oriented
 * like stream, but with datagram-style poll and message boundaries.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
778
/* Single struct proto shared by all AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
784
/* Allocate and initialize one AF_UNIX sock, enforcing a global cap of
 * 2 * get_max_files() sockets.  The new socket starts on the unbound
 * hash list.  Returns the new sock, or NULL on failure.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* speculative increment; undone at out: on failure */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
826
827 static int unix_create(struct net *net, struct socket *sock, int protocol,
828                        int kern)
829 {
830         if (protocol && protocol != PF_UNIX)
831                 return -EPROTONOSUPPORT;
832
833         sock->state = SS_UNCONNECTED;
834
835         switch (sock->type) {
836         case SOCK_STREAM:
837                 sock->ops = &unix_stream_ops;
838                 break;
839                 /*
840                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
841                  *      nothing uses it.
842                  */
843         case SOCK_RAW:
844                 sock->type = SOCK_DGRAM;
845                 /* fall through */
846         case SOCK_DGRAM:
847                 sock->ops = &unix_dgram_ops;
848                 break;
849         case SOCK_SEQPACKET:
850                 sock->ops = &unix_seqpacket_ops;
851                 break;
852         default:
853                 return -ESOCKTNOSUPPORT;
854         }
855
856         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
857 }
858
859 static int unix_release(struct socket *sock)
860 {
861         struct sock *sk = sock->sk;
862
863         if (!sk)
864                 return 0;
865
866         unix_release_sock(sk, 0);
867         sock->sk = NULL;
868
869         return 0;
870 }
871
/*
 * Assign an automatic abstract-namespace name ("\0XXXXX", five hex
 * digits) to an unbound socket, as done for bind() with only the
 * family, or implicitly when SOCK_PASSCRED needs an address.
 *
 * Returns 0 on success (or if already bound), -EINTR if interrupted
 * while taking the bind lock, -ENOMEM on allocation failure, or
 * -ENOSPC when all 2^20 candidate names are in use.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	/* Room for sun_family plus "\0" + 5 hex digits + NUL padding. */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/* Leading NUL byte (from kzalloc) marks the abstract namespace. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	/* ordernum is only ever advanced under unix_table_lock. */
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Publish the address and move the socket to its hash chain. */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
932
/*
 * Look up the peer socket for a connect()/sendto() target address.
 *
 * Filesystem names (sun_path[0] != 0) are resolved through the VFS and
 * matched by inode; abstract names are looked up by name hash.  On
 * success returns the peer with a reference held; on failure returns
 * NULL and stores the error in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		/* Connecting requires write permission on the socket file. */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* Only update atime when the types actually match. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		/* e.g. connecting a stream socket to a datagram name. */
		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
987
/*
 * Create the filesystem node for a pathname bind().
 *
 * On success, *res holds a reference to the new dentry/mount (caller
 * must path_put() it eventually).  Returns 0 or a negative errno;
 * -EEXIST is translated to -EADDRINUSE by the caller.
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	/* Drops the parent-dir lock and the reference from kern_path_create. */
	done_path_create(&path, dentry);
	return err;
}
1016
/*
 * bind(2) entry point.
 *
 * A family-only address triggers autobind; a pathname address creates
 * a socket inode first (outside the bind lock, since mknod may sleep),
 * then the address is published and the socket rehashed under
 * unix_table_lock.  Abstract names are checked for collisions here;
 * pathname collisions are caught earlier by unix_mknod() (-EEXIST).
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family only, no name: pick an abstract name automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* Filesystem binding: create the socket inode now. */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* Already bound? */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Pathname sockets hash by inode, not by name. */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
1107
/*
 * Lock the state of two socks, always in ascending address order to
 * avoid ABBA deadlocks.  A NULL or identical second sock locks sk1 only.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	first  = sk1 < sk2 ? sk1 : sk2;
	second = sk1 < sk2 ? sk2 : sk1;
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1122
/* Counterpart of unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1132
/*
 * connect(2) for datagram sockets: pin (or, with AF_UNSPEC, clear) the
 * default peer.  Both socks are locked together so the peer pointer and
 * the target's liveness are checked atomically; a dead target found
 * after lookup is retried, since the VFS may briefly outlive the sock.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED requires us to have an address ourselves. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* Tell the old peer we are gone (unless reconnecting to it). */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1212
/*
 * Sleep on the peer's wait queue until its receive queue drains, it
 * dies, or the timeout expires.  Called with other's state lock held;
 * the lock is dropped before sleeping.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Decide whether to sleep while still holding other's lock, so a
	 * concurrent wakeup cannot be missed between test and schedule.
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1233
/*
 * connect(2) for stream/seqpacket sockets.
 *
 * Pre-allocates the embryo sock and notification skb before taking any
 * locks, finds the listener, waits for backlog space if needed, then
 * wires up the new sock and queues it on the listener's receive queue
 * for accept(2) to pick up.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED requires us to have an address ourselves. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Listener backlog is full: fail or block per O_NONBLOCK. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* Queue the embryo sock on the listener for accept(2) to reap. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1422
1423 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1424 {
1425         struct sock *ska = socka->sk, *skb = sockb->sk;
1426
1427         /* Join our sockets back to back */
1428         sock_hold(ska);
1429         sock_hold(skb);
1430         unix_peer(ska) = skb;
1431         unix_peer(skb) = ska;
1432         init_peercred(ska);
1433         init_peercred(skb);
1434
1435         if (ska->sk_type != SOCK_DGRAM) {
1436                 ska->sk_state = TCP_ESTABLISHED;
1437                 skb->sk_state = TCP_ESTABLISHED;
1438                 socka->state  = SS_CONNECTED;
1439                 sockb->state  = SS_CONNECTED;
1440         }
1441         return 0;
1442 }
1443
/* Propagate credential/security-passing options from the listening
 * socket to a newly accepted one.
 */
static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
1452
/*
 * accept(2): dequeue an embryo sock (queued by unix_stream_connect())
 * from the listener's receive queue and graft it onto newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A backlog slot freed up: wake any blocked connect(2) callers. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1496
1497
/*
 * getsockname(2)/getpeername(2): copy out our own or the peer's address.
 *
 * Returns the address length on success (family-only for unbound
 * sockets), or -ENOTCONN when the peer is requested but absent.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	/* Pairs with smp_store_release() in bind/autobind/connect, so the
	 * addr contents are fully visible once the pointer is seen.
	 */
	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1529
/*
 * Transfer ancillary data (credentials, security label and, when
 * send_fds is true, passed file descriptors) from the scm cookie into
 * the skb control block, and install the destructor that releases them.
 * Returns 0 or a negative errno from unix_attach_fds().
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	/* Destructor drops the pid/fd references taken above. */
	skb->destructor = unix_destruct_scm;
	return err;
}
1545
1546 static bool unix_passcred_enabled(const struct socket *sock,
1547                                   const struct sock *other)
1548 {
1549         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1550                !other->sk_socket ||
1551                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1552 }
1553
1554 /*
1555  * Some apps rely on write() giving SCM_CREDENTIALS
1556  * We include credentials if source or destination socket
1557  * asserted SOCK_PASSCRED.
1558  */
1559 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1560                             const struct sock *other)
1561 {
1562         if (UNIXCB(skb).pid)
1563                 return;
1564         if (unix_passcred_enabled(sock, other)) {
1565                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1566                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1567         }
1568 }
1569
1570 static int maybe_init_creds(struct scm_cookie *scm,
1571                             struct socket *socket,
1572                             const struct sock *other)
1573 {
1574         int err;
1575         struct msghdr msg = { .msg_controllen = 0 };
1576
1577         err = scm_send(socket, &msg, scm, false);
1578         if (err)
1579                 return err;
1580
1581         if (unix_passcred_enabled(socket, other)) {
1582                 scm->pid = get_pid(task_tgid(current));
1583                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1584         }
1585         return err;
1586 }
1587
1588 static bool unix_skb_scm_eq(struct sk_buff *skb,
1589                             struct scm_cookie *scm)
1590 {
1591         const struct unix_skb_parms *u = &UNIXCB(skb);
1592
1593         return u->pid == scm->pid &&
1594                uid_eq(u->uid, scm->creds.uid) &&
1595                gid_eq(u->gid, scm->creds.gid) &&
1596                unix_secdata_eq(scm, skb);
1597 }
1598
1599 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1600 {
1601         struct scm_fp_list *fp = UNIXCB(skb).fp;
1602         struct unix_sock *u = unix_sk(sk);
1603
1604         lockdep_assert_held(&sk->sk_receive_queue.lock);
1605
1606         if (unlikely(fp && fp->count))
1607                 u->scm_stat.nr_fds += fp->count;
1608 }
1609
1610 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1611 {
1612         struct scm_fp_list *fp = UNIXCB(skb).fp;
1613         struct unix_sock *u = unix_sk(sk);
1614
1615         lockdep_assert_held(&sk->sk_receive_queue.lock);
1616
1617         if (unlikely(fp && fp->count))
1618                 u->scm_stat.nr_fds -= fp->count;
1619 }
1620
1621 /*
1622  *      Send AF_UNIX data.
1623  */
1624
1625 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1626                               size_t len)
1627 {
1628         struct sock *sk = sock->sk;
1629         struct net *net = sock_net(sk);
1630         struct unix_sock *u = unix_sk(sk);
1631         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1632         struct sock *other = NULL;
1633         int namelen = 0; /* fake GCC */
1634         int err;
1635         unsigned int hash;
1636         struct sk_buff *skb;
1637         long timeo;
1638         struct scm_cookie scm;
1639         int data_len = 0;
1640         int sk_locked;
1641
1642         wait_for_unix_gc();
1643         err = scm_send(sock, msg, &scm, false);
1644         if (err < 0)
1645                 return err;
1646
1647         err = -EOPNOTSUPP;
1648         if (msg->msg_flags&MSG_OOB)
1649                 goto out;
1650
1651         if (msg->msg_namelen) {
1652                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1653                 if (err < 0)
1654                         goto out;
1655                 namelen = err;
1656         } else {
1657                 sunaddr = NULL;
1658                 err = -ENOTCONN;
1659                 other = unix_peer_get(sk);
1660                 if (!other)
1661                         goto out;
1662         }
1663
1664         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1665             && (err = unix_autobind(sock)) != 0)
1666                 goto out;
1667
1668         err = -EMSGSIZE;
1669         if (len > sk->sk_sndbuf - 32)
1670                 goto out;
1671
1672         if (len > SKB_MAX_ALLOC) {
1673                 data_len = min_t(size_t,
1674                                  len - SKB_MAX_ALLOC,
1675                                  MAX_SKB_FRAGS * PAGE_SIZE);
1676                 data_len = PAGE_ALIGN(data_len);
1677
1678                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1679         }
1680
1681         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1682                                    msg->msg_flags & MSG_DONTWAIT, &err,
1683                                    PAGE_ALLOC_COSTLY_ORDER);
1684         if (skb == NULL)
1685                 goto out;
1686
1687         err = unix_scm_to_skb(&scm, skb, true);
1688         if (err < 0)
1689                 goto out_free;
1690
1691         skb_put(skb, len - data_len);
1692         skb->data_len = data_len;
1693         skb->len = len;
1694         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1695         if (err)
1696                 goto out_free;
1697
1698         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1699
1700 restart:
1701         if (!other) {
1702                 err = -ECONNRESET;
1703                 if (sunaddr == NULL)
1704                         goto out_free;
1705
1706                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1707                                         hash, &err);
1708                 if (other == NULL)
1709                         goto out_free;
1710         }
1711
1712         if (sk_filter(other, skb) < 0) {
1713                 /* Toss the packet but do not return any error to the sender */
1714                 err = len;
1715                 goto out_free;
1716         }
1717
1718         sk_locked = 0;
1719         unix_state_lock(other);
1720 restart_locked:
1721         err = -EPERM;
1722         if (!unix_may_send(sk, other))
1723                 goto out_unlock;
1724
1725         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1726                 /*
1727                  *      Check with 1003.1g - what should
1728                  *      datagram error
1729                  */
1730                 unix_state_unlock(other);
1731                 sock_put(other);
1732
1733                 if (!sk_locked)
1734                         unix_state_lock(sk);
1735
1736                 err = 0;
1737                 if (unix_peer(sk) == other) {
1738                         unix_peer(sk) = NULL;
1739                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1740
1741                         unix_state_unlock(sk);
1742
1743                         unix_dgram_disconnected(sk, other);
1744                         sock_put(other);
1745                         err = -ECONNREFUSED;
1746                 } else {
1747                         unix_state_unlock(sk);
1748                 }
1749
1750                 other = NULL;
1751                 if (err)
1752                         goto out_free;
1753                 goto restart;
1754         }
1755
1756         err = -EPIPE;
1757         if (other->sk_shutdown & RCV_SHUTDOWN)
1758                 goto out_unlock;
1759
1760         if (sk->sk_type != SOCK_SEQPACKET) {
1761                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1762                 if (err)
1763                         goto out_unlock;
1764         }
1765
1766         /* other == sk && unix_peer(other) != sk if
1767          * - unix_peer(sk) == NULL, destination address bound to sk
1768          * - unix_peer(sk) == sk by time of get but disconnected before lock
1769          */
1770         if (other != sk &&
1771             unlikely(unix_peer(other) != sk &&
1772             unix_recvq_full_lockless(other))) {
1773                 if (timeo) {
1774                         timeo = unix_wait_for_peer(other, timeo);
1775
1776                         err = sock_intr_errno(timeo);
1777                         if (signal_pending(current))
1778                                 goto out_free;
1779
1780                         goto restart;
1781                 }
1782
1783                 if (!sk_locked) {
1784                         unix_state_unlock(other);
1785                         unix_state_double_lock(sk, other);
1786                 }
1787
1788                 if (unix_peer(sk) != other ||
1789                     unix_dgram_peer_wake_me(sk, other)) {
1790                         err = -EAGAIN;
1791                         sk_locked = 1;
1792                         goto out_unlock;
1793                 }
1794
1795                 if (!sk_locked) {
1796                         sk_locked = 1;
1797                         goto restart_locked;
1798                 }
1799         }
1800
1801         if (unlikely(sk_locked))
1802                 unix_state_unlock(sk);
1803
1804         if (sock_flag(other, SOCK_RCVTSTAMP))
1805                 __net_timestamp(skb);
1806         maybe_add_creds(skb, sock, other);
1807         spin_lock(&other->sk_receive_queue.lock);
1808         scm_stat_add(other, skb);
1809         __skb_queue_tail(&other->sk_receive_queue, skb);
1810         spin_unlock(&other->sk_receive_queue.lock);
1811         unix_state_unlock(other);
1812         other->sk_data_ready(other);
1813         sock_put(other);
1814         scm_destroy(&scm);
1815         return len;
1816
1817 out_unlock:
1818         if (sk_locked)
1819                 unix_state_unlock(sk);
1820         unix_state_unlock(other);
1821 out_free:
1822         kfree_skb(skb);
1823 out:
1824         if (other)
1825                 sock_put(other);
1826         scm_destroy(&scm);
1827         return err;
1828 }
1829
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1834
/* Send data on a connected SOCK_STREAM AF_UNIX socket.
 *
 * The payload is chopped into skbs of at most half the send buffer
 * (so two messages can be in flight, which schedules better) and each
 * skb is queued directly onto the peer's receive queue.  Any file
 * descriptors in the control message ride on the first skb only.
 *
 * Returns the number of bytes sent; a negative errno only when nothing
 * was sent at all (a partial send returns the partial count).
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	/* May block while an in-flight fd garbage collection finishes. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	/* Out-of-band data is not supported on AF_UNIX stream sockets. */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Supplying a destination address is invalid here. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes that do not fit in the linear head go into page
		 * fragments, rounded up to whole pages.
		 */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer may have died or shut reception down while we were
		 * copying; drop this skb and report EPIPE.
		 */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		spin_lock(&other->sk_receive_queue.lock);
		scm_stat_add(other, skb);
		__skb_queue_tail(&other->sk_receive_queue, skb);
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Writing to a closed stream raises SIGPIPE unless suppressed,
	 * and only when nothing was transferred.
	 */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1936
/* sendpage() on a connected AF_UNIX stream socket: append a page
 * fragment to the peer's receive queue without copying the payload.
 *
 * Where possible the page is glued onto the skb already at the tail of
 * the peer's queue (only if that skb carries the same credentials/fds);
 * otherwise a fresh zero-length skb is allocated to carry the fragment.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered on the first pass: the alloc_skb label is only
	 * reached via goto from below, after both locks have to be
	 * dropped so the (possibly sleeping) allocation can run.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Set up credentials once; kept across a goto alloc_skb retry. */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Pick the skb to append to: reuse the queue tail when it still
	 * matches our credentials, otherwise fall back to (or allocate)
	 * a fresh skb.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes against the sender's write memory. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2052
2053 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2054                                   size_t len)
2055 {
2056         int err;
2057         struct sock *sk = sock->sk;
2058
2059         err = sock_error(sk);
2060         if (err)
2061                 return err;
2062
2063         if (sk->sk_state != TCP_ESTABLISHED)
2064                 return -ENOTCONN;
2065
2066         if (msg->msg_namelen)
2067                 msg->msg_namelen = 0;
2068
2069         return unix_dgram_sendmsg(sock, msg, len);
2070 }
2071
2072 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2073                                   size_t size, int flags)
2074 {
2075         struct sock *sk = sock->sk;
2076
2077         if (sk->sk_state != TCP_ESTABLISHED)
2078                 return -ENOTCONN;
2079
2080         return unix_dgram_recvmsg(sock, msg, size, flags);
2081 }
2082
2083 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2084 {
2085         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2086
2087         if (addr) {
2088                 msg->msg_namelen = addr->len;
2089                 memcpy(msg->msg_name, addr->name, addr->len);
2090         }
2091 }
2092
/* Receive one datagram (or SEQPACKET record) from an AF_UNIX socket.
 *
 * Waits up to the socket's receive timeout for an skb, honouring
 * MSG_DONTWAIT and the per-socket peek offset.  Attached fds are
 * detached into the caller's scm on a normal read, and duplicated
 * on MSG_PEEK.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* iolock serializes readers so the peek offset and the
		 * queue state stay consistent across the dequeue.
		 */
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      scm_stat_del, &skip, &err, &last);
		if (skb)
			break;	/* got an skb; iolock is still held */

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* We freed queue space: wake writers blocked on a full peer. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Truncate to the caller's buffer; flag a short datagram read. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2196
/*
 *	Sleep until more data has arrived. But check for races..
 *	Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* Stop waiting once the queue tail has changed (a new skb
		 * arrived or the last skb grew), or on error, shutdown,
		 * pending signal or expired timeout.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* Socket went away while we slept. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2239
/* Number of not-yet-consumed payload bytes in a stream skb.
 * UNIXCB(skb).consumed tracks how much of this skb earlier stream
 * reads have already copied out (a read may stop mid-skb).
 */
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
2244
/* Parameters for the common stream receive path, shared by recvmsg()
 * and splice_read().  recv_actor consumes 'chunk' bytes of an skb
 * starting 'skip' bytes in, and returns the number of bytes handled
 * or a negative error.
 */
struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;	/* recvmsg() destination; NULL when splicing */
	struct pipe_inode_info *pipe;	/* splice destination pipe */
	size_t size;
	int flags;		/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2255
/* Common receive loop for SOCK_STREAM sockets, driving either a
 * recvmsg() copy or a splice through state->recv_actor.
 *
 * Consumes data skb by skb under u->iolock; partial-skb progress is
 * recorded in UNIXCB(skb).consumed.  Data written under different
 * credentials is never glued into a single read.  Returns the number
 * of bytes delivered, or a negative errno if nothing was delivered.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum byte count before we may return (SO_RCVLOWAT/MSG_WAITALL). */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Queue empty: return if we already met the target,
			 * otherwise report pending error/shutdown or wait.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Release iolock while sleeping so writers can queue. */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Honour the peek offset by skipping whole skbs first. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference: the actor may sleep and a concurrent
		 * reader could consume and unlink the skb meanwhile.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				spin_lock(&sk->sk_receive_queue.lock);
				scm_stat_del(sk, skb);
				spin_unlock(&sk->sk_receive_queue.lock);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop after an skb that carried fds, so the
			 * ancillary data is delivered with this read.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2456
2457 static int unix_stream_read_actor(struct sk_buff *skb,
2458                                   int skip, int chunk,
2459                                   struct unix_stream_read_state *state)
2460 {
2461         int ret;
2462
2463         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2464                                     state->msg, chunk);
2465         return ret ?: chunk;
2466 }
2467
2468 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2469                                size_t size, int flags)
2470 {
2471         struct unix_stream_read_state state = {
2472                 .recv_actor = unix_stream_read_actor,
2473                 .socket = sock,
2474                 .msg = msg,
2475                 .size = size,
2476                 .flags = flags
2477         };
2478
2479         return unix_stream_read_generic(&state, true);
2480 }
2481
2482 static int unix_stream_splice_actor(struct sk_buff *skb,
2483                                     int skip, int chunk,
2484                                     struct unix_stream_read_state *state)
2485 {
2486         return skb_splice_bits(skb, state->socket->sk,
2487                                UNIXCB(skb).consumed + skip,
2488                                state->pipe, chunk, state->splice_flags);
2489 }
2490
2491 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2492                                        struct pipe_inode_info *pipe,
2493                                        size_t size, unsigned int flags)
2494 {
2495         struct unix_stream_read_state state = {
2496                 .recv_actor = unix_stream_splice_actor,
2497                 .socket = sock,
2498                 .pipe = pipe,
2499                 .size = size,
2500                 .splice_flags = flags,
2501         };
2502
2503         if (unlikely(*ppos))
2504                 return -ESPIPE;
2505
2506         if (sock->file->f_flags & O_NONBLOCK ||
2507             flags & SPLICE_F_NONBLOCK)
2508                 state.flags = MSG_DONTWAIT;
2509
2510         return unix_stream_read_generic(&state, false);
2511 }
2512
/* Implement shutdown(2) for AF_UNIX.  Marks the requested directions
 * down on this socket and, for stream/seqpacket sockets, mirrors the
 * shutdown onto the connected peer (with the directions swapped) and
 * wakes its waiters.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	/* Hold a reference on the peer so it can be poked safely after
	 * our own state lock is dropped.
	 */
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our RCV shutdown is the peer's SEND shutdown and vice
		 * versa.
		 */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2558
2559 long unix_inq_len(struct sock *sk)
2560 {
2561         struct sk_buff *skb;
2562         long amount = 0;
2563
2564         if (sk->sk_state == TCP_LISTEN)
2565                 return -EINVAL;
2566
2567         spin_lock(&sk->sk_receive_queue.lock);
2568         if (sk->sk_type == SOCK_STREAM ||
2569             sk->sk_type == SOCK_SEQPACKET) {
2570                 skb_queue_walk(&sk->sk_receive_queue, skb)
2571                         amount += unix_skb_len(skb);
2572         } else {
2573                 skb = skb_peek(&sk->sk_receive_queue);
2574                 if (skb)
2575                         amount = skb->len;
2576         }
2577         spin_unlock(&sk->sk_receive_queue.lock);
2578
2579         return amount;
2580 }
2581 EXPORT_SYMBOL_GPL(unix_inq_len);
2582
/* SIOCOUTQ helper: bytes of send memory currently charged to @sk,
 * i.e. data queued but not yet consumed by the peer.
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2588
/* SIOCUNIXFILE: open a new O_PATH file descriptor for the filesystem
 * object this socket is bound to.  Requires CAP_NET_ADMIN in the
 * socket's network namespace.  Returns the new fd or a negative errno.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire pairs with the release store that publishes the bound
	 * address, so ->path below is seen fully initialized.
	 */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;	/* not bound to a filesystem object */

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	/* The installed file holds its own path reference; ours is
	 * dropped below.
	 */
	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
2624
2625 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2626 {
2627         struct sock *sk = sock->sk;
2628         long amount = 0;
2629         int err;
2630
2631         switch (cmd) {
2632         case SIOCOUTQ:
2633                 amount = unix_outq_len(sk);
2634                 err = put_user(amount, (int __user *)arg);
2635                 break;
2636         case SIOCINQ:
2637                 amount = unix_inq_len(sk);
2638                 if (amount < 0)
2639                         err = amount;
2640                 else
2641                         err = put_user(amount, (int __user *)arg);
2642                 break;
2643         case SIOCUNIXFILE:
2644                 err = unix_open_file(sk);
2645                 break;
2646         default:
2647                 err = -ENOIOCTLCMD;
2648                 break;
2649         }
2650         return err;
2651 }
2652
#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl: every af_unix ioctl argument is a pointer, so
 * translating the user pointer and reusing the native handler suffices. */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2659
2660 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2661 {
2662         struct sock *sk = sock->sk;
2663         __poll_t mask;
2664
2665         sock_poll_wait(file, sock, wait);
2666         mask = 0;
2667
2668         /* exceptional events? */
2669         if (sk->sk_err)
2670                 mask |= EPOLLERR;
2671         if (sk->sk_shutdown == SHUTDOWN_MASK)
2672                 mask |= EPOLLHUP;
2673         if (sk->sk_shutdown & RCV_SHUTDOWN)
2674                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2675
2676         /* readable? */
2677         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2678                 mask |= EPOLLIN | EPOLLRDNORM;
2679
2680         /* Connection-based need to check for termination and startup */
2681         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2682             sk->sk_state == TCP_CLOSE)
2683                 mask |= EPOLLHUP;
2684
2685         /*
2686          * we set writable also when the other side has shut down the
2687          * connection. This prevents stuck sockets.
2688          */
2689         if (unix_writable(sk))
2690                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2691
2692         return mask;
2693 }
2694
/*
 * Poll callback for datagram (and connected seqpacket) sockets.  Unlike
 * unix_poll() it also reports the error queue, handles the
 * not-yet-connected SEQPACKET case, and suppresses writability when the
 * connected peer's receive queue is full, registering for a wakeup from
 * the peer so the poller is notified when space frees up.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		/* Peer's receive queue full: report not-writable, but only
		 * after arming a wakeup from the peer via
		 * unix_dgram_peer_wake_me().  Mutually-connected pairs
		 * (unix_peer(other) == sk) are exempt from this check. */
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2752
#ifdef CONFIG_PROC_FS

/*
 * The seq_file position (*pos) packs both a hash-table bucket index and
 * a 1-based offset within that bucket into one loff_t: the bucket in the
 * high bits, the offset in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2760
/*
 * Return the socket at 1-based position get_offset(*pos) within hash
 * bucket get_bucket(*pos), counting only sockets that belong to the
 * seq_file's network namespace.  Returns NULL when the bucket holds
 * fewer matching sockets than requested — including offset 0, which
 * can never satisfy "++count == offset".  Caller holds unix_table_lock
 * (taken in unix_seq_start()).
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}
2777
/*
 * Advance to the next socket in the seq_file's namespace: first walk the
 * remainder of @sk's bucket, then scan subsequent buckets.  Pass
 * sk == NULL (or SEQ_START_TOKEN) to start from the bucket/offset
 * encoded in *pos.  *pos is rewritten to bucket+1/offset-1 each time a
 * bucket is exhausted — note the goto jumps from the walk loop straight
 * into the bucket-scan loop.  Caller holds unix_table_lock.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;	/* current bucket exhausted */
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2804
/* seq_file ->start: take unix_table_lock (held until unix_seq_stop())
 * and return the entry at *pos; SEQ_START_TOKEN selects the header. */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
2818
/* seq_file ->next: bump *pos and advance to the socket after @v. */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}
2824
/* seq_file ->stop: drop the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2830
/*
 * seq_file ->show: emit one /proc/net/unix line per socket (or the
 * column header for SEQ_START_TOKEN): kernel address, refcount,
 * protocol (always 0), flags, type, pseudo-state, inode and — if the
 * socket is bound — its address, with '@' marking abstract names.
 * The output format is userspace ABI; do not change it.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* pathname addr: final byte not printed */
			else {
				seq_putc(seq, '@');
				i++;	/* skip leading NUL of abstract name */
			}
			/* NUL bytes inside the name are rendered as '@' */
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2875
/* Iterator ops backing /proc/net/unix (see unix_net_init()). */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif
2883
/* PF_UNIX family registration: socket(2) creation hook. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner  = THIS_MODULE,
};
2889
2890
2891 static int __net_init unix_net_init(struct net *net)
2892 {
2893         int error = -ENOMEM;
2894
2895         net->unx.sysctl_max_dgram_qlen = 10;
2896         if (unix_sysctl_register(net))
2897                 goto out;
2898
2899 #ifdef CONFIG_PROC_FS
2900         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2901                         sizeof(struct seq_net_private))) {
2902                 unix_sysctl_unregister(net);
2903                 goto out;
2904         }
2905 #endif
2906         error = 0;
2907 out:
2908         return error;
2909 }
2910
/* Per-network-namespace teardown: mirror of unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2916
/* Hooks run for every network namespace created/destroyed. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2921
2922 static int __init af_unix_init(void)
2923 {
2924         int rc = -1;
2925
2926         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2927
2928         rc = proto_register(&unix_proto, 1);
2929         if (rc != 0) {
2930                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2931                 goto out;
2932         }
2933
2934         sock_register(&unix_family_ops);
2935         register_pernet_subsys(&unix_net_ops);
2936 out:
2937         return rc;
2938 }
2939
/* Module teardown: unregister everything af_unix_init() set up. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2946
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
/* Autoload this module when a PF_UNIX socket is first requested. */
MODULE_ALIAS_NETPROTO(PF_UNIX);