Linux 6.9-rc1
[linux-2.6-microblaze.git] / fs / pipe.c
index 74ae9fa..50c8a85 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  */
 
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r) (((l) > (r)) - ((l) < (r))) /* -1, 0 or 1; args evaluated twice */
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+                           const struct lockdep_map *b)
 {
-       if (pipe->files)
-               mutex_lock_nested(&pipe->mutex, subclass);
+       return cmp_int((unsigned long) a, (unsigned long) b);
 }
+#endif
 
 void pipe_lock(struct pipe_inode_info *pipe)
 {
-       /*
-        * pipe_lock() nests non-pipe inode locks (for writing to a file)
-        */
-       pipe_lock_nested(pipe, I_MUTEX_PARENT);
+       if (pipe->files)
+               mutex_lock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_lock);
 
@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-       mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-       mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
                      struct pipe_inode_info *pipe2)
 {
        BUG_ON(pipe1 == pipe2);
 
-       if (pipe1 < pipe2) {
-               pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-               pipe_lock_nested(pipe2, I_MUTEX_CHILD);
-       } else {
-               pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-               pipe_lock_nested(pipe1, I_MUTEX_CHILD);
-       }
+       if (pipe1 > pipe2)
+               swap(pipe1, pipe2);
+
+       pipe_lock(pipe1);
+       pipe_lock(pipe2);
 }
 
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -227,6 +217,36 @@ static inline bool pipe_readable(const struct pipe_inode_info *pipe)
        return !pipe_empty(head, tail) || !writers;
 }
 
+static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
+                                           struct pipe_buffer *buf,
+                                           unsigned int tail)
+{
+       const unsigned int next = tail + 1;
+
+       /* Release the buffer, then advance the pipe's tail by one slot. */
+       pipe_buf_release(pipe, buf);
+
+       /* No watch_queue: the mutex is enough, so skip the spinlock. */
+       if (!pipe_has_watch_queue(pipe)) {
+               pipe->tail = next;
+               return next;
+       }
+
+       /*
+        * Notifications are posted holding only rd_wait.lock and not the
+        * mutex, so a watch_queue pipe must move tail under that spinlock.
+        */
+       spin_lock_irq(&pipe->rd_wait.lock);
+#ifdef CONFIG_WATCH_QUEUE
+       /* Remember a pending loss report if this buffer was flagged. */
+       if (buf->flags & PIPE_BUF_FLAG_LOSS)
+               pipe->note_loss = true;
+#endif
+       pipe->tail = next;
+       spin_unlock_irq(&pipe->rd_wait.lock);
+       return next;
+}
+
 static ssize_t
 pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -241,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                return 0;
 
        ret = 0;
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
 
        /*
         * We only wake up writers if the pipe was full when we started
@@ -320,17 +340,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                                buf->len = 0;
                        }
 
-                       if (!buf->len) {
-                               pipe_buf_release(pipe, buf);
-                               spin_lock_irq(&pipe->rd_wait.lock);
-#ifdef CONFIG_WATCH_QUEUE
-                               if (buf->flags & PIPE_BUF_FLAG_LOSS)
-                                       pipe->note_loss = true;
-#endif
-                               tail++;
-                               pipe->tail = tail;
-                               spin_unlock_irq(&pipe->rd_wait.lock);
-                       }
+                       if (!buf->len)
+                               tail = pipe_update_tail(pipe, buf, tail);
                        total_len -= chars;
                        if (!total_len)
                                break;  /* common path: read succeeded */
@@ -342,11 +353,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                        break;
                if (ret)
                        break;
-               if (filp->f_flags & O_NONBLOCK) {
+               if ((filp->f_flags & O_NONBLOCK) ||
+                   (iocb->ki_flags & IOCB_NOWAIT)) {
                        ret = -EAGAIN;
                        break;
                }
-               __pipe_unlock(pipe);
+               mutex_unlock(&pipe->mutex);
 
                /*
                 * We only get here if we didn't actually read anything.
@@ -378,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
                        return -ERESTARTSYS;
 
-               __pipe_lock(pipe);
+               mutex_lock(&pipe->mutex);
                was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
                wake_next_reader = true;
        }
        if (pipe_empty(pipe->head, pipe->tail))
                wake_next_reader = false;
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
 
        if (was_full)
                wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -424,11 +436,23 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
        bool was_empty = false;
        bool wake_next_writer = false;
 
+       /*
+        * Reject writing to watch queue pipes before the point where we lock
+        * the pipe.
+        * Otherwise, lockdep would be unhappy if the caller already has another
+        * pipe locked.
+        * If we had to support locking a normal pipe and a notification pipe at
+        * the same time, we could set up lockdep annotations for that, but
+        * since we don't actually need that, it's simpler to just bail here.
+        */
+       if (pipe_has_watch_queue(pipe))
+               return -EXDEV;
+
        /* Null write succeeds. */
        if (unlikely(total_len == 0))
                return 0;
 
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
 
        if (!pipe->readers) {
                send_sig(SIGPIPE, current, 0);
@@ -436,13 +460,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                goto out;
        }
 
-#ifdef CONFIG_WATCH_QUEUE
-       if (pipe->watch_queue) {
-               ret = -EXDEV;
-               goto out;
-       }
-#endif
-
        /*
         * If it wasn't empty we try to merge new data into
         * the last buffer.
@@ -488,7 +505,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                head = pipe->head;
                if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
                        unsigned int mask = pipe->ring_size - 1;
-                       struct pipe_buffer *buf = &pipe->bufs[head & mask];
+                       struct pipe_buffer *buf;
                        struct page *page = pipe->tmp_page;
                        int copied;
 
@@ -506,16 +523,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                         * it, either the reader will consume it or it'll still
                         * be there for the next write.
                         */
-                       spin_lock_irq(&pipe->rd_wait.lock);
-
-                       head = pipe->head;
-                       if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-                               spin_unlock_irq(&pipe->rd_wait.lock);
-                               continue;
-                       }
-
                        pipe->head = head + 1;
-                       spin_unlock_irq(&pipe->rd_wait.lock);
 
                        /* Insert it into the buffer array */
                        buf = &pipe->bufs[head & mask];
@@ -536,7 +544,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                                break;
                        }
                        ret += copied;
-                       buf->offset = 0;
                        buf->len = copied;
 
                        if (!iov_iter_count(from))
@@ -547,7 +554,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                        continue;
 
                /* Wait for buffer space to become available. */
-               if (filp->f_flags & O_NONBLOCK) {
+               if ((filp->f_flags & O_NONBLOCK) ||
+                   (iocb->ki_flags & IOCB_NOWAIT)) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
@@ -564,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                 * after waiting we need to re-check whether the pipe
                 * become empty while we dropped the lock.
                 */
-               __pipe_unlock(pipe);
+               mutex_unlock(&pipe->mutex);
                if (was_empty)
                        wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-               __pipe_lock(pipe);
+               mutex_lock(&pipe->mutex);
                was_empty = pipe_empty(pipe->head, pipe->tail);
                wake_next_writer = true;
        }
 out:
        if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
                wake_next_writer = false;
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
 
        /*
         * If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -611,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case FIONREAD:
-               __pipe_lock(pipe);
+               mutex_lock(&pipe->mutex);
                count = 0;
                head = pipe->head;
                tail = pipe->tail;
@@ -621,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        count += pipe->bufs[tail & mask].len;
                        tail++;
                }
-               __pipe_unlock(pipe);
+               mutex_unlock(&pipe->mutex);
 
                return put_user(count, (int __user *)arg);
 
 #ifdef CONFIG_WATCH_QUEUE
        case IOC_WATCH_QUEUE_SET_SIZE: {
                int ret;
-               __pipe_lock(pipe);
+               mutex_lock(&pipe->mutex);
                ret = watch_queue_set_size(pipe, arg);
-               __pipe_unlock(pipe);
+               mutex_unlock(&pipe->mutex);
                return ret;
        }
 
@@ -716,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
 {
        struct pipe_inode_info *pipe = file->private_data;
 
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
        if (file->f_mode & FMODE_READ)
                pipe->readers--;
        if (file->f_mode & FMODE_WRITE)
@@ -729,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
 
        put_pipe_info(inode, pipe);
        return 0;
@@ -741,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
        struct pipe_inode_info *pipe = filp->private_data;
        int retval = 0;
 
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
        if (filp->f_mode & FMODE_READ)
                retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
        if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -750,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
                        /* this can happen only if on == T */
                        fasync_helper(-1, filp, 0, &pipe->fasync_readers);
        }
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
        return retval;
 }
 
@@ -816,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
                pipe->nr_accounted = pipe_bufs;
                pipe->user = user;
                mutex_init(&pipe->mutex);
+               lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
                return pipe;
        }
 
@@ -853,14 +862,14 @@ void free_pipe_info(struct pipe_inode_info *pipe)
        kfree(pipe);
 }
 
-static struct vfsmount *pipe_mnt __read_mostly;
+static struct vfsmount *pipe_mnt __ro_after_init;
 
 /*
  * pipefs_dname() is called from d_path().
  */
 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
-       return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
+       return dynamic_dname(buffer, buflen, "pipe:[%lu]",
                                d_inode(dentry)->i_ino);
 }
 
@@ -897,7 +906,7 @@ static struct inode * get_pipe_inode(void)
        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       simple_inode_init_ts(inode);
 
        return inode;
 
@@ -976,6 +985,9 @@ static int __do_pipe_flags(int *fd, struct file **files, int flags)
        audit_fd_pair(fdr, fdw);
        fd[0] = fdr;
        fd[1] = fdw;
+       /* pipe groks IOCB_NOWAIT */
+       files[0]->f_mode |= FMODE_NOWAIT;
+       files[1]->f_mode |= FMODE_NOWAIT;
        return 0;
 
  err_fdr:
@@ -1123,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
        filp->private_data = pipe;
        /* OK, we have a pipe and it's pinned down */
 
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
 
        /* We can only do regular read/write on fifos */
        stream_open(inode, filp);
@@ -1193,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
        }
 
        /* Ok! */
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
        return 0;
 
 err_rd:
@@ -1209,7 +1221,7 @@ err_wr:
        goto err;
 
 err:
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
 
        put_pipe_info(inode, pipe);
        return ret;
@@ -1231,7 +1243,7 @@ const struct file_operations pipefifo_fops = {
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-unsigned int round_pipe_size(unsigned long size)
+unsigned int round_pipe_size(unsigned int size)
 {
        if (size > (1U << 31))
                return 0;
@@ -1303,6 +1315,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
        pipe->tail = tail;
        pipe->head = head;
 
+       if (!pipe_has_watch_queue(pipe)) {
+               pipe->max_usage = nr_slots;
+               pipe->nr_accounted = nr_slots;
+       }
+
        spin_unlock_irq(&pipe->rd_wait.lock);
 
        /* This might have made more room for writers */
@@ -1314,16 +1331,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
  */
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
 {
        unsigned long user_bufs;
        unsigned int nr_slots, size;
        long ret = 0;
 
-#ifdef CONFIG_WATCH_QUEUE
-       if (pipe->watch_queue)
+       if (pipe_has_watch_queue(pipe))
                return -EBUSY;
-#endif
 
        size = round_pipe_size(arg);
        nr_slots = size >> PAGE_SHIFT;
@@ -1356,8 +1371,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
        if (ret < 0)
                goto out_revert_acct;
 
-       pipe->max_usage = nr_slots;
-       pipe->nr_accounted = nr_slots;
        return pipe->max_usage * PAGE_SIZE;
 
 out_revert_acct:
@@ -1375,14 +1388,12 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
 
        if (file->f_op != &pipefifo_fops || !pipe)
                return NULL;
-#ifdef CONFIG_WATCH_QUEUE
-       if (for_splice && pipe->watch_queue)
+       if (for_splice && pipe_has_watch_queue(pipe))
                return NULL;
-#endif
        return pipe;
 }
 
-long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 {
        struct pipe_inode_info *pipe;
        long ret;
@@ -1391,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
        if (!pipe)
                return -EBADF;
 
-       __pipe_lock(pipe);
+       mutex_lock(&pipe->mutex);
 
        switch (cmd) {
        case F_SETPIPE_SZ:
@@ -1405,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
                break;
        }
 
-       __pipe_unlock(pipe);
+       mutex_unlock(&pipe->mutex);
        return ret;
 }
 
@@ -1487,7 +1498,6 @@ static struct ctl_table fs_pipe_sysctls[] = {
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
        },
-       { }
 };
 #endif