xfs: split the unaligned DIO write code out

author    Dave Chinner <dchinner@redhat.com>
          Sat, 23 Jan 2021 18:06:30 +0000 (10:06 -0800)
committer Darrick J. Wong <djwong@kernel.org>
          Mon, 1 Feb 2021 17:47:19 +0000 (09:47 -0800)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 94bc2ea..c60ff7b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -499,117 +499,117 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
  };
  
  /*
- * xfs_file_dio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the tricky to
- * follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. inode_dio_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
+ * Handle block aligned direct I/O writes
   */
-STATIC ssize_t
-xfs_file_dio_write(
+static noinline ssize_t
+xfs_file_dio_write_aligned(
+       struct xfs_inode        *ip,
         struct kiocb            *iocb,
         struct iov_iter         *from)
  {
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 ret = 0;
-       int                     unaligned_io = 0;
-       int                     iolock;
-       size_t                  count = iov_iter_count(from);
-       struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
+       int                     iolock = XFS_IOLOCK_SHARED;
+       ssize_t                 ret;
  
-       /* DIO must be aligned to device logical sector size */
-       if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
-               return -EINVAL;
+       ret = xfs_ilock_iocb(iocb, iolock);
+       if (ret)
+               return ret;
+       ret = xfs_file_write_checks(iocb, from, &iolock);
+       if (ret)
+               goto out_unlock;
  
         /*
-        * Don't take the exclusive iolock here unless the I/O is unaligned to
-        * the file system block size.  We don't need to consider the EOF
-        * extension case here because xfs_file_write_checks() will relock
-        * the inode as necessary for EOF zeroing cases and fill out the new
-        * inode size as appropriate.
+        * We don't need to hold the IOLOCK exclusively across the IO, so demote
+        * the iolock back to shared if we had to take the exclusive lock in
+        * xfs_file_write_checks() for other reasons.
          */
-       if ((iocb->ki_pos & mp->m_blockmask) ||
-           ((iocb->ki_pos + count) & mp->m_blockmask)) {
-               unaligned_io = 1;
-
-               /*
-                * We can't properly handle unaligned direct I/O to reflink
-                * files yet, as we can't unshare a partial block.
-                */
-               if (xfs_is_cow_inode(ip)) {
-                       trace_xfs_reflink_bounce_dio_write(iocb, from);
-                       return -ENOTBLK;
-               }
-               iolock = XFS_IOLOCK_EXCL;
-       } else {
+       if (iolock == XFS_IOLOCK_EXCL) {
+               xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
                 iolock = XFS_IOLOCK_SHARED;
         }
+       trace_xfs_file_direct_write(iocb, from);
+       ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+                          &xfs_dio_write_ops, 0);
+out_unlock:
+       if (iolock)
+               xfs_iunlock(ip, iolock);
+       return ret;
+}
  
-       if (iocb->ki_flags & IOCB_NOWAIT) {
-               /* unaligned dio always waits, bail */
-               if (unaligned_io)
-                       return -EAGAIN;
-               if (!xfs_ilock_nowait(ip, iolock))
-                       return -EAGAIN;
-       } else {
-               xfs_ilock(ip, iolock);
+/*
+ * Handle block unaligned direct I/O writes
+ *
+ * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
+ * them to be done in parallel with reads and other direct I/O writes.  However,
+ * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
+ * to do sub-block zeroing and that requires serialisation against other direct
+ * I/O to the same block.  In this case we need to serialise the submission of
+ * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
+ *
+ * This means that unaligned dio writes always block. There is no "nowait" fast
+ * path in this code - if IOCB_NOWAIT is set we simply return -EAGAIN up front
+ * and we don't have to worry about that anymore.
+ */
+static noinline ssize_t
+xfs_file_dio_write_unaligned(
+       struct xfs_inode        *ip,
+       struct kiocb            *iocb,
+       struct iov_iter         *from)
+{
+       int                     iolock = XFS_IOLOCK_EXCL;
+       ssize_t                 ret;
+
+       /* unaligned dio always waits, bail */
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
+       xfs_ilock(ip, iolock);
+
+       /*
+        * We can't properly handle unaligned direct I/O to reflink files yet,
+        * as we can't unshare a partial block.
+        */
+       if (xfs_is_cow_inode(ip)) {
+               trace_xfs_reflink_bounce_dio_write(iocb, from);
+               ret = -ENOTBLK;
+               goto out_unlock;
         }
  
         ret = xfs_file_write_checks(iocb, from, &iolock);
         if (ret)
-               goto out;
-       count = iov_iter_count(from);
+               goto out_unlock;
  
         /*
-        * If we are doing unaligned IO, we can't allow any other overlapping IO
-        * in-flight at the same time or we risk data corruption. Wait for all
-        * other IO to drain before we submit. If the IO is aligned, demote the
-        * iolock if we had to take the exclusive lock in
-        * xfs_file_write_checks() for other reasons.
+        * If we are doing unaligned I/O, this must be the only I/O in-flight.
+        * Otherwise we risk data corruption due to unwritten extent conversions
+        * from the AIO end_io handler.  Wait for all other I/O to drain first.
          */
-       if (unaligned_io) {
-               inode_dio_wait(inode);
-       } else if (iolock == XFS_IOLOCK_EXCL) {
-               xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-               iolock = XFS_IOLOCK_SHARED;
-       }
+       inode_dio_wait(VFS_I(ip));
  
         trace_xfs_file_direct_write(iocb, from);
-       /*
-        * If unaligned, this is the only IO in-flight. Wait on it before we
-        * release the iolock to prevent subsequent overlapping IO.
-        */
         ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-                          &xfs_dio_write_ops,
-                          unaligned_io ? IOMAP_DIO_FORCE_WAIT : 0);
-out:
+                          &xfs_dio_write_ops, IOMAP_DIO_FORCE_WAIT);
+out_unlock:
         if (iolock)
                 xfs_iunlock(ip, iolock);
         return ret;
  }
  
+static ssize_t
+xfs_file_dio_write(
+       struct kiocb            *iocb,
+       struct iov_iter         *from)
+{
+       struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
+       struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
+       size_t                  count = iov_iter_count(from);
+
+       /* direct I/O must be aligned to device logical sector size */
+       if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
+               return -EINVAL;
+       if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
+               return xfs_file_dio_write_unaligned(ip, iocb, from);
+       return xfs_file_dio_write_aligned(ip, iocb, from);
+}
+
  static noinline ssize_t
  xfs_file_dax_write(
         struct kiocb            *iocb,
author	Dave Chinner <dchinner@redhat.com>
	Sat, 23 Jan 2021 18:06:30 +0000 (10:06 -0800)
committer	Darrick J. Wong <djwong@kernel.org>
	Mon, 1 Feb 2021 17:47:19 +0000 (09:47 -0800)