xfs: xfs_iflock is no longer a completion

author Dave Chinner <dchinner@redhat.com>

Mon, 17 Aug 2020 23:41:01 +0000 (16:41 -0700)

committer Darrick J. Wong <darrick.wong@oracle.com>

Mon, 7 Sep 2020 01:05:51 +0000 (18:05 -0700)
author Dave Chinner <dchinner@redhat.com>
Mon, 17 Aug 2020 23:41:01 +0000 (16:41 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Mon, 7 Sep 2020 01:05:51 +0000 (18:05 -0700)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 101028e..aa6aad2 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -52,7 +52,6 @@ xfs_inode_alloc(
  
         XFS_STATS_INC(mp, vn_active);
         ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!xfs_isiflocked(ip));
         ASSERT(ip->i_ino == 0);
  
         /* initialise the xfs inode */
@@ -123,7 +122,7 @@ void
  xfs_inode_free(
         struct xfs_inode        *ip)
  {
-       ASSERT(!xfs_isiflocked(ip));
+       ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
  
         /*
          * Because we use RCU freeing we need to ensure the inode always
@@ -1035,23 +1034,21 @@ xfs_reclaim_inode(
  
         if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
                 goto out;
-       if (!xfs_iflock_nowait(ip))
+       if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
                 goto out_iunlock;
  
         if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                 xfs_iunpin_wait(ip);
-               /* xfs_iflush_abort() drops the flush lock */
                 xfs_iflush_abort(ip);
                 goto reclaim;
         }
         if (xfs_ipincount(ip))
-               goto out_ifunlock;
+               goto out_clear_flush;
         if (!xfs_inode_clean(ip))
-               goto out_ifunlock;
+               goto out_clear_flush;
  
-       xfs_ifunlock(ip);
+       xfs_iflags_clear(ip, XFS_IFLUSHING);
  reclaim:
-       ASSERT(!xfs_isiflocked(ip));
  
         /*
          * Because we use RCU freeing we need to ensure the inode always appears
@@ -1101,8 +1098,8 @@ reclaim:
         __xfs_inode_free(ip);
         return;
  
-out_ifunlock:
-       xfs_ifunlock(ip);
+out_clear_flush:
+       xfs_iflags_clear(ip, XFS_IFLUSHING);
  out_iunlock:
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  out:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index c06129c..2072bd2 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -598,22 +598,6 @@ xfs_lock_two_inodes(
         }
  }
  
-void
-__xfs_iflock(
-       struct xfs_inode        *ip)
-{
-       wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
-       DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
-
-       do {
-               prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               if (xfs_isiflocked(ip))
-                       io_schedule();
-       } while (!xfs_iflock_nowait(ip));
-
-       finish_wait(wq, &wait.wq_entry);
-}
-
  STATIC uint
  _xfs_dic2xflags(
         uint16_t                di_flags,
@@ -2531,11 +2515,8 @@ retry:
          * valid, the wrong inode or stale.
          */
         spin_lock(&ip->i_flags_lock);
-       if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
-               spin_unlock(&ip->i_flags_lock);
-               rcu_read_unlock();
-               return;
-       }
+       if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
+               goto out_iflags_unlock;
  
         /*
          * Don't try to lock/unlock the current inode, but we _cannot_ skip the
@@ -2552,16 +2533,14 @@ retry:
                 }
         }
         ip->i_flags |= XFS_ISTALE;
-       spin_unlock(&ip->i_flags_lock);
-       rcu_read_unlock();
  
         /*
-        * If we can't get the flush lock, the inode is already attached.  All
+        * If the inode is flushing, it is already attached to the buffer.  All
          * we needed to do here is mark the inode stale so buffer IO completion
          * will remove it from the AIL.
          */
         iip = ip->i_itemp;
-       if (!xfs_iflock_nowait(ip)) {
+       if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
                 ASSERT(!list_empty(&iip->ili_item.li_bio_list));
                 ASSERT(iip->ili_last_fields);
                 goto out_iunlock;
@@ -2573,10 +2552,12 @@ retry:
          * commit as the flock synchronises removal of the inode from the
          * cluster buffer against inode reclaim.
          */
-       if (!iip || list_empty(&iip->ili_item.li_bio_list)) {
-               xfs_ifunlock(ip);
+       if (!iip || list_empty(&iip->ili_item.li_bio_list))
                 goto out_iunlock;
-       }
+
+       __xfs_iflags_set(ip, XFS_IFLUSHING);
+       spin_unlock(&ip->i_flags_lock);
+       rcu_read_unlock();
  
         /* we have a dirty inode in memory that has not yet been flushed. */
         spin_lock(&iip->ili_lock);
@@ -2586,9 +2567,16 @@ retry:
         spin_unlock(&iip->ili_lock);
         ASSERT(iip->ili_last_fields);
  
+       if (ip != free_ip)
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return;
+
  out_iunlock:
         if (ip != free_ip)
                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_iflags_unlock:
+       spin_unlock(&ip->i_flags_lock);
+       rcu_read_unlock();
  }
  
  /*
@@ -2631,8 +2619,9 @@ xfs_ifree_cluster(
  
                 /*
                  * We obtain and lock the backing buffer first in the process
-                * here, as we have to ensure that any dirty inode that we
-                * can't get the flush lock on is attached to the buffer.
+                * here to ensure dirty inodes attached to the buffer remain in
+                * the flushing state while we mark them stale.
+                *
                  * If we scan the in-memory inodes first, then buffer IO can
                  * complete before we get a lock on it, and hence we may fail
                  * to mark all the active inodes on the buffer stale.
@@ -3443,7 +3432,7 @@ xfs_iflush(
         int                     error;
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(xfs_isiflocked(ip));
+       ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
         ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
                ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
         ASSERT(iip->ili_item.li_buf == bp);
@@ -3613,7 +3602,7 @@ xfs_iflush_cluster(
                 /*
                  * Quick and dirty check to avoid locks if possible.
                  */
-               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
                         continue;
                 if (xfs_ipincount(ip))
                         continue;
@@ -3627,7 +3616,7 @@ xfs_iflush_cluster(
                  */
                 spin_lock(&ip->i_flags_lock);
                 ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
-               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
                         spin_unlock(&ip->i_flags_lock);
                         continue;
                 }
@@ -3635,23 +3624,16 @@ xfs_iflush_cluster(
                 /*
                  * ILOCK will pin the inode against reclaim and prevent
                  * concurrent transactions modifying the inode while we are
-                * flushing the inode.
+                * flushing the inode. If we get the lock, set the flushing
+                * state before we drop the i_flags_lock.
                  */
                 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
                         spin_unlock(&ip->i_flags_lock);
                         continue;
                 }
+               __xfs_iflags_set(ip, XFS_IFLUSHING);
                 spin_unlock(&ip->i_flags_lock);
  
-               /*
-                * Skip inodes that are already flush locked as they have
-                * already been written to the buffer.
-                */
-               if (!xfs_iflock_nowait(ip)) {
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                       continue;
-               }
-
                 /*
                  * Abort flushing this inode if we are shut down because the
                  * inode may not currently be in the AIL. This can occur when
@@ -3661,7 +3643,6 @@ xfs_iflush_cluster(
                  */
                 if (XFS_FORCED_SHUTDOWN(mp)) {
                         xfs_iunpin_wait(ip);
-                       /* xfs_iflush_abort() drops the flush lock */
                         xfs_iflush_abort(ip);
                         xfs_iunlock(ip, XFS_ILOCK_SHARED);
                         error = -EIO;
@@ -3670,7 +3651,7 @@ xfs_iflush_cluster(
  
                 /* don't block waiting on a log force to unpin dirty inodes */
                 if (xfs_ipincount(ip)) {
-                       xfs_ifunlock(ip);
+                       xfs_iflags_clear(ip, XFS_IFLUSHING);
                         xfs_iunlock(ip, XFS_ILOCK_SHARED);
                         continue;
                 }
@@ -3678,7 +3659,7 @@ xfs_iflush_cluster(
                 if (!xfs_inode_clean(ip))
                         error = xfs_iflush(ip, bp);
                 else
-                       xfs_ifunlock(ip);
+                       xfs_iflags_clear(ip, XFS_IFLUSHING);
                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
                 if (error)
                         break;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index e9a8bb1..5ea962c 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -211,8 +211,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
  #define XFS_INEW               (1 << __XFS_INEW_BIT)
  #define XFS_ITRUNCATED         (1 << 5) /* truncated down so flush-on-close */
  #define XFS_IDIRTY_RELEASE     (1 << 6) /* dirty release already seen */
-#define __XFS_IFLOCK_BIT       7        /* inode is being flushed right now */
-#define XFS_IFLOCK             (1 << __XFS_IFLOCK_BIT)
+#define XFS_IFLUSHING          (1 << 7) /* inode is being flushed */
  #define __XFS_IPINNED_BIT      8        /* wakeup key for zero pin count */
  #define XFS_IPINNED            (1 << __XFS_IPINNED_BIT)
  #define XFS_IEOFBLOCKS         (1 << 9) /* has the preallocblocks tag set */
@@ -233,36 +232,6 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
         (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
          XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
  
-/*
- * Synchronize processes attempting to flush the in-core inode back to disk.
- */
-
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
-       return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
-extern void __xfs_iflock(struct xfs_inode *ip);
-
-static inline int xfs_iflock_nowait(struct xfs_inode *ip)
-{
-       return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
-}
-
-static inline void xfs_iflock(struct xfs_inode *ip)
-{
-       if (!xfs_iflock_nowait(ip))
-               __xfs_iflock(ip);
-}
-
-static inline void xfs_ifunlock(struct xfs_inode *ip)
-{
-       ASSERT(xfs_isiflocked(ip));
-       xfs_iflags_clear(ip, XFS_IFLOCK);
-       smp_mb();
-       wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
-}
-
  /*
   * Flags for inode locking.
   * Bit ranges: 1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index 6c65938..099ae8e 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -491,8 +491,7 @@ xfs_inode_item_push(
             (ip->i_flags & XFS_ISTALE))
                 return XFS_ITEM_PINNED;
  
-       /* If the inode is already flush locked, we're already flushing. */
-       if (xfs_isiflocked(ip))
+       if (xfs_iflags_test(ip, XFS_IFLUSHING))
                 return XFS_ITEM_FLUSHING;
  
         if (!xfs_buf_trylock(bp))
@@ -703,7 +702,7 @@ xfs_iflush_finish(
                 iip->ili_last_fields = 0;
                 iip->ili_flush_lsn = 0;
                 spin_unlock(&iip->ili_lock);
-               xfs_ifunlock(iip->ili_inode);
+               xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
                 if (drop_buffer)
                         xfs_buf_rele(bp);
         }
@@ -711,8 +710,8 @@ xfs_iflush_finish(
  
  /*
   * Inode buffer IO completion routine.  It is responsible for removing inodes
- * attached to the buffer from the AIL if they have not been re-logged, as well
- * as completing the flush and unlocking the inode.
+ * attached to the buffer from the AIL if they have not been re-logged and
+ * completing the inode flush.
   */
  void
  xfs_iflush_done(
@@ -755,10 +754,10 @@ xfs_iflush_done(
  }
  
  /*
- * This is the inode flushing abort routine.  It is called from xfs_iflush when
+ * This is the inode flushing abort routine.  It is called when
   * the filesystem is shutting down to clean up the inode state.  It is
   * responsible for removing the inode item from the AIL if it has not been
- * re-logged, and unlocking the inode's flush lock.
+ * re-logged and clearing the inode's flush state.
   */
  void
  xfs_iflush_abort(
@@ -790,7 +789,7 @@ xfs_iflush_abort(
                 list_del_init(&iip->ili_item.li_bio_list);
                 spin_unlock(&iip->ili_lock);
         }
-       xfs_ifunlock(ip);
+       xfs_iflags_clear(ip, XFS_IFLUSHING);
         if (bp)
                 xfs_buf_rele(bp);
  }
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h

index 048b5e7..23a7b49 100644 (file)
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -25,8 +25,8 @@ struct xfs_inode_log_item {
          *
          * We need atomic changes between inode dirtying, inode flushing and
          * inode completion, but these all hold different combinations of
-        * ILOCK and iflock and hence we need some other method of serialising
-        * updates to the flush state.
+        * ILOCK and IFLUSHING and hence we need some other method of
+        * serialising updates to the flush state.
          */
         spinlock_t              ili_lock;          /* flush state lock */
         unsigned int            ili_last_fields;   /* fields when flushed */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 0bc623c..5d52249 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1059,11 +1059,12 @@ xfs_unmountfs(
          * We can potentially deadlock here if we have an inode cluster
          * that has been freed has its buffer still pinned in memory because
          * the transaction is still sitting in a iclog. The stale inodes
-        * on that buffer will have their flush locks held until the
-        * transaction hits the disk and the callbacks run. the inode
-        * flush takes the flush lock unconditionally and with nothing to
-        * push out the iclog we will never get that unlocked. hence we
-        * need to force the log first.
+        * on that buffer will be pinned to the buffer until the
+        * transaction hits the disk and the callbacks run. Pushing the AIL will
+        * skip the stale inodes and may never see the pinned buffer, so
+        * nothing will push out the iclog and unpin the buffer. Hence we
+        * need to force the log here to ensure all items are flushed into the
+        * AIL before we go any further.
          */
         xfs_log_force(mp, XFS_LOG_SYNC);
  
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 71ac6c1..68ec8db 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -654,11 +654,11 @@ xfs_fs_destroy_inode(
         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
  
         /*
-        * We always use background reclaim here because even if the
-        * inode is clean, it still may be under IO and hence we have
-        * to take the flush lock. The background reclaim path handles
-        * this more efficiently than we can here, so simply let background
-        * reclaim tear down all inodes.
+        * We always use background reclaim here because even if the inode is
+        * clean, it still may be under IO and hence we have wait for IO
+        * completion to occur before we can reclaim the inode. The background
+        * reclaim path handles this more efficiently than we can here, so
+        * simply let background reclaim tear down all inodes.
          */
         xfs_inode_set_reclaim_tag(ip);
  }
author	Dave Chinner <dchinner@redhat.com>
	Mon, 17 Aug 2020 23:41:01 +0000 (16:41 -0700)
committer	Darrick J. Wong <darrick.wong@oracle.com>
	Mon, 7 Sep 2020 01:05:51 +0000 (18:05 -0700)
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_inode.h		patch \| blob \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| history
fs/xfs/xfs_inode_item.h		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_super.c		patch \| blob \| history