Merge tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git...

[linux-2.6-microblaze.git] / fs / xfs / xfs_inode.c
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 6f25178..28493c8 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -20,6 +20,7 @@
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_inode_item.h"
+#include "xfs_iunlink_item.h"
  #include "xfs_ialloc.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -125,12 +126,32 @@ xfs_ilock_attr_map_shared(
  {
         uint                    lock_mode = XFS_ILOCK_SHARED;
  
-       if (ip->i_afp && xfs_need_iread_extents(ip->i_afp))
+       if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
                 lock_mode = XFS_ILOCK_EXCL;
         xfs_ilock(ip, lock_mode);
         return lock_mode;
  }
  
+/*
+ * You can't set both SHARED and EXCL for the same lock,
+ * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
+ * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
+ * to set in lock_flags.
+ */
+static inline void
+xfs_lock_flags_assert(
+       uint            lock_flags)
+{
+       ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+       ASSERT(lock_flags != 0);
+}
+
  /*
   * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
   * multi-reader locks: invalidate_lock and the i_lock.  This routine allows
@@ -168,18 +189,7 @@ xfs_ilock(
  {
         trace_xfs_ilock(ip, lock_flags, _RET_IP_);
  
-       /*
-        * You can't set both SHARED and EXCL for the same lock,
-        * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
-        * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
-        */
-       ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
-              (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
-              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
-              (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+       xfs_lock_flags_assert(lock_flags);
  
         if (lock_flags & XFS_IOLOCK_EXCL) {
                 down_write_nested(&VFS_I(ip)->i_rwsem,
@@ -222,18 +232,7 @@ xfs_ilock_nowait(
  {
         trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
  
-       /*
-        * You can't set both SHARED and EXCL for the same lock,
-        * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
-        * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
-        */
-       ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
-              (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
-              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
-              (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+       xfs_lock_flags_assert(lock_flags);
  
         if (lock_flags & XFS_IOLOCK_EXCL) {
                 if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
@@ -291,19 +290,7 @@ xfs_iunlock(
         xfs_inode_t             *ip,
         uint                    lock_flags)
  {
-       /*
-        * You can't set both SHARED and EXCL for the same lock,
-        * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
-        * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
-        */
-       ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
-              (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
-              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
-       ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
-              (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
-       ASSERT(lock_flags != 0);
+       xfs_lock_flags_assert(lock_flags);
  
         if (lock_flags & XFS_IOLOCK_EXCL)
                 up_write(&VFS_I(ip)->i_rwsem);
@@ -379,8 +366,8 @@ xfs_isilocked(
         }
  
         if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
-               return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
-                               (lock_flags & XFS_IOLOCK_SHARED));
+               return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+                               (lock_flags & XFS_MMAPLOCK_SHARED));
         }
  
         if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
@@ -649,7 +636,7 @@ xfs_ip2xflags(
                         flags |= FS_XFLAG_COWEXTSIZE;
         }
  
-       if (XFS_IFORK_Q(ip))
+       if (xfs_inode_has_attr_fork(ip))
                 flags |= FS_XFLAG_HASATTR;
         return flags;
  }
@@ -907,7 +894,7 @@ xfs_init_new_inode(
          */
         if (init_xattrs && xfs_has_attr(mp)) {
                 ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
-               ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
+               xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
         }
  
         /*
@@ -1307,8 +1294,8 @@ xfs_itruncate_clear_reflink_flags(
  
         if (!xfs_is_reflink_inode(ip))
                 return;
-       dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       dfork = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+       cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
         if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
                 ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
         if (cfork->if_bytes == 0)
@@ -1657,7 +1644,7 @@ xfs_inode_needs_inactive(
         struct xfs_inode        *ip)
  {
         struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_ifork        *cow_ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
  
         /*
          * If the inode is already free, then there can be nothing
@@ -1776,13 +1763,12 @@ xfs_inactive(
          * now.  The code calls a routine that recursively deconstructs the
          * attribute fork. If also blows away the in-core attribute fork.
          */
-       if (XFS_IFORK_Q(ip)) {
+       if (xfs_inode_has_attr_fork(ip)) {
                 error = xfs_attr_inactive(ip);
                 if (error)
                         goto out;
         }
  
-       ASSERT(!ip->i_afp);
         ASSERT(ip->i_forkoff == 0);
  
         /*
@@ -1815,195 +1801,69 @@ out:
   * because we must walk that list to find the inode that points to the inode
   * being removed from the unlinked hash bucket list.
   *
- * What if we modelled the unlinked list as a collection of records capturing
- * "X.next_unlinked = Y" relations?  If we indexed those records on Y, we'd
- * have a fast way to look up unlinked list predecessors, which avoids the
- * slow list walk.  That's exactly what we do here (in-core) with a per-AG
- * rhashtable.
- *
- * Because this is a backref cache, we ignore operational failures since the
- * iunlink code can fall back to the slow bucket walk.  The only errors that
- * should bubble out are for obviously incorrect situations.
+ * Hence we keep an in-memory double linked list to link each inode on an
+ * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
+ * based lists would require having 64 list heads in the perag, one for each
+ * list. This is expensive in terms of memory (think millions of AGs) and cache
+ * misses on lookups. Instead, use the fact that inodes on the unlinked list
+ * must be referenced at the VFS level to keep them on the list and hence we
+ * have an existence guarantee for inodes on the unlinked list.
   *
- * All users of the backref cache MUST hold the AGI buffer lock to serialize
- * access or have otherwise provided for concurrency control.
+ * Given we have an existence guarantee, we can use lockless inode cache lookups
+ * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
+ * for the double linked unlinked list, and we don't need any extra locking to
+ * keep the list safe as all manipulations are done under the AGI buffer lock.
+ * Keeping the list up to date does not require memory allocation, just finding
+ * the XFS inode and updating the next/prev unlinked list aginos.
   */
  
-/* Capture a "X.next_unlinked = Y" relationship. */
-struct xfs_iunlink {
-       struct rhash_head       iu_rhash_head;
-       xfs_agino_t             iu_agino;               /* X */
-       xfs_agino_t             iu_next_unlinked;       /* Y */
-};
-
-/* Unlinked list predecessor lookup hashtable construction */
-static int
-xfs_iunlink_obj_cmpfn(
-       struct rhashtable_compare_arg   *arg,
-       const void                      *obj)
-{
-       const xfs_agino_t               *key = arg->key;
-       const struct xfs_iunlink        *iu = obj;
-
-       if (iu->iu_next_unlinked != *key)
-               return 1;
-       return 0;
-}
-
-static const struct rhashtable_params xfs_iunlink_hash_params = {
-       .min_size               = XFS_AGI_UNLINKED_BUCKETS,
-       .key_len                = sizeof(xfs_agino_t),
-       .key_offset             = offsetof(struct xfs_iunlink,
-                                          iu_next_unlinked),
-       .head_offset            = offsetof(struct xfs_iunlink, iu_rhash_head),
-       .automatic_shrinking    = true,
-       .obj_cmpfn              = xfs_iunlink_obj_cmpfn,
-};
-
  /*
- * Return X, where X.next_unlinked == @agino.  Returns NULLAGINO if no such
- * relation is found.
+ * Find an inode on the unlinked list. This does not take references to the
+ * inode as we have existence guarantees by holding the AGI buffer lock and that
+ * only unlinked, referenced inodes can be on the unlinked inode list.  If we
+ * don't find the inode in cache, then let the caller handle the situation.
   */
-static xfs_agino_t
-xfs_iunlink_lookup_backref(
+static struct xfs_inode *
+xfs_iunlink_lookup(
         struct xfs_perag        *pag,
         xfs_agino_t             agino)
  {
-       struct xfs_iunlink      *iu;
-
-       iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
-                       xfs_iunlink_hash_params);
-       return iu ? iu->iu_agino : NULLAGINO;
-}
+       struct xfs_inode        *ip;
  
-/*
- * Take ownership of an iunlink cache entry and insert it into the hash table.
- * If successful, the entry will be owned by the cache; if not, it is freed.
- * Either way, the caller does not own @iu after this call.
- */
-static int
-xfs_iunlink_insert_backref(
-       struct xfs_perag        *pag,
-       struct xfs_iunlink      *iu)
-{
-       int                     error;
+       rcu_read_lock();
+       ip = radix_tree_lookup(&pag->pag_ici_root, agino);
  
-       error = rhashtable_insert_fast(&pag->pagi_unlinked_hash,
-                       &iu->iu_rhash_head, xfs_iunlink_hash_params);
         /*
-        * Fail loudly if there already was an entry because that's a sign of
-        * corruption of in-memory data.  Also fail loudly if we see an error
-        * code we didn't anticipate from the rhashtable code.  Currently we
-        * only anticipate ENOMEM.
+        * Inode not in memory or in RCU freeing limbo should not happen.
+        * Warn about this and let the caller handle the failure.
          */
-       if (error) {
-               WARN(error != -ENOMEM, "iunlink cache insert error %d", error);
-               kmem_free(iu);
+       if (WARN_ON_ONCE(!ip || !ip->i_ino)) {
+               rcu_read_unlock();
+               return NULL;
         }
-       /*
-        * Absorb any runtime errors that aren't a result of corruption because
-        * this is a cache and we can always fall back to bucket list scanning.
-        */
-       if (error != 0 && error != -EEXIST)
-               error = 0;
-       return error;
+       ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM));
+       rcu_read_unlock();
+       return ip;
  }
  
-/* Remember that @prev_agino.next_unlinked = @this_agino. */
+/* Update the prev pointer of the next agino. */
  static int
-xfs_iunlink_add_backref(
+xfs_iunlink_update_backref(
         struct xfs_perag        *pag,
         xfs_agino_t             prev_agino,
-       xfs_agino_t             this_agino)
-{
-       struct xfs_iunlink      *iu;
-
-       if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
-               return 0;
-
-       iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
-       iu->iu_agino = prev_agino;
-       iu->iu_next_unlinked = this_agino;
-
-       return xfs_iunlink_insert_backref(pag, iu);
-}
-
-/*
- * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
- * If @next_unlinked is NULLAGINO, we drop the backref and exit.  If there
- * wasn't any such entry then we don't bother.
- */
-static int
-xfs_iunlink_change_backref(
-       struct xfs_perag        *pag,
-       xfs_agino_t             agino,
-       xfs_agino_t             next_unlinked)
+       xfs_agino_t             next_agino)
  {
-       struct xfs_iunlink      *iu;
-       int                     error;
-
-       /* Look up the old entry; if there wasn't one then exit. */
-       iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
-                       xfs_iunlink_hash_params);
-       if (!iu)
-               return 0;
-
-       /*
-        * Remove the entry.  This shouldn't ever return an error, but if we
-        * couldn't remove the old entry we don't want to add it again to the
-        * hash table, and if the entry disappeared on us then someone's
-        * violated the locking rules and we need to fail loudly.  Either way
-        * we cannot remove the inode because internal state is or would have
-        * been corrupt.
-        */
-       error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
-                       &iu->iu_rhash_head, xfs_iunlink_hash_params);
-       if (error)
-               return error;
+       struct xfs_inode        *ip;
  
-       /* If there is no new next entry just free our item and return. */
-       if (next_unlinked == NULLAGINO) {
-               kmem_free(iu);
+       /* No update necessary if we are at the end of the list. */
+       if (next_agino == NULLAGINO)
                 return 0;
-       }
-
-       /* Update the entry and re-add it to the hash table. */
-       iu->iu_next_unlinked = next_unlinked;
-       return xfs_iunlink_insert_backref(pag, iu);
-}
-
-/* Set up the in-core predecessor structures. */
-int
-xfs_iunlink_init(
-       struct xfs_perag        *pag)
-{
-       return rhashtable_init(&pag->pagi_unlinked_hash,
-                       &xfs_iunlink_hash_params);
-}
-
-/* Free the in-core predecessor structures. */
-static void
-xfs_iunlink_free_item(
-       void                    *ptr,
-       void                    *arg)
-{
-       struct xfs_iunlink      *iu = ptr;
-       bool                    *freed_anything = arg;
-
-       *freed_anything = true;
-       kmem_free(iu);
-}
-
-void
-xfs_iunlink_destroy(
-       struct xfs_perag        *pag)
-{
-       bool                    freed_anything = false;
-
-       rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
-                       xfs_iunlink_free_item, &freed_anything);
  
-       ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount));
+       ip = xfs_iunlink_lookup(pag, next_agino);
+       if (!ip)
+               return -EFSCORRUPTED;
+       ip->i_prev_unlinked = prev_agino;
+       return 0;
  }
  
  /*
@@ -2022,7 +1882,7 @@ xfs_iunlink_update_bucket(
         xfs_agino_t             old_value;
         int                     offset;
  
-       ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino));
+       ASSERT(xfs_verify_agino_or_null(pag, new_agino));
  
         old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
         trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
@@ -2045,88 +1905,53 @@ xfs_iunlink_update_bucket(
         return 0;
  }
  
-/* Set an on-disk inode's next_unlinked pointer. */
-STATIC void
-xfs_iunlink_update_dinode(
-       struct xfs_trans        *tp,
-       struct xfs_perag        *pag,
-       xfs_agino_t             agino,
-       struct xfs_buf          *ibp,
-       struct xfs_dinode       *dip,
-       struct xfs_imap         *imap,
-       xfs_agino_t             next_agino)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       int                     offset;
-
-       ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
-
-       trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino,
-                       be32_to_cpu(dip->di_next_unlinked), next_agino);
-
-       dip->di_next_unlinked = cpu_to_be32(next_agino);
-       offset = imap->im_boffset +
-                       offsetof(struct xfs_dinode, di_next_unlinked);
-
-       /* need to recalc the inode CRC if appropriate */
-       xfs_dinode_calc_crc(mp, dip);
-       xfs_trans_inode_buf(tp, ibp);
-       xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
-}
-
-/* Set an in-core inode's unlinked pointer and return the old value. */
-STATIC int
-xfs_iunlink_update_inode(
+static int
+xfs_iunlink_insert_inode(
         struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
         struct xfs_perag        *pag,
-       xfs_agino_t             next_agino,
-       xfs_agino_t             *old_next_agino)
+       struct xfs_buf          *agibp,
+       struct xfs_inode        *ip)
  {
         struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_dinode       *dip;
-       struct xfs_buf          *ibp;
-       xfs_agino_t             old_value;
+       struct xfs_agi          *agi = agibp->b_addr;
+       xfs_agino_t             next_agino;
+       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+       short                   bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
         int                     error;
  
-       ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
-
-       error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
-       if (error)
-               return error;
-       dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
-
-       /* Make sure the old pointer isn't garbage. */
-       old_value = be32_to_cpu(dip->di_next_unlinked);
-       if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) {
-               xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
-                               sizeof(*dip), __this_address);
-               error = -EFSCORRUPTED;
-               goto out;
+       /*
+        * Get the index into the agi hash table for the list this inode will
+        * go on.  Make sure the pointer isn't garbage and that this inode
+        * isn't already on the list.
+        */
+       next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+       if (next_agino == agino ||
+           !xfs_verify_agino_or_null(pag, next_agino)) {
+               xfs_buf_mark_corrupt(agibp);
+               return -EFSCORRUPTED;
         }
  
         /*
-        * Since we're updating a linked list, we should never find that the
-        * current pointer is the same as the new value, unless we're
-        * terminating the list.
+        * Update the prev pointer in the next inode to point back to this
+        * inode.
          */
-       *old_next_agino = old_value;
-       if (old_value == next_agino) {
-               if (next_agino != NULLAGINO) {
-                       xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
-                                       dip, sizeof(*dip), __this_address);
-                       error = -EFSCORRUPTED;
-               }
-               goto out;
+       error = xfs_iunlink_update_backref(pag, agino, next_agino);
+       if (error)
+               return error;
+
+       if (next_agino != NULLAGINO) {
+               /*
+                * There is already another inode in the bucket, so point this
+                * inode to the current head of the list.
+                */
+               error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
+               if (error)
+                       return error;
+               ip->i_next_unlinked = next_agino;
         }
  
-       /* Ok, update the new pointer. */
-       xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
-                       ibp, dip, &ip->i_imap, next_agino);
-       return 0;
-out:
-       xfs_trans_brelse(tp, ibp);
-       return error;
+       /* Point the head of the list to point to this inode. */
+       return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
  }
  
  /*
@@ -2143,11 +1968,7 @@ xfs_iunlink(
  {
         struct xfs_mount        *mp = tp->t_mountp;
         struct xfs_perag        *pag;
-       struct xfs_agi          *agi;
         struct xfs_buf          *agibp;
-       xfs_agino_t             next_agino;
-       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
-       short                   bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
         int                     error;
  
         ASSERT(VFS_I(ip)->i_nlink == 0);
@@ -2157,202 +1978,38 @@ xfs_iunlink(
         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
  
         /* Get the agi buffer first.  It ensures lock ordering on the list. */
-       error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
+       error = xfs_read_agi(pag, tp, &agibp);
         if (error)
                 goto out;
-       agi = agibp->b_addr;
-
-       /*
-        * Get the index into the agi hash table for the list this inode will
-        * go on.  Make sure the pointer isn't garbage and that this inode
-        * isn't already on the list.
-        */
-       next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
-       if (next_agino == agino ||
-           !xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) {
-               xfs_buf_mark_corrupt(agibp);
-               error = -EFSCORRUPTED;
-               goto out;
-       }
-
-       if (next_agino != NULLAGINO) {
-               xfs_agino_t             old_agino;
-
-               /*
-                * There is already another inode in the bucket, so point this
-                * inode to the current head of the list.
-                */
-               error = xfs_iunlink_update_inode(tp, ip, pag, next_agino,
-                               &old_agino);
-               if (error)
-                       goto out;
-               ASSERT(old_agino == NULLAGINO);
  
-               /*
-                * agino has been unlinked, add a backref from the next inode
-                * back to agino.
-                */
-               error = xfs_iunlink_add_backref(pag, agino, next_agino);
-               if (error)
-                       goto out;
-       }
-
-       /* Point the head of the list to point to this inode. */
-       error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+       error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
  out:
         xfs_perag_put(pag);
         return error;
  }
  
-/* Return the imap, dinode pointer, and buffer for an inode. */
-STATIC int
-xfs_iunlink_map_ino(
-       struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
-       xfs_agino_t             agino,
-       struct xfs_imap         *imap,
-       struct xfs_dinode       **dipp,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       int                     error;
-
-       imap->im_blkno = 0;
-       error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0);
-       if (error) {
-               xfs_warn(mp, "%s: xfs_imap returned error %d.",
-                               __func__, error);
-               return error;
-       }
-
-       error = xfs_imap_to_bp(mp, tp, imap, bpp);
-       if (error) {
-               xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
-                               __func__, error);
-               return error;
-       }
-
-       *dipp = xfs_buf_offset(*bpp, imap->im_boffset);
-       return 0;
-}
-
-/*
- * Walk the unlinked chain from @head_agino until we find the inode that
- * points to @target_agino.  Return the inode number, map, dinode pointer,
- * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp.
- *
- * @tp, @pag, @head_agino, and @target_agino are input parameters.
- * @agino, @imap, @dipp, and @bpp are all output parameters.
- *
- * Do not call this function if @target_agino is the head of the list.
- */
-STATIC int
-xfs_iunlink_map_prev(
-       struct xfs_trans        *tp,
-       struct xfs_perag        *pag,
-       xfs_agino_t             head_agino,
-       xfs_agino_t             target_agino,
-       xfs_agino_t             *agino,
-       struct xfs_imap         *imap,
-       struct xfs_dinode       **dipp,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       xfs_agino_t             next_agino;
-       int                     error;
-
-       ASSERT(head_agino != target_agino);
-       *bpp = NULL;
-
-       /* See if our backref cache can find it faster. */
-       *agino = xfs_iunlink_lookup_backref(pag, target_agino);
-       if (*agino != NULLAGINO) {
-               error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap,
-                               dipp, bpp);
-               if (error)
-                       return error;
-
-               if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino)
-                       return 0;
-
-               /*
-                * If we get here the cache contents were corrupt, so drop the
-                * buffer and fall back to walking the bucket list.
-                */
-               xfs_trans_brelse(tp, *bpp);
-               *bpp = NULL;
-               WARN_ON_ONCE(1);
-       }
-
-       trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno);
-
-       /* Otherwise, walk the entire bucket until we find it. */
-       next_agino = head_agino;
-       while (next_agino != target_agino) {
-               xfs_agino_t     unlinked_agino;
-
-               if (*bpp)
-                       xfs_trans_brelse(tp, *bpp);
-
-               *agino = next_agino;
-               error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap,
-                               dipp, bpp);
-               if (error)
-                       return error;
-
-               unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked);
-               /*
-                * Make sure this pointer is valid and isn't an obvious
-                * infinite loop.
-                */
-               if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) ||
-                   next_agino == unlinked_agino) {
-                       XFS_CORRUPTION_ERROR(__func__,
-                                       XFS_ERRLEVEL_LOW, mp,
-                                       *dipp, sizeof(**dipp));
-                       error = -EFSCORRUPTED;
-                       return error;
-               }
-               next_agino = unlinked_agino;
-       }
-
-       return 0;
-}
-
-/*
- * Pull the on-disk inode from the AGI unlinked list.
- */
-STATIC int
-xfs_iunlink_remove(
+static int
+xfs_iunlink_remove_inode(
         struct xfs_trans        *tp,
         struct xfs_perag        *pag,
+       struct xfs_buf          *agibp,
         struct xfs_inode        *ip)
  {
         struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_agi          *agi;
-       struct xfs_buf          *agibp;
-       struct xfs_buf          *last_ibp;
-       struct xfs_dinode       *last_dip = NULL;
+       struct xfs_agi          *agi = agibp->b_addr;
         xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
-       xfs_agino_t             next_agino;
         xfs_agino_t             head_agino;
         short                   bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
         int                     error;
  
         trace_xfs_iunlink_remove(ip);
  
-       /* Get the agi buffer first.  It ensures lock ordering on the list. */
-       error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
-       if (error)
-               return error;
-       agi = agibp->b_addr;
-
         /*
          * Get the index into the agi hash table for the list this inode will
          * go on.  Make sure the head pointer isn't garbage.
          */
         head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
-       if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) {
+       if (!xfs_verify_agino(pag, head_agino)) {
                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
                                 agi, sizeof(*agi));
                 return -EFSCORRUPTED;
@@ -2363,52 +2020,60 @@ xfs_iunlink_remove(
          * the old pointer value so that we can update whatever was previous
          * to us in the list to point to whatever was next in the list.
          */
-       error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino);
+       error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
         if (error)
                 return error;
  
         /*
-        * If there was a backref pointing from the next inode back to this
-        * one, remove it because we've removed this inode from the list.
-        *
-        * Later, if this inode was in the middle of the list we'll update
-        * this inode's backref to point from the next inode.
+        * Update the prev pointer in the next inode to point back to previous
+        * inode in the chain.
          */
-       if (next_agino != NULLAGINO) {
-               error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO);
-               if (error)
-                       return error;
-       }
+       error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
+                       ip->i_next_unlinked);
+       if (error)
+               return error;
  
         if (head_agino != agino) {
-               struct xfs_imap imap;
-               xfs_agino_t     prev_agino;
+               struct xfs_inode        *prev_ip;
  
-               /* We need to search the list for the inode being freed. */
-               error = xfs_iunlink_map_prev(tp, pag, head_agino, agino,
-                               &prev_agino, &imap, &last_dip, &last_ibp);
-               if (error)
-                       return error;
+               prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
+               if (!prev_ip)
+                       return -EFSCORRUPTED;
  
-               /* Point the previous inode on the list to the next inode. */
-               xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp,
-                               last_dip, &imap, next_agino);
-
-               /*
-                * Now we deal with the backref for this inode.  If this inode
-                * pointed at a real inode, change the backref that pointed to
-                * us to point to our old next.  If this inode was the end of
-                * the list, delete the backref that pointed to us.  Note that
-                * change_backref takes care of deleting the backref if
-                * next_agino is NULLAGINO.
-                */
-               return xfs_iunlink_change_backref(agibp->b_pag, agino,
-                               next_agino);
+               error = xfs_iunlink_log_inode(tp, prev_ip, pag,
+                               ip->i_next_unlinked);
+               prev_ip->i_next_unlinked = ip->i_next_unlinked;
+       } else {
+               /* Point the head of the list to the next unlinked inode. */
+               error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
+                               ip->i_next_unlinked);
         }
  
-       /* Point the head of the list to the next unlinked inode. */
-       return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
-                       next_agino);
+       ip->i_next_unlinked = NULLAGINO;
+       ip->i_prev_unlinked = NULLAGINO;
+       return error;
+}
+
+/*
+ * Pull the on-disk inode from the AGI unlinked list.
+ */
+STATIC int
+xfs_iunlink_remove(
+       struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
+       struct xfs_inode        *ip)
+{
+       struct xfs_buf          *agibp;
+       int                     error;
+
+       trace_xfs_iunlink_remove(ip);
+
+       /* Get the agi buffer first.  It ensures lock ordering on the list. */
+       error = xfs_read_agi(pag, tp, &agibp);
+       if (error)
+               return error;
+
+       return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
  }
  
  /*
@@ -3046,10 +2711,12 @@ out_trans_abort:
  static int
  xfs_rename_alloc_whiteout(
         struct user_namespace   *mnt_userns,
+       struct xfs_name         *src_name,
         struct xfs_inode        *dp,
         struct xfs_inode        **wip)
  {
         struct xfs_inode        *tmpfile;
+       struct qstr             name;
         int                     error;
  
         error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
@@ -3057,6 +2724,15 @@ xfs_rename_alloc_whiteout(
         if (error)
                 return error;
  
+       name.name = src_name->name;
+       name.len = src_name->len;
+       error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name);
+       if (error) {
+               xfs_finish_inode_setup(tmpfile);
+               xfs_irele(tmpfile);
+               return error;
+       }
+
         /*
          * Prepare the tmpfile inode as if it were created through the VFS.
          * Complete the inode setup and flag it as linkable.  nlink is already
@@ -3107,7 +2783,8 @@ xfs_rename(
          * appropriately.
          */
         if (flags & RENAME_WHITEOUT) {
-               error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
+               error = xfs_rename_alloc_whiteout(mnt_userns, src_name,
+                                                 target_dp, &wip);
                 if (error)
                         return error;
  
@@ -3243,11 +2920,13 @@ retry:
                 if (inodes[i] == wip ||
                     (inodes[i] == target_ip &&
                      (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
-                       struct xfs_buf  *bp;
-                       xfs_agnumber_t  agno;
+                       struct xfs_perag        *pag;
+                       struct xfs_buf          *bp;
  
-                       agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino);
-                       error = xfs_read_agi(mp, tp, agno, &bp);
+                       pag = xfs_perag_get(mp,
+                                       XFS_INO_TO_AGNO(mp, inodes[i]->i_ino));
+                       error = xfs_read_agi(pag, tp, &bp);
+                       xfs_perag_put(pag);
                         if (error)
                                 goto out_trans_cancel;
                 }
@@ -3466,13 +3145,13 @@ xfs_iflush(
                         goto flush_out;
                 }
         }
-       if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
+       if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
                                 ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
                 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
                         "%s: detected corrupt incore inode %llu, "
                         "total extents = %llu nblocks = %lld, ptr "PTR_FMT,
                         __func__, ip->i_ino,
-                       ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
+                       ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af),
                         ip->i_nblocks, ip);
                 goto flush_out;
         }
@@ -3502,7 +3181,8 @@ xfs_iflush(
         if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
             xfs_ifork_verify_local_data(ip))
                 goto flush_out;
-       if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL &&
+       if (xfs_inode_has_attr_fork(ip) &&
+           ip->i_af.if_format == XFS_DINODE_FMT_LOCAL &&
             xfs_ifork_verify_local_attr(ip))
                 goto flush_out;
  
@@ -3520,7 +3200,7 @@ xfs_iflush(
         }
  
         xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-       if (XFS_IFORK_Q(ip))
+       if (xfs_inode_has_attr_fork(ip))
                 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
  
         /*