Merge tag 'xfs-5.10-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst

index f461d6c..86de8a1 100644 (file)
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -210,6 +210,28 @@ When mounting an XFS filesystem, the following options are accepted.
         inconsistent namespace presentation during or after a
         failover event.
  
+Deprecation of V4 Format
+========================
+
+The V4 filesystem format lacks certain features that are supported by
+the V5 format, such as metadata checksumming, strengthened metadata
+verification, and the ability to store timestamps past the year 2038.
+Because of this, the V4 format is deprecated.  All users should upgrade
+by backing up their files, reformatting, and restoring from the backup.
+
+Administrators and users can detect a V4 filesystem by running xfs_info
+against a filesystem mountpoint and checking for a string containing
+"crc=".  If "crc=0" is found, the filesystem is V4.  If no such string is
+found, please upgrade xfsprogs to the latest version and try again.
+
+The deprecation will take place in two parts.  Support for mounting V4
+filesystems can now be disabled at kernel build time via a Kconfig option.
+The option will default to yes until September 2025, at which time it
+will be changed to default to no.  In September 2030, support will be
+removed from the codebase entirely.
+
+Note: Distributors may choose to withdraw V4 format support earlier than
+the dates listed above.
  
  Deprecated Mount Options
  ========================
@@ -217,6 +239,9 @@ Deprecated Mount Options
  ===========================     ================
    Name                         Removal Schedule
  ===========================     ================
+Mounting with V4 filesystem     September 2030
+ikeep/noikeep                  September 2025
+attr2/noattr2                  September 2025
  ===========================     ================
  
  
@@ -331,7 +356,12 @@ The following sysctls are available for the XFS filesystem:
  Deprecated Sysctls
  ==================
  
-None at present.
+===========================     ================
+  Name                         Removal Schedule
+===========================     ================
+fs.xfs.irix_sgid_inherit        September 2025
+fs.xfs.irix_symlink_mode        September 2025
+===========================     ================
  
  
  Removed Sysctls
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig

index e685299..9fac5ea 100644 (file)
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,31 @@ config XFS_FS
           system of your root partition is compiled as a module, you'll need
           to use an initial ramdisk (initrd) to boot.
  
+config XFS_SUPPORT_V4
+       bool "Support deprecated V4 (crc=0) format"
+       depends on XFS_FS
+       default y
+       help
+         The V4 filesystem format lacks certain features that are supported
+         by the V5 format, such as metadata checksumming, strengthened
+         metadata verification, and the ability to store timestamps past the
+         year 2038.  Because of this, the V4 format is deprecated.  All users
+         should upgrade by backing up their files, reformatting, and restoring
+         from the backup.
+
+         Administrators and users can detect a V4 filesystem by running
+         xfs_info against a filesystem mountpoint and checking for a string
+         beginning with "crc=".  If the string "crc=0" is found, the
+         filesystem is a V4 filesystem.  If no such string is found, please
+         upgrade xfsprogs to the latest version and try again.
+
+         This option will become default N in September 2025.  Support for the
+         V4 format will be removed entirely in September 2030.  Distributors
+         can say N here to withdraw support earlier.
+
+         To continue supporting the old V4 format (crc=0), say Y.
+         To close off an attack surface, say N.
+
  config XFS_QUOTA
         bool "XFS Quota support"
         depends on XFS_FS
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c

index 3f80ced..48d8e9c 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -96,8 +96,6 @@ xfs_attr3_rmt_verify(
  {
         struct xfs_attr3_rmt_hdr *rmt = ptr;
  
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return __this_address;
         if (!xfs_verify_magic(bp, rmt->rm_magic))
                 return __this_address;
         if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index 1b0a01b..d9a6924 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5046,20 +5046,25 @@ xfs_bmap_del_extent_real(
  
         flags = XFS_ILOG_CORE;
         if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
-               xfs_fsblock_t   bno;
                 xfs_filblks_t   len;
                 xfs_extlen_t    mod;
  
-               bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
-                                 &mod);
-               ASSERT(mod == 0);
                 len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
                                   &mod);
                 ASSERT(mod == 0);
  
-               error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
-               if (error)
-                       goto done;
+               if (!(bflags & XFS_BMAPI_REMAP)) {
+                       xfs_fsblock_t   bno;
+
+                       bno = div_u64_rem(del->br_startblock,
+                                       mp->m_sb.sb_rextsize, &mod);
+                       ASSERT(mod == 0);
+
+                       error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+                       if (error)
+                               goto done;
+               }
+
                 do_fx = 0;
                 nblks = len * mp->m_sb.sb_rextsize;
                 qfield = XFS_TRANS_DQ_RTBCOUNT;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h

index b40a4e8..b876b44 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -15,8 +15,8 @@
   */
  #define XFS_DA_NODE_MAGIC      0xfebe  /* magic number: non-leaf blocks */
  #define XFS_ATTR_LEAF_MAGIC    0xfbee  /* magic number: attribute leaf blks */
-#define        XFS_DIR2_LEAF1_MAGIC    0xd2f1  /* magic number: v2 dirlf single blks */
-#define        XFS_DIR2_LEAFN_MAGIC    0xd2ff  /* magic number: v2 dirlf multi blks */
+#define XFS_DIR2_LEAF1_MAGIC   0xd2f1  /* magic number: v2 dirlf single blks */
+#define XFS_DIR2_LEAFN_MAGIC   0xd2ff  /* magic number: v2 dirlf multi blks */
  
  typedef struct xfs_da_blkinfo {
         __be32          forw;                   /* previous block in list */
@@ -35,8 +35,8 @@ typedef struct xfs_da_blkinfo {
   */
  #define XFS_DA3_NODE_MAGIC     0x3ebe  /* magic number: non-leaf blocks */
  #define XFS_ATTR3_LEAF_MAGIC   0x3bee  /* magic number: attribute leaf blks */
-#define        XFS_DIR3_LEAF1_MAGIC    0x3df1  /* magic number: v2 dirlf single blks */
-#define        XFS_DIR3_LEAFN_MAGIC    0x3dff  /* magic number: v2 dirlf multi blks */
+#define XFS_DIR3_LEAF1_MAGIC   0x3df1  /* magic number: v3 dirlf single blks */
+#define XFS_DIR3_LEAFN_MAGIC   0x3dff  /* magic number: v3 dirlf multi blks */
  
  struct xfs_da3_blkinfo {
         /*
@@ -61,7 +61,7 @@ struct xfs_da3_blkinfo {
   * Since we have duplicate keys, use a binary search but always follow
   * all match in the block, not just the first match found.
   */
-#define        XFS_DA_NODE_MAXDEPTH    5       /* max depth of Btree */
+#define XFS_DA_NODE_MAXDEPTH   5       /* max depth of Btree */
  
  typedef struct xfs_da_node_hdr {
         struct xfs_da_blkinfo   info;   /* block type, links, etc. */
@@ -746,14 +746,14 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
   */
  static inline int xfs_attr_leaf_entsize_remote(int nlen)
  {
-       return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
-               XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+       return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 +
+                       nlen, XFS_ATTR_LEAF_NAME_ALIGN);
  }
  
  static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
  {
-       return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
-               XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
+       return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 +
+                       nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
  }
  
  static inline int xfs_attr_leaf_entsize_local_max(int bsize)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c

index d8f5862..eff4a12 100644 (file)
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -16,6 +16,8 @@
  #include "xfs_inode.h"
  #include "xfs_inode_item.h"
  #include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
  
  /*
   * Deferred Operations in XFS
@@ -186,8 +188,9 @@ xfs_defer_create_intent(
  {
         const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
  
-       dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
-                       dfp->dfp_count, sort);
+       if (!dfp->dfp_intent)
+               dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+                                                    dfp->dfp_count, sort);
  }
  
  /*
@@ -311,22 +314,6 @@ xfs_defer_trans_roll(
         return error;
  }
  
-/*
- * Reset an already used dfops after finish.
- */
-static void
-xfs_defer_reset(
-       struct xfs_trans        *tp)
-{
-       ASSERT(list_empty(&tp->t_dfops));
-
-       /*
-        * Low mode state transfers across transaction rolls to mirror dfops
-        * lifetime. Clear it now that dfops is reset.
-        */
-       tp->t_flags &= ~XFS_TRANS_LOWMODE;
-}
-
  /*
   * Free up any items left in the list.
   */
@@ -359,6 +346,58 @@ xfs_defer_cancel_list(
         }
  }
  
+/*
+ * Prevent a log intent item from pinning the tail of the log by logging a
+ * done item to release the intent item; and then log a new intent item.
+ * The caller should provide a fresh transaction and roll it after we're done.
+ */
+static int
+xfs_defer_relog(
+       struct xfs_trans                **tpp,
+       struct list_head                *dfops)
+{
+       struct xlog                     *log = (*tpp)->t_mountp->m_log;
+       struct xfs_defer_pending        *dfp;
+       xfs_lsn_t                       threshold_lsn = NULLCOMMITLSN;
+
+
+       ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+       list_for_each_entry(dfp, dfops, dfp_list) {
+               /*
+                * If the log intent item for this deferred op is not a part of
+                * the current log checkpoint, relog the intent item to keep
+                * the log tail moving forward.  We're ok with this being racy
+                * because an incorrect decision means we'll be a little slower
+                * at pushing the tail.
+                */
+               if (dfp->dfp_intent == NULL ||
+                   xfs_log_item_in_current_chkpt(dfp->dfp_intent))
+                       continue;
+
+               /*
+                * Figure out where we need the tail to be in order to maintain
+                * the minimum required free space in the log.  Only sample
+                * the log threshold once per call.
+                */
+               if (threshold_lsn == NULLCOMMITLSN) {
+                       threshold_lsn = xlog_grant_push_threshold(log, 0);
+                       if (threshold_lsn == NULLCOMMITLSN)
+                               break;
+               }
+               if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
+                       continue;
+
+               trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
+               XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+               dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
+       }
+
+       if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
+               return xfs_defer_trans_roll(tpp);
+       return 0;
+}
+
  /*
   * Log an intent-done item for the first pending intent, and finish the work
   * items.
@@ -390,6 +429,7 @@ xfs_defer_finish_one(
                         list_add(li, &dfp->dfp_work);
                         dfp->dfp_count++;
                         dfp->dfp_done = NULL;
+                       dfp->dfp_intent = NULL;
                         xfs_defer_create_intent(tp, dfp, false);
                 }
  
@@ -428,13 +468,27 @@ xfs_defer_finish_noroll(
  
         /* Until we run out of pending work to finish... */
         while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
+               /*
+                * Deferred items that are created in the process of finishing
+                * other deferred work items should be queued at the head of
+                * the pending list, which puts them ahead of the deferred work
+                * that was created by the caller.  This keeps the number of
+                * pending work items to a minimum, which decreases the amount
+                * of time that any one intent item can stick around in memory,
+                * pinning the log tail.
+                */
                 xfs_defer_create_intents(*tp);
-               list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
+               list_splice_init(&(*tp)->t_dfops, &dop_pending);
  
                 error = xfs_defer_trans_roll(tp);
                 if (error)
                         goto out_shutdown;
  
+               /* Possibly relog intent items to keep the log moving. */
+               error = xfs_defer_relog(tp, &dop_pending);
+               if (error)
+                       goto out_shutdown;
+
                 dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
                                        dfp_list);
                 error = xfs_defer_finish_one(*tp, dfp);
@@ -475,7 +529,10 @@ xfs_defer_finish(
                         return error;
                 }
         }
-       xfs_defer_reset(*tp);
+
+       /* Reset LOWMODE now that we've finished all the dfops. */
+       ASSERT(list_empty(&(*tp)->t_dfops));
+       (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
         return 0;
  }
  
@@ -549,6 +606,139 @@ xfs_defer_move(
          * that behavior.
          */
         dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+       stp->t_flags &= ~XFS_TRANS_LOWMODE;
+}
+
+/*
+ * Prepare a chain of fresh deferred ops work items to be completed later.  Log
+ * recovery requires the ability to put off until later the actual finishing
+ * work so that it can process unfinished items recovered from the log in
+ * correct order.
+ *
+ * Create and log intent items for all the work that we're capturing so that we
+ * can be assured that the items will get replayed if the system goes down
+ * before log recovery gets a chance to finish the work it put off.  The entire
+ * deferred ops state is transferred to the capture structure and the
+ * transaction is then ready for the caller to commit it.  If there are no
+ * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
+ */
+static struct xfs_defer_capture *
+xfs_defer_ops_capture(
+       struct xfs_trans                *tp,
+       struct xfs_inode                *capture_ip)
+{
+       struct xfs_defer_capture        *dfc;
+
+       if (list_empty(&tp->t_dfops))
+               return NULL;
+
+       /* Create an object to capture the defer ops. */
+       dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+       INIT_LIST_HEAD(&dfc->dfc_list);
+       INIT_LIST_HEAD(&dfc->dfc_dfops);
+
+       xfs_defer_create_intents(tp);
+
+       /* Move the dfops chain and transaction state to the capture struct. */
+       list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
+       dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
+       tp->t_flags &= ~XFS_TRANS_LOWMODE;
+
+       /* Capture the remaining block reservations along with the dfops. */
+       dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
+       dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
+
+       /* Preserve the log reservation size. */
+       dfc->dfc_logres = tp->t_log_res;
+
+       /*
+        * Grab an extra reference to this inode and attach it to the capture
+        * structure.
+        */
+       if (capture_ip) {
+               ihold(VFS_I(capture_ip));
+               dfc->dfc_capture_ip = capture_ip;
+       }
+
+       return dfc;
+}
+
+/* Release all resources that we used to capture deferred ops. */
+void
+xfs_defer_ops_release(
+       struct xfs_mount                *mp,
+       struct xfs_defer_capture        *dfc)
+{
+       xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+       if (dfc->dfc_capture_ip)
+               xfs_irele(dfc->dfc_capture_ip);
+       kmem_free(dfc);
+}
+
+/*
+ * Capture any deferred ops and commit the transaction.  This is the last step
+ * needed to finish a log intent item that we recovered from the log.  If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure.  The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
+ */
+int
+xfs_defer_ops_capture_and_commit(
+       struct xfs_trans                *tp,
+       struct xfs_inode                *capture_ip,
+       struct list_head                *capture_list)
+{
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_defer_capture        *dfc;
+       int                             error;
+
+       ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
+       /* If we don't capture anything, commit transaction and exit. */
+       dfc = xfs_defer_ops_capture(tp, capture_ip);
+       if (!dfc)
+               return xfs_trans_commit(tp);
+
+       /* Commit the transaction and add the capture structure to the list. */
+       error = xfs_trans_commit(tp);
+       if (error) {
+               xfs_defer_ops_release(mp, dfc);
+               return error;
+       }
+
+       list_add_tail(&dfc->dfc_list, capture_list);
+       return 0;
+}
+
+/*
+ * Attach a chain of captured deferred ops to a new transaction and free the
+ * capture structure.  If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
+ */
+void
+xfs_defer_ops_continue(
+       struct xfs_defer_capture        *dfc,
+       struct xfs_trans                *tp,
+       struct xfs_inode                **captured_ipp)
+{
+       ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+       ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+       /* Lock and join the captured inode to the new transaction. */
+       if (dfc->dfc_capture_ip) {
+               xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+               xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+       }
+       *captured_ipp = dfc->dfc_capture_ip;
+
+       /* Move captured dfops chain and state to the transaction. */
+       list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
+       tp->t_flags |= dfc->dfc_tpflags;
  
-       xfs_defer_reset(stp);
+       kmem_free(dfc);
  }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h

index 6b2ca58..05472f7 100644 (file)
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -8,6 +8,7 @@
  
  struct xfs_btree_cur;
  struct xfs_defer_op_type;
+struct xfs_defer_capture;
  
  /*
   * Header for deferred operation list.
@@ -63,4 +64,40 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
  extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
  extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
  
+/*
+ * This structure enables a dfops user to detach the chain of deferred
+ * operations from a transaction so that they can be continued later.
+ */
+struct xfs_defer_capture {
+       /* List of other capture structures. */
+       struct list_head        dfc_list;
+
+       /* Deferred ops state saved from the transaction. */
+       struct list_head        dfc_dfops;
+       unsigned int            dfc_tpflags;
+
+       /* Block reservations for the data and rt devices. */
+       unsigned int            dfc_blkres;
+       unsigned int            dfc_rtxres;
+
+       /* Log reservation saved from the transaction. */
+       unsigned int            dfc_logres;
+
+       /*
+        * An inode reference that must be maintained to complete the deferred
+        * work.
+        */
+       struct xfs_inode        *dfc_capture_ip;
+};
+
+/*
+ * Functions to capture a chain of deferred operations and continue them later.
+ * This doesn't normally happen except during log recovery.
+ */
+int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
+               struct xfs_inode *capture_ip, struct list_head *capture_list);
+void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+               struct xfs_inode **captured_ipp);
+void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d);
+
  #endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h

index 5366661..ef5eaf3 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -17,7 +17,7 @@ struct xfs_dinode;
   */
  struct xfs_icdinode {
         uint16_t        di_flushiter;   /* incremented on flush */
-       uint32_t        di_projid;      /* owner's project id */
+       prid_t          di_projid;      /* owner's project id */
         xfs_fsize_t     di_size;        /* number of bytes in file */
         xfs_rfsblock_t  di_nblocks;     /* # of direct & btree blocks used */
         xfs_extlen_t    di_extsize;     /* basic/minimum extent size for file */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c

index 27c3926..340c83f 100644 (file)
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2505,12 +2505,15 @@ xfs_rmap_map_extent(
         int                     whichfork,
         struct xfs_bmbt_irec    *PREV)
  {
+       enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
+
         if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
                 return;
  
-       __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
-                       XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino,
-                       whichfork, PREV);
+       if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+               type = XFS_RMAP_MAP_SHARED;
+
+       __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
  }
  
  /* Unmap an extent out of a file. */
@@ -2521,12 +2524,15 @@ xfs_rmap_unmap_extent(
         int                     whichfork,
         struct xfs_bmbt_irec    *PREV)
  {
+       enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
+
         if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
                 return;
  
-       __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
-                       XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino,
-                       whichfork, PREV);
+       if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+               type = XFS_RMAP_UNMAP_SHARED;
+
+       __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
  }
  
  /*
@@ -2543,12 +2549,15 @@ xfs_rmap_convert_extent(
         int                     whichfork,
         struct xfs_bmbt_irec    *PREV)
  {
+       enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
+
         if (!xfs_rmap_update_is_needed(mp, whichfork))
                 return;
  
-       __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
-                       XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
-                       whichfork, PREV);
+       if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
+               type = XFS_RMAP_CONVERT_SHARED;
+
+       __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
  }
  
  /* Schedule the creation of an rmap for non-file data. */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c

index 1d9fa8a..6c1aba1 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1018,7 +1018,6 @@ xfs_rtalloc_query_range(
         struct xfs_mount                *mp = tp->t_mountp;
         xfs_rtblock_t                   rtstart;
         xfs_rtblock_t                   rtend;
-       xfs_rtblock_t                   rem;
         int                             is_free;
         int                             error = 0;
  
@@ -1027,13 +1026,12 @@ xfs_rtalloc_query_range(
         if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
             low_rec->ar_startext == high_rec->ar_startext)
                 return 0;
-       if (high_rec->ar_startext > mp->m_sb.sb_rextents)
-               high_rec->ar_startext = mp->m_sb.sb_rextents;
+       high_rec->ar_startext = min(high_rec->ar_startext,
+                       mp->m_sb.sb_rextents - 1);
  
         /* Iterate the bitmap, looking for discrepancies. */
         rtstart = low_rec->ar_startext;
-       rem = high_rec->ar_startext - rtstart;
-       while (rem) {
+       while (rtstart <= high_rec->ar_startext) {
                 /* Is the first block free? */
                 error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
                                 &is_free);
@@ -1042,7 +1040,7 @@ xfs_rtalloc_query_range(
  
                 /* How long does the extent go for? */
                 error = xfs_rtfind_forw(mp, tp, rtstart,
-                               high_rec->ar_startext - 1, &rtend);
+                               high_rec->ar_startext, &rtend);
                 if (error)
                         break;
  
@@ -1055,7 +1053,6 @@ xfs_rtalloc_query_range(
                                 break;
                 }
  
-               rem -= rtend - rtstart + 1;
                 rtstart = rtend + 1;
         }
  
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c

index e56786f..653f328 100644 (file)
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -441,6 +441,20 @@ xchk_da_btree_block(
                 goto out_freebp;
         }
  
+       /*
+        * If we've been handed a block that is below the dabtree root, does
+        * its hashval match what the parent block expected to see?
+        */
+       if (level > 0) {
+               struct xfs_da_node_entry        *key;
+
+               key = xchk_da_btree_node_entry(ds, level - 1);
+               if (be32_to_cpu(key->hashval) != blk->hashval) {
+                       xchk_da_set_corrupt(ds, level);
+                       goto out_freebp;
+               }
+       }
+
  out:
         return error;
  out_freebp:
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c

index ec36913..9e16a4d 100644 (file)
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -24,6 +24,7 @@
  #include "xfs_error.h"
  #include "xfs_log_priv.h"
  #include "xfs_log_recover.h"
+#include "xfs_quota.h"
  
  kmem_zone_t    *xfs_bui_zone;
  kmem_zone_t    *xfs_bud_zone;
@@ -423,30 +424,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
  STATIC int
  xfs_bui_item_recover(
         struct xfs_log_item             *lip,
-       struct xfs_trans                *parent_tp)
+       struct list_head                *capture_list)
  {
         struct xfs_bmbt_irec            irec;
         struct xfs_bui_log_item         *buip = BUI_ITEM(lip);
         struct xfs_trans                *tp;
         struct xfs_inode                *ip = NULL;
-       struct xfs_mount                *mp = parent_tp->t_mountp;
+       struct xfs_mount                *mp = lip->li_mountp;
         struct xfs_map_extent           *bmap;
         struct xfs_bud_log_item         *budp;
         xfs_fsblock_t                   startblock_fsb;
         xfs_fsblock_t                   inode_fsb;
         xfs_filblks_t                   count;
         xfs_exntst_t                    state;
-       enum xfs_bmap_intent_type       type;
-       bool                            op_ok;
         unsigned int                    bui_type;
         int                             whichfork;
         int                             error = 0;
  
         /* Only one mapping operation per BUI... */
-       if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
-               xfs_bui_release(buip);
+       if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
                 return -EFSCORRUPTED;
-       }
  
         /*
          * First check the validity of the extent described by the
@@ -457,76 +454,58 @@ xfs_bui_item_recover(
                            XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
         inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
                         XFS_INO_TO_FSB(mp, bmap->me_owner)));
-       switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+       state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+                       XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+       whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+                       XFS_ATTR_FORK : XFS_DATA_FORK;
+       bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+       switch (bui_type) {
         case XFS_BMAP_MAP:
         case XFS_BMAP_UNMAP:
-               op_ok = true;
                 break;
         default:
-               op_ok = false;
-               break;
+               return -EFSCORRUPTED;
         }
-       if (!op_ok || startblock_fsb == 0 ||
+       if (startblock_fsb == 0 ||
             bmap->me_len == 0 ||
             inode_fsb == 0 ||
             startblock_fsb >= mp->m_sb.sb_dblocks ||
             bmap->me_len >= mp->m_sb.sb_agblocks ||
             inode_fsb >= mp->m_sb.sb_dblocks ||
-           (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
-               /*
-                * This will pull the BUI from the AIL and
-                * free the memory associated with it.
-                */
-               xfs_bui_release(buip);
+           (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
                 return -EFSCORRUPTED;
-       }
  
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
-                       XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+       /* Grab the inode. */
+       error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
         if (error)
                 return error;
-       /*
-        * Recovery stashes all deferred ops during intent processing and
-        * finishes them on completion. Transfer current dfops state to this
-        * transaction and transfer the result back before we return.
-        */
-       xfs_defer_move(tp, parent_tp);
-       budp = xfs_trans_get_bud(tp, buip);
  
-       /* Grab the inode. */
-       error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+       error = xfs_qm_dqattach(ip);
         if (error)
-               goto err_inode;
+               goto err_rele;
  
         if (VFS_I(ip)->i_nlink == 0)
                 xfs_iflags_set(ip, XFS_IRECOVERY);
  
-       /* Process deferred bmap item. */
-       state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
-                       XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
-       whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
-                       XFS_ATTR_FORK : XFS_DATA_FORK;
-       bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
-       switch (bui_type) {
-       case XFS_BMAP_MAP:
-       case XFS_BMAP_UNMAP:
-               type = bui_type;
-               break;
-       default:
-               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
-               error = -EFSCORRUPTED;
-               goto err_inode;
-       }
+       /* Allocate transaction and do the work. */
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+       if (error)
+               goto err_rele;
+
+       budp = xfs_trans_get_bud(tp, buip);
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, 0);
  
         count = bmap->me_len;
-       error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
-                       bmap->me_startoff, bmap->me_startblock, &count, state);
+       error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+                       whichfork, bmap->me_startoff, bmap->me_startblock,
+                       &count, state);
         if (error)
-               goto err_inode;
+               goto err_cancel;
  
         if (count > 0) {
-               ASSERT(type == XFS_BMAP_UNMAP);
+               ASSERT(bui_type == XFS_BMAP_UNMAP);
                 irec.br_startblock = bmap->me_startblock;
                 irec.br_blockcount = count;
                 irec.br_startoff = bmap->me_startoff;
@@ -534,20 +513,24 @@ xfs_bui_item_recover(
                 xfs_bmap_unmap_extent(tp, ip, &irec);
         }
  
-       xfs_defer_move(parent_tp, tp);
-       error = xfs_trans_commit(tp);
+       /*
+        * Commit transaction, which frees the transaction and saves the inode
+        * for later replay activities.
+        */
+       error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
+       if (error)
+               goto err_unlock;
+
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
         xfs_irele(ip);
+       return 0;
  
-       return error;
-
-err_inode:
-       xfs_defer_move(parent_tp, tp);
+err_cancel:
         xfs_trans_cancel(tp);
-       if (ip) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               xfs_irele(ip);
-       }
+err_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
+       xfs_irele(ip);
         return error;
  }
  
@@ -559,6 +542,32 @@ xfs_bui_item_match(
         return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
  }
  
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_bui_item_relog(
+       struct xfs_log_item             *intent,
+       struct xfs_trans                *tp)
+{
+       struct xfs_bud_log_item         *budp;
+       struct xfs_bui_log_item         *buip;
+       struct xfs_map_extent           *extp;
+       unsigned int                    count;
+
+       count = BUI_ITEM(intent)->bui_format.bui_nextents;
+       extp = BUI_ITEM(intent)->bui_format.bui_extents;
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       budp = xfs_trans_get_bud(tp, BUI_ITEM(intent));
+       set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+       buip = xfs_bui_init(tp->t_mountp);
+       memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp));
+       atomic_set(&buip->bui_next_extent, count);
+       xfs_trans_add_item(tp, &buip->bui_item);
+       set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+       return &buip->bui_item;
+}
+
  static const struct xfs_item_ops xfs_bui_item_ops = {
         .iop_size       = xfs_bui_item_size,
         .iop_format     = xfs_bui_item_format,
@@ -566,6 +575,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
         .iop_release    = xfs_bui_item_release,
         .iop_recover    = xfs_bui_item_recover,
         .iop_match      = xfs_bui_item_match,
+       .iop_relog      = xfs_bui_item_relog,
  };
  
  /*
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c

index 24c7a8d..d44e8b4 100644 (file)
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -719,6 +719,8 @@ xlog_recover_get_buf_lsn(
         case XFS_ABTC_MAGIC:
         case XFS_RMAP_CRC_MAGIC:
         case XFS_REFC_CRC_MAGIC:
+       case XFS_FIBT_CRC_MAGIC:
+       case XFS_FIBT_MAGIC:
         case XFS_IBT_CRC_MAGIC:
         case XFS_IBT_MAGIC: {
                 struct xfs_btree_block *btb = blk;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

index 3072814..1d95ed3 100644 (file)
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -831,8 +831,8 @@ xfs_qm_dqget_checks(
  }
  
  /*
- * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
- * dquot, doing an allocation (if requested) as needed.
+ * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a
+ * locked dquot, doing an allocation (if requested) as needed.
   */
  int
  xfs_qm_dqget(
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c

index 6cb8cd1..6c11bfc 100644 (file)
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -585,10 +585,10 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
  STATIC int
  xfs_efi_item_recover(
         struct xfs_log_item             *lip,
-       struct xfs_trans                *parent_tp)
+       struct list_head                *capture_list)
  {
         struct xfs_efi_log_item         *efip = EFI_ITEM(lip);
-       struct xfs_mount                *mp = parent_tp->t_mountp;
+       struct xfs_mount                *mp = lip->li_mountp;
         struct xfs_efd_log_item         *efdp;
         struct xfs_trans                *tp;
         struct xfs_extent               *extp;
@@ -608,14 +608,8 @@ xfs_efi_item_recover(
                 if (startblock_fsb == 0 ||
                     extp->ext_len == 0 ||
                     startblock_fsb >= mp->m_sb.sb_dblocks ||
-                   extp->ext_len >= mp->m_sb.sb_agblocks) {
-                       /*
-                        * This will pull the EFI from the AIL and
-                        * free the memory associated with it.
-                        */
-                       xfs_efi_release(efip);
+                   extp->ext_len >= mp->m_sb.sb_agblocks)
                         return -EFSCORRUPTED;
-               }
         }
  
         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -633,8 +627,7 @@ xfs_efi_item_recover(
  
         }
  
-       error = xfs_trans_commit(tp);
-       return error;
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
  
  abort_error:
         xfs_trans_cancel(tp);
@@ -649,6 +642,34 @@ xfs_efi_item_match(
         return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
  }
  
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_efi_item_relog(
+       struct xfs_log_item             *intent,
+       struct xfs_trans                *tp)
+{
+       struct xfs_efd_log_item         *efdp;
+       struct xfs_efi_log_item         *efip;
+       struct xfs_extent               *extp;
+       unsigned int                    count;
+
+       count = EFI_ITEM(intent)->efi_format.efi_nextents;
+       extp = EFI_ITEM(intent)->efi_format.efi_extents;
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       efdp = xfs_trans_get_efd(tp, EFI_ITEM(intent), count);
+       efdp->efd_next_extent = count;
+       memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
+       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+       efip = xfs_efi_init(tp->t_mountp, count);
+       memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
+       atomic_set(&efip->efi_next_extent, count);
+       xfs_trans_add_item(tp, &efip->efi_item);
+       set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
+       return &efip->efi_item;
+}
+
  static const struct xfs_item_ops xfs_efi_item_ops = {
         .iop_size       = xfs_efi_item_size,
         .iop_format     = xfs_efi_item_format,
@@ -656,6 +677,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
         .iop_release    = xfs_efi_item_release,
         .iop_recover    = xfs_efi_item_recover,
         .iop_match      = xfs_efi_item_match,
+       .iop_relog      = xfs_efi_item_relog,
  };
  
  /*
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index 1a88025..db23e45 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -33,39 +33,7 @@ enum xfs_fstrm_alloc {
  /*
   * Allocation group filestream associations are tracked with per-ag atomic
   * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
- * particular AG already has active filestreams associated with it. The mount
- * point's m_peraglock is used to protect these counters from per-ag array
- * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
- * about to reallocate the array, it calls xfs_filestream_flush() with the
- * m_peraglock held in write mode.
- *
- * Since xfs_mru_cache_flush() guarantees that all the free functions for all
- * the cache elements have finished executing before it returns, it's safe for
- * the free functions to use the atomic counters without m_peraglock protection.
- * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
- * whether it was called with the m_peraglock held in read mode, write mode or
- * not held at all.  The race condition this addresses is the following:
- *
- *  - The work queue scheduler fires and pulls a filestream directory cache
- *    element off the LRU end of the cache for deletion, then gets pre-empted.
- *  - A growfs operation grabs the m_peraglock in write mode, flushes all the
- *    remaining items from the cache and reallocates the mount point's per-ag
- *    array, resetting all the counters to zero.
- *  - The work queue thread resumes and calls the free function for the element
- *    it started cleaning up earlier.  In the process it decrements the
- *    filestreams counter for an AG that now has no references.
- *
- * With a shrinkfs feature, the above scenario could panic the system.
- *
- * All other uses of the following macros should be protected by either the
- * m_peraglock held in read mode, or the cache's internal locking exposed by the
- * interval between a call to xfs_mru_cache_lookup() and a call to
- * xfs_mru_cache_done().  In addition, the m_peraglock must be held in read mode
- * when new elements are added to the cache.
- *
- * Combined, these locking rules ensure that no associations will ever exist in
- * the cache that reference per-ag array elements that have since been
- * reallocated.
+ * particular AG already has active filestreams associated with it.
   */
  int
  xfs_filestream_peek_ag(
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c

index 4eebcec..9ce5e7d 100644 (file)
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -26,7 +26,7 @@
  #include "xfs_rtalloc.h"
  
  /* Convert an xfs_fsmap to an fsmap. */
-void
+static void
  xfs_fsmap_from_internal(
         struct fsmap            *dest,
         struct xfs_fsmap        *src)
@@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap(
  /* getfsmap query state */
  struct xfs_getfsmap_info {
         struct xfs_fsmap_head   *head;
-       xfs_fsmap_format_t      formatter;      /* formatting fn */
-       void                    *format_arg;    /* format buffer */
+       struct fsmap            *fsmap_recs;    /* mapping records */
         struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
         xfs_daddr_t             next_daddr;     /* next daddr we expect */
         u64                     missing_owner;  /* owner of holes */
@@ -224,6 +223,20 @@ xfs_getfsmap_is_shared(
         return 0;
  }
  
+static inline void
+xfs_getfsmap_format(
+       struct xfs_mount                *mp,
+       struct xfs_fsmap                *xfm,
+       struct xfs_getfsmap_info        *info)
+{
+       struct fsmap                    *rec;
+
+       trace_xfs_getfsmap_mapping(mp, xfm);
+
+       rec = &info->fsmap_recs[info->head->fmh_entries++];
+       xfs_fsmap_from_internal(rec, xfm);
+}
+
  /*
   * Format a reverse mapping for getfsmap, having translated rm_startblock
   * into the appropriate daddr units.
@@ -256,6 +269,9 @@ xfs_getfsmap_helper(
  
         /* Are we just counting mappings? */
         if (info->head->fmh_count == 0) {
+               if (info->head->fmh_entries == UINT_MAX)
+                       return -ECANCELED;
+
                 if (rec_daddr > info->next_daddr)
                         info->head->fmh_entries++;
  
@@ -285,10 +301,7 @@ xfs_getfsmap_helper(
                 fmr.fmr_offset = 0;
                 fmr.fmr_length = rec_daddr - info->next_daddr;
                 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
-               error = info->formatter(&fmr, info->format_arg);
-               if (error)
-                       return error;
-               info->head->fmh_entries++;
+               xfs_getfsmap_format(mp, &fmr, info);
         }
  
         if (info->last)
@@ -320,11 +333,8 @@ xfs_getfsmap_helper(
                 if (shared)
                         fmr.fmr_flags |= FMR_OF_SHARED;
         }
-       error = info->formatter(&fmr, info->format_arg);
-       if (error)
-               return error;
-       info->head->fmh_entries++;
  
+       xfs_getfsmap_format(mp, &fmr, info);
  out:
         rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
         if (info->next_daddr < rec_daddr)
@@ -792,11 +802,11 @@ xfs_getfsmap_check_keys(
  #endif /* CONFIG_XFS_RT */
  
  /*
- * Get filesystem's extents as described in head, and format for
- * output.  Calls formatter to fill the user's buffer until all
- * extents are mapped, until the passed-in head->fmh_count slots have
- * been filled, or until the formatter short-circuits the loop, if it
- * is tracking filled-in extents on its own.
+ * Get filesystem's extents as described in head, and format for output. Fills
+ * in the supplied records array until there are no more reverse mappings to
+ * return or head.fmh_entries == head.fmh_count.  In the second case, this
+ * function returns -ECANCELED to indicate that more records would have been
+ * returned.
   *
   * Key to Confusion
   * ----------------
@@ -816,8 +826,7 @@ int
  xfs_getfsmap(
         struct xfs_mount                *mp,
         struct xfs_fsmap_head           *head,
-       xfs_fsmap_format_t              formatter,
-       void                            *arg)
+       struct fsmap                    *fsmap_recs)
  {
         struct xfs_trans                *tp = NULL;
         struct xfs_fsmap                dkeys[2];       /* per-dev keys */
@@ -892,8 +901,7 @@ xfs_getfsmap(
  
         info.next_daddr = head->fmh_keys[0].fmr_physical +
                           head->fmh_keys[0].fmr_length;
-       info.formatter = formatter;
-       info.format_arg = arg;
+       info.fsmap_recs = fsmap_recs;
         info.head = head;
  
         /*
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h

index c6c5773..a077578 100644 (file)
--- a/fs/xfs/xfs_fsmap.h
+++ b/fs/xfs/xfs_fsmap.h
@@ -27,13 +27,9 @@ struct xfs_fsmap_head {
         struct xfs_fsmap fmh_keys[2];   /* low and high keys */
  };
  
-void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
  void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
  
-/* fsmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
-
  int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
-               xfs_fsmap_format_t formatter, void *arg);
+               struct fsmap *out_recs);
  
  #endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 4962497..2bfbcf2 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -698,6 +698,68 @@ out_unlock:
         return error;
  }
  
+/* Propagate di_flags from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags(
+       struct xfs_inode        *ip,
+       const struct xfs_inode  *pip)
+{
+       unsigned int            di_flags = 0;
+       umode_t                 mode = VFS_I(ip)->i_mode;
+
+       if (S_ISDIR(mode)) {
+               if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+                       di_flags |= XFS_DIFLAG_RTINHERIT;
+               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                       ip->i_d.di_extsize = pip->i_d.di_extsize;
+               }
+               if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+                       di_flags |= XFS_DIFLAG_PROJINHERIT;
+       } else if (S_ISREG(mode)) {
+               if ((pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) &&
+                   xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+                       di_flags |= XFS_DIFLAG_REALTIME;
+               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                       di_flags |= XFS_DIFLAG_EXTSIZE;
+                       ip->i_d.di_extsize = pip->i_d.di_extsize;
+               }
+       }
+       if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
+           xfs_inherit_noatime)
+               di_flags |= XFS_DIFLAG_NOATIME;
+       if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
+           xfs_inherit_nodump)
+               di_flags |= XFS_DIFLAG_NODUMP;
+       if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
+           xfs_inherit_sync)
+               di_flags |= XFS_DIFLAG_SYNC;
+       if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
+           xfs_inherit_nosymlinks)
+               di_flags |= XFS_DIFLAG_NOSYMLINKS;
+       if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
+           xfs_inherit_nodefrag)
+               di_flags |= XFS_DIFLAG_NODEFRAG;
+       if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+               di_flags |= XFS_DIFLAG_FILESTREAM;
+
+       ip->i_d.di_flags |= di_flags;
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags2(
+       struct xfs_inode        *ip,
+       const struct xfs_inode  *pip)
+{
+       if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+               ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+               ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+       }
+       if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+               ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
+}
+
  /*
   * Allocate an inode on disk and return a copy of its in-core version.
   * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
@@ -841,54 +903,10 @@ xfs_ialloc(
                 break;
         case S_IFREG:
         case S_IFDIR:
-               if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-                       uint            di_flags = 0;
-
-                       if (S_ISDIR(mode)) {
-                               if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
-                                       di_flags |= XFS_DIFLAG_RTINHERIT;
-                               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
-                                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-                                       ip->i_d.di_extsize = pip->i_d.di_extsize;
-                               }
-                               if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-                                       di_flags |= XFS_DIFLAG_PROJINHERIT;
-                       } else if (S_ISREG(mode)) {
-                               if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
-                                       di_flags |= XFS_DIFLAG_REALTIME;
-                               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
-                                       di_flags |= XFS_DIFLAG_EXTSIZE;
-                                       ip->i_d.di_extsize = pip->i_d.di_extsize;
-                               }
-                       }
-                       if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
-                           xfs_inherit_noatime)
-                               di_flags |= XFS_DIFLAG_NOATIME;
-                       if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
-                           xfs_inherit_nodump)
-                               di_flags |= XFS_DIFLAG_NODUMP;
-                       if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
-                           xfs_inherit_sync)
-                               di_flags |= XFS_DIFLAG_SYNC;
-                       if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
-                           xfs_inherit_nosymlinks)
-                               di_flags |= XFS_DIFLAG_NOSYMLINKS;
-                       if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
-                           xfs_inherit_nodefrag)
-                               di_flags |= XFS_DIFLAG_NODEFRAG;
-                       if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
-                               di_flags |= XFS_DIFLAG_FILESTREAM;
-
-                       ip->i_d.di_flags |= di_flags;
-               }
-               if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) {
-                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
-                               ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
-                               ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
-                       }
-                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
-                               ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
-               }
+               if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY))
+                       xfs_inode_inherit_flags(ip, pip);
+               if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY))
+                       xfs_inode_inherit_flags2(ip, pip);
                 /* FALLTHROUGH */
         case S_IFLNK:
                 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -1516,17 +1534,10 @@ xfs_itruncate_extents_flags(
                 if (error)
                         goto out;
  
-               /*
-                * Duplicate the transaction that has the permanent
-                * reservation and commit the old transaction.
-                */
+               /* Free the just-unmapped extents. */
                 error = xfs_defer_finish(&tp);
                 if (error)
                         goto out;
-
-               error = xfs_trans_roll_inode(&tp, ip);
-               if (error)
-                       goto out;
         }
  
         if (whichfork == XFS_DATA_FORK) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index bca7659..3fbd98f 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1716,39 +1716,17 @@ out_free_buf:
         return error;
  }
  
-struct getfsmap_info {
-       struct xfs_mount        *mp;
-       struct fsmap_head __user *data;
-       unsigned int            idx;
-       __u32                   last_flags;
-};
-
-STATIC int
-xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
-{
-       struct getfsmap_info    *info = priv;
-       struct fsmap            fm;
-
-       trace_xfs_getfsmap_mapping(info->mp, xfm);
-
-       info->last_flags = xfm->fmr_flags;
-       xfs_fsmap_from_internal(&fm, xfm);
-       if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
-                       sizeof(struct fsmap)))
-               return -EFAULT;
-
-       return 0;
-}
-
  STATIC int
  xfs_ioc_getfsmap(
         struct xfs_inode        *ip,
         struct fsmap_head       __user *arg)
  {
-       struct getfsmap_info    info = { NULL };
         struct xfs_fsmap_head   xhead = {0};
         struct fsmap_head       head;
-       bool                    aborted = false;
+       struct fsmap            *recs;
+       unsigned int            count;
+       __u32                   last_flags = 0;
+       bool                    done = false;
         int                     error;
  
         if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1760,38 +1738,112 @@ xfs_ioc_getfsmap(
                        sizeof(head.fmh_keys[1].fmr_reserved)))
                 return -EINVAL;
  
+       /*
+        * Use an internal memory buffer so that we don't have to copy fsmap
+        * data to userspace while holding locks.  Try to allocate up to 128k
+        * for the buffer; if that fails, fall back to at most a single page.
+        */
+       count = min_t(unsigned int, head.fmh_count,
+                       131072 / sizeof(struct fsmap));
+       recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+       if (!recs) {
+               count = min_t(unsigned int, head.fmh_count,
+                               PAGE_SIZE / sizeof(struct fsmap));
+               recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
+               if (!recs)
+                       return -ENOMEM;
+       }
+
         xhead.fmh_iflags = head.fmh_iflags;
-       xhead.fmh_count = head.fmh_count;
         xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
         xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
  
         trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
         trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
  
-       info.mp = ip->i_mount;
-       info.data = arg;
-       error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
-       if (error == -ECANCELED) {
-               error = 0;
-               aborted = true;
-       } else if (error)
-               return error;
+       head.fmh_entries = 0;
+       do {
+               struct fsmap __user     *user_recs;
+               struct fsmap            *last_rec;
+
+               user_recs = &arg->fmh_recs[head.fmh_entries];
+               xhead.fmh_entries = 0;
+               xhead.fmh_count = min_t(unsigned int, count,
+                                       head.fmh_count - head.fmh_entries);
+
+               /* Run query, record how many entries we got. */
+               error = xfs_getfsmap(ip->i_mount, &xhead, recs);
+               switch (error) {
+               case 0:
+                       /*
+                        * There are no more records in the result set.  Copy
+                        * whatever we got to userspace and break out.
+                        */
+                       done = true;
+                       break;
+               case -ECANCELED:
+                       /*
+                        * The internal memory buffer is full.  Copy whatever
+                        * records we got to userspace and go again if we have
+                        * not yet filled the userspace buffer.
+                        */
+                       error = 0;
+                       break;
+               default:
+                       goto out_free;
+               }
+               head.fmh_entries += xhead.fmh_entries;
+               head.fmh_oflags = xhead.fmh_oflags;
  
-       /* If we didn't abort, set the "last" flag in the last fmx */
-       if (!aborted && info.idx) {
-               info.last_flags |= FMR_OF_LAST;
-               if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
-                               &info.last_flags, sizeof(info.last_flags)))
-                       return -EFAULT;
+               /*
+                * If the caller wanted a record count or there aren't any
+                * new records to return, we're done.
+                */
+               if (head.fmh_count == 0 || xhead.fmh_entries == 0)
+                       break;
+
+               /* Copy all the records we got out to userspace. */
+               if (copy_to_user(user_recs, recs,
+                                xhead.fmh_entries * sizeof(struct fsmap))) {
+                       error = -EFAULT;
+                       goto out_free;
+               }
+
+               /* Remember the flags of the last record we copied to userspace. */
+               last_rec = &recs[xhead.fmh_entries - 1];
+               last_flags = last_rec->fmr_flags;
+
+               /* Set up the low key for the next iteration. */
+               xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
+               trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
+       } while (!done && head.fmh_entries < head.fmh_count);
+
+       /*
+        * If there are no more records in the query result set and we're not
+        * in counting mode, mark the last record returned with the LAST flag.
+        */
+       if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
+               struct fsmap __user     *user_rec;
+
+               last_flags |= FMR_OF_LAST;
+               user_rec = &arg->fmh_recs[head.fmh_entries - 1];
+
+               if (copy_to_user(&user_rec->fmr_flags, &last_flags,
+                                       sizeof(last_flags))) {
+                       error = -EFAULT;
+                       goto out_free;
+               }
         }
  
         /* copy back header */
-       head.fmh_entries = xhead.fmh_entries;
-       head.fmh_oflags = xhead.fmh_oflags;
-       if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
-               return -EFAULT;
+       if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
+               error = -EFAULT;
+               goto out_free;
+       }
  
-       return 0;
+out_free:
+       kmem_free(recs);
+       return error;
  }
  
  STATIC int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 80a13c8..5e16545 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -237,7 +237,7 @@ xfs_vn_create(
         umode_t         mode,
         bool            flags)
  {
-       return xfs_vn_mknod(dir, dentry, mode, 0);
+       return xfs_generic_create(dir, dentry, mode, 0, false);
  }
  
  STATIC int
@@ -246,7 +246,7 @@ xfs_vn_mkdir(
         struct dentry   *dentry,
         umode_t         mode)
  {
-       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+       return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
  }
  
  STATIC struct dentry *
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h

index ab737fe..ad10097 100644 (file)
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -123,7 +123,6 @@ typedef __u32                       xfs_nlink_t;
  #define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
  #define EFSBADCRC      EBADMSG         /* Bad CRC detected */
  
-#define SYNCHRONIZE()  barrier()
  #define __return_address __builtin_return_address(0)
  
  /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index ad0c69e..fa2d05e 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1475,14 +1475,14 @@ xlog_commit_record(
  }
  
  /*
- * Push on the buffer cache code if we ever use more than 75% of the on-disk
- * log space.  This code pushes on the lsn which would supposedly free up
- * the 25% which we want to leave free.  We may need to adopt a policy which
- * pushes on an lsn which is further along in the log once we reach the high
- * water mark.  In this manner, we would be creating a low water mark.
+ * Compute the LSN that we'd need to push the log tail towards in order to have
+ * (a) enough on-disk log space to log the number of bytes specified, (b) at
+ * least 25% of the log space free, and (c) at least 256 blocks free.  If the
+ * log free space already meets all three thresholds, this function returns
+ * NULLCOMMITLSN.
   */
-STATIC void
-xlog_grant_push_ail(
+xfs_lsn_t
+xlog_grant_push_threshold(
         struct xlog     *log,
         int             need_bytes)
  {
@@ -1508,7 +1508,7 @@ xlog_grant_push_ail(
         free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
         free_threshold = max(free_threshold, 256);
         if (free_blocks >= free_threshold)
-               return;
+               return NULLCOMMITLSN;
  
         xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
                                                 &threshold_block);
@@ -1528,13 +1528,33 @@ xlog_grant_push_ail(
         if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
                 threshold_lsn = last_sync_lsn;
  
+       return threshold_lsn;
+}
+
+/*
+ * Push the tail of the log if we need to do so to maintain the free log space
+ * thresholds set out by xlog_grant_push_threshold.  We may need to adopt a
+ * policy which pushes on an lsn which is further along in the log once we
+ * reach the high water mark.  In this manner, we would be creating a low water
+ * mark.
+ */
+STATIC void
+xlog_grant_push_ail(
+       struct xlog     *log,
+       int             need_bytes)
+{
+       xfs_lsn_t       threshold_lsn;
+
+       threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
+       if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
+               return;
+
         /*
          * Get the transaction layer to kick the dirty buffers out to
          * disk asynchronously. No point in trying to do this if
          * the filesystem is shutting down.
          */
-       if (!XLOG_FORCED_SHUTDOWN(log))
-               xfs_ail_push(log->l_ailp, threshold_lsn);
+       xfs_ail_push(log->l_ailp, threshold_lsn);
  }
  
  /*
@@ -1604,9 +1624,7 @@ xlog_cksum(
                 int             i;
                 int             xheads;
  
-               xheads = size / XLOG_HEADER_CYCLE_SIZE;
-               if (size % XLOG_HEADER_CYCLE_SIZE)
-                       xheads++;
+               xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
  
                 for (i = 1; i < xheads; i++) {
                         crc = crc32c(crc, &xhdr[i].hic_xheader,
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h

index 1412d69..58c3fcb 100644 (file)
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -141,4 +141,6 @@ void        xfs_log_quiesce(struct xfs_mount *mp);
  bool   xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
  bool   xfs_log_in_recovery(struct xfs_mount *);
  
+xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
+
  #endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index a17d788..a8289ad 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -371,6 +371,19 @@ out:
         return error;
  }
  
+static inline int /* number of header blocks (hblks) for the log record headed by rh */
+xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh)
+{
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               int     h_size = be32_to_cpu(rh->h_size); /* raw on-disk h_size; a caller's local h_size fixup is not reflected here */
+
+               if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) &&
+                   h_size > XLOG_HEADER_CYCLE_SIZE)
+                       return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); /* a partial XLOG_HEADER_CYCLE_SIZE chunk counts as a whole one */
+       }
+       return 1; /* haslogv2 unset, record not v2, or h_size <= XLOG_HEADER_CYCLE_SIZE */
+}
+
  /*
   * Potentially backup over partial log record write.
   *
@@ -463,15 +476,7 @@ xlog_find_verify_log_record(
          * reset last_blk.  Only when last_blk points in the middle of a log
          * record do we update last_blk.
          */
-       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               uint    h_size = be32_to_cpu(head->h_size);
-
-               xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
-               if (h_size % XLOG_HEADER_CYCLE_SIZE)
-                       xhdrs++;
-       } else {
-               xhdrs = 1;
-       }
+       xhdrs = xlog_logrec_hblks(log, head);
  
         if (*last_blk - i + extra_bblks !=
             BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
@@ -1158,22 +1163,7 @@ xlog_check_unmount_rec(
          * below. We won't want to clear the unmount record if there is one, so
          * we pass the lsn of the unmount record rather than the block after it.
          */
-       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               int     h_size = be32_to_cpu(rhead->h_size);
-               int     h_version = be32_to_cpu(rhead->h_version);
-
-               if ((h_version & XLOG_VERSION_2) &&
-                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
-                               hblks++;
-               } else {
-                       hblks = 1;
-               }
-       } else {
-               hblks = 1;
-       }
-
+       hblks = xlog_logrec_hblks(log, rhead);
         after_umount_blk = xlog_wrap_logbno(log,
                         rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)));
  
@@ -2444,44 +2434,66 @@ xlog_recover_process_data(
  /* Take all the collected deferred ops and finish them in order. */
  static int
  xlog_finish_defer_ops(
-       struct xfs_trans        *parent_tp)
+       struct xfs_mount        *mp,
+       struct list_head        *capture_list)
  {
-       struct xfs_mount        *mp = parent_tp->t_mountp;
+       struct xfs_defer_capture *dfc, *next;
         struct xfs_trans        *tp;
-       int64_t                 freeblks;
-       uint                    resblks;
-       int                     error;
+       struct xfs_inode        *ip; /* set through xfs_defer_ops_continue(); checked for NULL below */
+       int                     error = 0;
  
-       /*
-        * We're finishing the defer_ops that accumulated as a result of
-        * recovering unfinished intent items during log recovery.  We
-        * reserve an itruncate transaction because it is the largest
-        * permanent transaction type.  Since we're the only user of the fs
-        * right now, take 93% (15/16) of the available free blocks.  Use
-        * weird math to avoid a 64-bit division.
-        */
-       freeblks = percpu_counter_sum(&mp->m_fdblocks);
-       if (freeblks <= 0)
-               return -ENOSPC;
-       resblks = min_t(int64_t, UINT_MAX, freeblks);
-       resblks = (resblks * 15) >> 4;
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
-                       0, XFS_TRANS_RESERVE, &tp);
-       if (error)
-               return error;
-       /* transfer all collected dfops to this transaction */
-       xfs_defer_move(tp, parent_tp);
+       list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+               struct xfs_trans_res    resv;
+
+               /*
+                * Create a new transaction reservation from the captured
+                * information.  Set logcount to 1 to force the new transaction
+                * to regrant every roll so that we can make forward progress
+                * in recovery no matter how full the log might be.
+                */
+               resv.tr_logres = dfc->dfc_logres;
+               resv.tr_logcount = 1;
+               resv.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+
+               error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
+                               dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
+               if (error)
+                       return error; /* dfc is still linked on capture_list at this point */
  
-       return xfs_trans_commit(tp);
+               /*
+                * Transfer to this new transaction all the dfops we captured
+                * from recovering a single intent item.
+                */
+               list_del_init(&dfc->dfc_list);
+               xfs_defer_ops_continue(dfc, tp, &ip);
+
+               error = xfs_trans_commit(tp);
+               if (ip) { /* NOTE(review): ip presumably comes back ILOCK_EXCL-held with a reference - confirm */
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       xfs_irele(ip);
+               }
+               if (error)
+                       return error; /* entries not yet processed remain on capture_list */
+       }
+
+       ASSERT(list_empty(capture_list));
+       return 0;
  }
  
-/* Is this log item a deferred action intent? */
-static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+/* Release all the captured defer ops and capture structures in this list. */
+static void
+xlog_abort_defer_ops(
+       struct xfs_mount                *mp,
+       struct list_head                *capture_list)
  {
-       return lip->li_ops->iop_recover != NULL &&
-              lip->li_ops->iop_match != NULL;
-}
+       struct xfs_defer_capture        *dfc;
+       struct xfs_defer_capture        *next;
  
+       list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+               list_del_init(&dfc->dfc_list); /* unlink before release; the _safe iterator already holds next */
+               xfs_defer_ops_release(mp, dfc); /* NOTE(review): presumably frees dfc and its pending work - confirm */
+       }
+}
  /*
   * When this is called, all of the log intent items which did not have
   * corresponding log done items should be in the AIL.  What we do now
@@ -2502,35 +2514,23 @@ STATIC int
  xlog_recover_process_intents(
         struct xlog             *log)
  {
-       struct xfs_trans        *parent_tp;
+       LIST_HEAD(capture_list);
         struct xfs_ail_cursor   cur;
         struct xfs_log_item     *lip;
         struct xfs_ail          *ailp;
-       int                     error;
+       int                     error = 0;
  #if defined(DEBUG) || defined(XFS_WARN)
         xfs_lsn_t               last_lsn;
  #endif
  
-       /*
-        * The intent recovery handlers commit transactions to complete recovery
-        * for individual intents, but any new deferred operations that are
-        * queued during that process are held off until the very end. The
-        * purpose of this transaction is to serve as a container for deferred
-        * operations. Each intent recovery handler must transfer dfops here
-        * before its local transaction commits, and we'll finish the entire
-        * list below.
-        */
-       error = xfs_trans_alloc_empty(log->l_mp, &parent_tp);
-       if (error)
-               return error;
-
         ailp = log->l_ailp;
         spin_lock(&ailp->ail_lock);
-       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
  #if defined(DEBUG) || defined(XFS_WARN)
         last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
  #endif
-       while (lip != NULL) {
+       for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+            lip != NULL;
+            lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
                 /*
                  * We're done when we see something other than an intent.
                  * There should be no intents left in the AIL now.
@@ -2552,26 +2552,29 @@ xlog_recover_process_intents(
  
                 /*
                  * NOTE: If your intent processing routine can create more
-                * deferred ops, you /must/ attach them to the transaction in
-                * this routine or else those subsequent intents will get
+                * deferred ops, you /must/ attach them to the capture list in
+                * the recover routine or else those subsequent intents will be
                  * replayed in the wrong order!
                  */
-               if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
-                       spin_unlock(&ailp->ail_lock);
-                       error = lip->li_ops->iop_recover(lip, parent_tp);
-                       spin_lock(&ailp->ail_lock);
-               }
+               spin_unlock(&ailp->ail_lock);
+               error = lip->li_ops->iop_recover(lip, &capture_list);
+               spin_lock(&ailp->ail_lock);
                 if (error)
-                       goto out;
-               lip = xfs_trans_ail_cursor_next(ailp, &cur);
+                       break;
         }
-out:
+
         xfs_trans_ail_cursor_done(&cur);
         spin_unlock(&ailp->ail_lock);
-       if (!error)
-               error = xlog_finish_defer_ops(parent_tp);
-       xfs_trans_cancel(parent_tp);
+       if (error)
+               goto err;
+
+       error = xlog_finish_defer_ops(log->l_mp, &capture_list);
+       if (error)
+               goto err;
  
+       return 0;
+err:
+       xlog_abort_defer_ops(log->l_mp, &capture_list);
         return error;
  }
  
@@ -2878,7 +2881,8 @@ STATIC int
  xlog_valid_rec_header(
         struct xlog             *log,
         struct xlog_rec_header  *rhead,
-       xfs_daddr_t             blkno)
+       xfs_daddr_t             blkno,
+       int                     bufsize)
  {
         int                     hlen;
  
@@ -2894,10 +2898,14 @@ xlog_valid_rec_header(
                 return -EFSCORRUPTED;
         }
  
-       /* LR body must have data or it wouldn't have been written */
+       /*
+        * LR body must have data (or it wouldn't have been written)
+        * and h_len must not be greater than LR buffer size.
+        */
         hlen = be32_to_cpu(rhead->h_len);
-       if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX))
+       if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize))
                 return -EFSCORRUPTED;
+
         if (XFS_IS_CORRUPT(log->l_mp,
                            blkno > log->l_logBBsize || blkno > INT_MAX))
                 return -EFSCORRUPTED;
@@ -2958,9 +2966,6 @@ xlog_do_recovery_pass(
                         goto bread_err1;
  
                 rhead = (xlog_rec_header_t *)offset;
-               error = xlog_valid_rec_header(log, rhead, tail_blk);
-               if (error)
-                       goto bread_err1;
  
                 /*
                  * xfsprogs has a bug where record length is based on lsunit but
@@ -2975,30 +2980,22 @@ xlog_do_recovery_pass(
                  */
                 h_size = be32_to_cpu(rhead->h_size);
                 h_len = be32_to_cpu(rhead->h_len);
-               if (h_len > h_size) {
-                       if (h_len <= log->l_mp->m_logbsize &&
-                           be32_to_cpu(rhead->h_num_logops) == 1) {
-                               xfs_warn(log->l_mp,
+               if (h_len > h_size && h_len <= log->l_mp->m_logbsize &&
+                   rhead->h_num_logops == cpu_to_be32(1)) {
+                       xfs_warn(log->l_mp,
                 "invalid iclog size (%d bytes), using lsunit (%d bytes)",
-                                        h_size, log->l_mp->m_logbsize);
-                               h_size = log->l_mp->m_logbsize;
-                       } else {
-                               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW,
-                                               log->l_mp);
-                               error = -EFSCORRUPTED;
-                               goto bread_err1;
-                       }
+                                h_size, log->l_mp->m_logbsize);
+                       h_size = log->l_mp->m_logbsize;
                 }
  
-               if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
-                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
-                               hblks++;
+               error = xlog_valid_rec_header(log, rhead, tail_blk, h_size);
+               if (error)
+                       goto bread_err1;
+
+               hblks = xlog_logrec_hblks(log, rhead);
+               if (hblks != 1) {
                         kmem_free(hbp);
                         hbp = xlog_alloc_buffer(log, hblks);
-               } else {
-                       hblks = 1;
                 }
         } else {
                 ASSERT(log->l_sectBBsize == 1);
@@ -3070,7 +3067,7 @@ xlog_do_recovery_pass(
                         }
                         rhead = (xlog_rec_header_t *)offset;
                         error = xlog_valid_rec_header(log, rhead,
-                                               split_hblks ? blk_no : 0);
+                                       split_hblks ? blk_no : 0, h_size);
                         if (error)
                                 goto bread_err2;
  
@@ -3151,7 +3148,7 @@ xlog_do_recovery_pass(
                         goto bread_err2;
  
                 rhead = (xlog_rec_header_t *)offset;
-               error = xlog_valid_rec_header(log, rhead, blk_no);
+               error = xlog_valid_rec_header(log, rhead, blk_no, h_size);
                 if (error)
                         goto bread_err2;
  
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index 3f82e0c..b2a9abe 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -249,7 +249,6 @@ xfs_qm_unmount_quotas(
  STATIC int
  xfs_qm_dqattach_one(
         struct xfs_inode        *ip,
-       xfs_dqid_t              id,
         xfs_dqtype_t            type,
         bool                    doalloc,
         struct xfs_dquot        **IO_idqpp)
@@ -330,23 +329,23 @@ xfs_qm_dqattach_locked(
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
         if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
-               error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)),
-                               XFS_DQTYPE_USER, doalloc, &ip->i_udquot);
+               error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
+                               doalloc, &ip->i_udquot);
                 if (error)
                         goto done;
                 ASSERT(ip->i_udquot);
         }
  
         if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
-               error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)),
-                               XFS_DQTYPE_GROUP, doalloc, &ip->i_gdquot);
+               error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
+                               doalloc, &ip->i_gdquot);
                 if (error)
                         goto done;
                 ASSERT(ip->i_gdquot);
         }
  
         if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
-               error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQTYPE_PROJ,
+               error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
                                 doalloc, &ip->i_pdquot);
                 if (error)
                         goto done;
@@ -1663,6 +1662,7 @@ xfs_qm_vop_dqalloc(
         }
  
         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+               ASSERT(O_udqpp);
                 if (!uid_eq(inode->i_uid, uid)) {
                         /*
                          * What we need is the dquot that has this uid, and
@@ -1696,6 +1696,7 @@ xfs_qm_vop_dqalloc(
                 }
         }
         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+               ASSERT(O_gdqpp);
                 if (!gid_eq(inode->i_gid, gid)) {
                         xfs_iunlock(ip, lockflags);
                         error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
@@ -1713,9 +1714,10 @@ xfs_qm_vop_dqalloc(
                 }
         }
         if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+               ASSERT(O_pdqpp);
                 if (ip->i_d.di_projid != prid) {
                         xfs_iunlock(ip, lockflags);
-                       error = xfs_qm_dqget(mp, (xfs_dqid_t)prid,
+                       error = xfs_qm_dqget(mp, prid,
                                         XFS_DQTYPE_PROJ, true, &pq);
                         if (error) {
                                 ASSERT(error != -ENOENT);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c

index ca93b64..7529eb6 100644 (file)
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -424,7 +424,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
  STATIC int
  xfs_cui_item_recover(
         struct xfs_log_item             *lip,
-       struct xfs_trans                *parent_tp)
+       struct list_head                *capture_list)
  {
         struct xfs_bmbt_irec            irec;
         struct xfs_cui_log_item         *cuip = CUI_ITEM(lip);
@@ -432,7 +432,7 @@ xfs_cui_item_recover(
         struct xfs_cud_log_item         *cudp;
         struct xfs_trans                *tp;
         struct xfs_btree_cur            *rcur = NULL;
-       struct xfs_mount                *mp = parent_tp->t_mountp;
+       struct xfs_mount                *mp = lip->li_mountp;
         xfs_fsblock_t                   startblock_fsb;
         xfs_fsblock_t                   new_fsb;
         xfs_extlen_t                    new_len;
@@ -467,14 +467,8 @@ xfs_cui_item_recover(
                     refc->pe_len == 0 ||
                     startblock_fsb >= mp->m_sb.sb_dblocks ||
                     refc->pe_len >= mp->m_sb.sb_agblocks ||
-                   (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
-                       /*
-                        * This will pull the CUI from the AIL and
-                        * free the memory associated with it.
-                        */
-                       xfs_cui_release(cuip);
+                   (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS))
                         return -EFSCORRUPTED;
-               }
         }
  
         /*
@@ -493,12 +487,7 @@ xfs_cui_item_recover(
                         mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
         if (error)
                 return error;
-       /*
-        * Recovery stashes all deferred ops during intent processing and
-        * finishes them on completion. Transfer current dfops state to this
-        * transaction and transfer the result back before we return.
-        */
-       xfs_defer_move(tp, parent_tp);
+
         cudp = xfs_trans_get_cud(tp, cuip);
  
         for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -555,13 +544,10 @@ xfs_cui_item_recover(
         }
  
         xfs_refcount_finish_one_cleanup(tp, rcur, error);
-       xfs_defer_move(parent_tp, tp);
-       error = xfs_trans_commit(tp);
-       return error;
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
  
  abort_error:
         xfs_refcount_finish_one_cleanup(tp, rcur, error);
-       xfs_defer_move(parent_tp, tp);
         xfs_trans_cancel(tp);
         return error;
  }
@@ -574,6 +560,32 @@ xfs_cui_item_match(
         return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
  }
  
+/* Relog an intent item to push the log tail forward; returns the new CUI added to @tp. */
+static struct xfs_log_item *
+xfs_cui_item_relog(
+       struct xfs_log_item             *intent,
+       struct xfs_trans                *tp)
+{
+       struct xfs_cud_log_item         *cudp;
+       struct xfs_cui_log_item         *cuip;
+       struct xfs_phys_extent          *extp;
+       unsigned int                    count;
+
+       count = CUI_ITEM(intent)->cui_format.cui_nextents;
+       extp = CUI_ITEM(intent)->cui_format.cui_extents;
+
+       tp->t_flags |= XFS_TRANS_DIRTY; /* set by hand; both items below get XFS_LI_DIRTY directly */
+       cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent)); /* CUD obtained for the old intent */
+       set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+       cuip = xfs_cui_init(tp->t_mountp, count); /* new CUI with room for the same extent count */
+       memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp)); /* copy every extent from the old intent */
+       atomic_set(&cuip->cui_next_extent, count); /* NOTE(review): presumably marks all count slots as filled - confirm */
+       xfs_trans_add_item(tp, &cuip->cui_item);
+       set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
+       return &cuip->cui_item;
+}
+
  static const struct xfs_item_ops xfs_cui_item_ops = {
         .iop_size       = xfs_cui_item_size,
         .iop_format     = xfs_cui_item_format,
@@ -581,6 +593,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
         .iop_release    = xfs_cui_item_release,
         .iop_recover    = xfs_cui_item_recover,
         .iop_match      = xfs_cui_item_match,
+       .iop_relog      = xfs_cui_item_relog,
  };
  
  /*
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c

index dc5b075..7adc996 100644 (file)
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -467,14 +467,14 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
  STATIC int
  xfs_rui_item_recover(
         struct xfs_log_item             *lip,
-       struct xfs_trans                *parent_tp)
+       struct list_head                *capture_list)
  {
         struct xfs_rui_log_item         *ruip = RUI_ITEM(lip);
         struct xfs_map_extent           *rmap;
         struct xfs_rud_log_item         *rudp;
         struct xfs_trans                *tp;
         struct xfs_btree_cur            *rcur = NULL;
-       struct xfs_mount                *mp = parent_tp->t_mountp;
+       struct xfs_mount                *mp = lip->li_mountp;
         xfs_fsblock_t                   startblock_fsb;
         enum xfs_rmap_intent_type       type;
         xfs_exntst_t                    state;
@@ -511,14 +511,8 @@ xfs_rui_item_recover(
                     rmap->me_len == 0 ||
                     startblock_fsb >= mp->m_sb.sb_dblocks ||
                     rmap->me_len >= mp->m_sb.sb_agblocks ||
-                   (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
-                       /*
-                        * This will pull the RUI from the AIL and
-                        * free the memory associated with it.
-                        */
-                       xfs_rui_release(ruip);
+                   (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS))
                         return -EFSCORRUPTED;
-               }
         }
  
         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
@@ -573,8 +567,7 @@ xfs_rui_item_recover(
         }
  
         xfs_rmap_finish_one_cleanup(tp, rcur, error);
-       error = xfs_trans_commit(tp);
-       return error;
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
  
  abort_error:
         xfs_rmap_finish_one_cleanup(tp, rcur, error);
@@ -590,6 +583,32 @@ xfs_rui_item_match(
         return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
  }
  
+/* Relog an intent item to push the log tail forward; returns the new RUI added to @tp. */
+static struct xfs_log_item *
+xfs_rui_item_relog(
+       struct xfs_log_item             *intent,
+       struct xfs_trans                *tp)
+{
+       struct xfs_rud_log_item         *rudp;
+       struct xfs_rui_log_item         *ruip;
+       struct xfs_map_extent           *extp;
+       unsigned int                    count;
+
+       count = RUI_ITEM(intent)->rui_format.rui_nextents;
+       extp = RUI_ITEM(intent)->rui_format.rui_extents;
+
+       tp->t_flags |= XFS_TRANS_DIRTY; /* set by hand; both items below get XFS_LI_DIRTY directly */
+       rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent)); /* RUD obtained for the old intent */
+       set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+       ruip = xfs_rui_init(tp->t_mountp, count); /* new RUI with room for the same extent count */
+       memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp)); /* copy every extent from the old intent */
+       atomic_set(&ruip->rui_next_extent, count); /* NOTE(review): presumably marks all count slots as filled - confirm */
+       xfs_trans_add_item(tp, &ruip->rui_item);
+       set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+       return &ruip->rui_item;
+}
+
  static const struct xfs_item_ops xfs_rui_item_ops = {
         .iop_size       = xfs_rui_item_size,
         .iop_format     = xfs_rui_item_format,
@@ -597,6 +616,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
         .iop_release    = xfs_rui_item_release,
         .iop_recover    = xfs_rui_item_recover,
         .iop_match      = xfs_rui_item_match,
+       .iop_relog      = xfs_rui_item_relog,
  };
  
  /*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index 5b89c12..ede1baf 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -18,7 +18,7 @@
  #include "xfs_trans_space.h"
  #include "xfs_icache.h"
  #include "xfs_rtalloc.h"
-
+#include "xfs_sb.h"
  
  /*
   * Read and return the summary information for a given extent size,
@@ -778,8 +778,14 @@ xfs_growfs_rt_alloc(
         struct xfs_bmbt_irec    map;            /* block map output */
         int                     nmap;           /* number of block maps */
         int                     resblks;        /* space reservation */
+       enum xfs_blft           buf_type;
         struct xfs_trans        *tp;
  
+       if (ip == mp->m_rsumip)
+               buf_type = XFS_BLFT_RTSUMMARY_BUF;
+       else
+               buf_type = XFS_BLFT_RTBITMAP_BUF;
+
         /*
          * Allocate space to the file, as necessary.
          */
@@ -841,6 +847,9 @@ xfs_growfs_rt_alloc(
                                         mp->m_bsize, 0, &bp);
                         if (error)
                                 goto out_trans_cancel;
+
+                       xfs_trans_buf_set_type(tp, bp, buf_type);
+                       bp->b_ops = &xfs_rtbuf_ops;
                         memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
                         xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
                         /*
@@ -1015,23 +1024,29 @@ xfs_growfs_rt(
                 /*
                  * Lock out other callers by grabbing the bitmap inode lock.
                  */
-               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
                 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
                 /*
-                * Update the bitmap inode's size.
+                * Update the bitmap inode's size ondisk and incore.  We need
+                * to update the incore size so that inode inactivation won't
+                * punch what it thinks are "posteof" blocks.
                  */
                 mp->m_rbmip->i_d.di_size =
                         nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+               i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
                 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
                 /*
                  * Get the summary inode into the transaction.
                  */
-               xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+               xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
                 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
                 /*
-                * Update the summary inode's size.
+                * Update the summary inode's size.  We need to update the
+                * incore size so that inode inactivation won't punch what it
+                * thinks are "posteof" blocks.
                  */
                 mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+               i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
                 xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
                 /*
                  * Copy summary data from old to new sizes.
@@ -1087,7 +1102,13 @@ error_cancel:
                 if (error)
                         break;
         }
+       if (error)
+               goto out_free;
+
+       /* Update secondary superblocks now the physical grow has completed */
+       error = xfs_update_secondary_sbs(mp);
  
+out_free:
         /*
          * Free the fake mp structure.
          */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c

index f70f125..20e0534 100644 (file)
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -23,6 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
         uint64_t        xs_xstrat_bytes = 0;
         uint64_t        xs_write_bytes = 0;
         uint64_t        xs_read_bytes = 0;
+       uint64_t        defer_relog = 0;
  
         static const struct xstats_entry {
                 char    *desc;
@@ -70,10 +71,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
                 xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
                 xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
                 xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
+               defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
         }
  
         len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
                         xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+       len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
+                       defer_relog);
         len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
  #if defined(DEBUG)
                 1);
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h

index 34d704f..43ffba7 100644 (file)
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -137,6 +137,7 @@ struct __xfsstats {
         uint64_t                xs_xstrat_bytes;
         uint64_t                xs_write_bytes;
         uint64_t                xs_read_bytes;
+       uint64_t                defer_relog;
  };
  
  #define        xfsstats_offset(f)      (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index baf5de3..d1b5f2d 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1234,25 +1234,12 @@ xfs_fc_parse_param(
         case Opt_nouuid:
                 mp->m_flags |= XFS_MOUNT_NOUUID;
                 return 0;
-       case Opt_ikeep:
-               mp->m_flags |= XFS_MOUNT_IKEEP;
-               return 0;
-       case Opt_noikeep:
-               mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               return 0;
         case Opt_largeio:
                 mp->m_flags |= XFS_MOUNT_LARGEIO;
                 return 0;
         case Opt_nolargeio:
                 mp->m_flags &= ~XFS_MOUNT_LARGEIO;
                 return 0;
-       case Opt_attr2:
-               mp->m_flags |= XFS_MOUNT_ATTR2;
-               return 0;
-       case Opt_noattr2:
-               mp->m_flags &= ~XFS_MOUNT_ATTR2;
-               mp->m_flags |= XFS_MOUNT_NOATTR2;
-               return 0;
         case Opt_filestreams:
                 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
                 return 0;
@@ -1304,6 +1291,24 @@ xfs_fc_parse_param(
                 xfs_mount_set_dax_mode(mp, result.uint_32);
                 return 0;
  #endif
+       /* Following mount options will be removed in September 2025 */
+       case Opt_ikeep:
+               xfs_warn(mp, "%s mount option is deprecated.", param->key);
+               mp->m_flags |= XFS_MOUNT_IKEEP;
+               return 0;
+       case Opt_noikeep:
+               xfs_warn(mp, "%s mount option is deprecated.", param->key);
+               mp->m_flags &= ~XFS_MOUNT_IKEEP;
+               return 0;
+       case Opt_attr2:
+               xfs_warn(mp, "%s mount option is deprecated.", param->key);
+               mp->m_flags |= XFS_MOUNT_ATTR2;
+               return 0;
+       case Opt_noattr2:
+               xfs_warn(mp, "%s mount option is deprecated.", param->key);
+               mp->m_flags &= ~XFS_MOUNT_ATTR2;
+               mp->m_flags |= XFS_MOUNT_NOATTR2;
+               return 0;
         default:
                 xfs_warn(mp, "unknown mount option [%s].", param->key);
                 return -EINVAL;
@@ -1450,6 +1455,19 @@ xfs_fc_fill_super(
         if (error)
                 goto out_free_sb;
  
+       /* V4 support is undergoing deprecation. */
+       if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+#ifdef CONFIG_XFS_SUPPORT_V4
+               xfs_warn_once(mp,
+       "Deprecated V4 format (crc=0) will not be supported after September 2030.");
+#else
+               xfs_warn(mp,
+       "Deprecated V4 format (crc=0) not supported by kernel.");
+               error = -EINVAL;
+               goto out_free_sb;
+#endif
+       }
+
         /*
          * XFS block mappings use 54 bits to store the logical block offset.
          * This should suffice to handle the maximum file size that the VFS
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c

index 021ef96..fac9de7 100644 (file)
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -50,13 +50,45 @@ xfs_panic_mask_proc_handler(
  }
  #endif /* CONFIG_PROC_FS */
  
+STATIC int
+xfs_deprecate_irix_sgid_inherit_proc_handler(
+       struct ctl_table        *ctl,
+       int                     write,
+       void                    *buffer,
+       size_t                  *lenp,
+       loff_t                  *ppos)
+{
+       if (write) {
+               printk_once(KERN_WARNING
+                               "XFS: " "%s sysctl option is deprecated.\n",
+                               ctl->procname);
+       }
+       return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
+STATIC int
+xfs_deprecate_irix_symlink_mode_proc_handler(
+       struct ctl_table        *ctl,
+       int                     write,
+       void                    *buffer,
+       size_t                  *lenp,
+       loff_t                  *ppos)
+{
+       if (write) {
+               printk_once(KERN_WARNING
+                               "XFS: " "%s sysctl option is deprecated.\n",
+                               ctl->procname);
+       }
+       return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
  static struct ctl_table xfs_table[] = {
         {
                 .procname       = "irix_sgid_inherit",
                 .data           = &xfs_params.sgid_inherit.val,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = xfs_deprecate_irix_sgid_inherit_proc_handler,
                 .extra1         = &xfs_params.sgid_inherit.min,
                 .extra2         = &xfs_params.sgid_inherit.max
         },
@@ -65,7 +97,7 @@ static struct ctl_table xfs_table[] = {
                 .data           = &xfs_params.symlink_mode.val,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = xfs_deprecate_irix_symlink_mode_proc_handler,
                 .extra1         = &xfs_params.symlink_mode.min,
                 .extra2         = &xfs_params.symlink_mode.max
         },
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index dcdcf99..8695165 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2533,6 +2533,7 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent);
  DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
  DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
  DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
  
  #define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
  DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index ca18a04..c94e71f 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -959,7 +959,7 @@ xfs_trans_cancel(
                 struct xfs_log_item *lip;
  
                 list_for_each_entry(lip, &tp->t_items, li_trans)
-                       ASSERT(!(lip->li_type == XFS_LI_EFD));
+                       ASSERT(!xlog_item_is_intent_done(lip));
         }
  #endif
         xfs_trans_unreserve_and_mod_sb(tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index f46534b..0846589 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -55,14 +55,12 @@ struct xfs_log_item {
  #define        XFS_LI_ABORTED  1
  #define        XFS_LI_FAILED   2
  #define        XFS_LI_DIRTY    3       /* log item dirty in transaction */
-#define        XFS_LI_RECOVERED 4      /* log intent item has been recovered */
  
  #define XFS_LI_FLAGS \
         { (1 << XFS_LI_IN_AIL),         "IN_AIL" }, \
         { (1 << XFS_LI_ABORTED),        "ABORTED" }, \
         { (1 << XFS_LI_FAILED),         "FAILED" }, \
-       { (1 << XFS_LI_DIRTY),          "DIRTY" }, \
-       { (1 << XFS_LI_RECOVERED),      "RECOVERED" }
+       { (1 << XFS_LI_DIRTY),          "DIRTY" }
  
  struct xfs_item_ops {
         unsigned flags;
@@ -74,10 +72,29 @@ struct xfs_item_ops {
         void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
         void (*iop_release)(struct xfs_log_item *);
         xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
-       int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+       int (*iop_recover)(struct xfs_log_item *lip,
+                          struct list_head *capture_list);
         bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
+       struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
+                       struct xfs_trans *tp);
  };
  
+/* Is this log item a deferred action intent? */
+static inline bool
+xlog_item_is_intent(struct xfs_log_item *lip)
+{
+       return lip->li_ops->iop_recover != NULL &&
+              lip->li_ops->iop_match != NULL;
+}
+
+/* Is this a log intent-done item? */
+static inline bool
+xlog_item_is_intent_done(struct xfs_log_item *lip)
+{
+       return lip->li_ops->iop_unpin == NULL &&
+              lip->li_ops->iop_push == NULL;
+}
+
  /*
   * Release the log item as soon as committed.  This is for items just logging
   * intents that never need to be written back in place.
@@ -243,4 +260,12 @@ void               xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
  
  extern kmem_zone_t     *xfs_trans_zone;
  
+static inline struct xfs_log_item *
+xfs_trans_item_relog(
+       struct xfs_log_item     *lip,
+       struct xfs_trans        *tp)
+{
+       return lip->li_ops->iop_relog(lip, tp);
+}
+
  #endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c

index 133fc6f..fe45b0c 100644 (file)
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -221,36 +221,27 @@ xfs_trans_mod_dquot(
         }
  
         switch (field) {
-
-               /*
-                * regular disk blk reservation
-                */
-             case XFS_TRANS_DQ_RES_BLKS:
+       /* regular disk blk reservation */
+       case XFS_TRANS_DQ_RES_BLKS:
                 qtrx->qt_blk_res += delta;
                 break;
  
-               /*
-                * inode reservation
-                */
-             case XFS_TRANS_DQ_RES_INOS:
+       /* inode reservation */
+       case XFS_TRANS_DQ_RES_INOS:
                 qtrx->qt_ino_res += delta;
                 break;
  
-               /*
-                * disk blocks used.
-                */
-             case XFS_TRANS_DQ_BCOUNT:
+       /* disk blocks used. */
+       case XFS_TRANS_DQ_BCOUNT:
                 qtrx->qt_bcount_delta += delta;
                 break;
  
-             case XFS_TRANS_DQ_DELBCOUNT:
+       case XFS_TRANS_DQ_DELBCOUNT:
                 qtrx->qt_delbcnt_delta += delta;
                 break;
  
-               /*
-                * Inode Count
-                */
-             case XFS_TRANS_DQ_ICOUNT:
+       /* Inode Count */
+       case XFS_TRANS_DQ_ICOUNT:
                 if (qtrx->qt_ino_res && delta > 0) {
                         qtrx->qt_ino_res_used += delta;
                         ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
@@ -258,17 +249,13 @@ xfs_trans_mod_dquot(
                 qtrx->qt_icount_delta += delta;
                 break;
  
-               /*
-                * rtblk reservation
-                */
-             case XFS_TRANS_DQ_RES_RTBLKS:
+       /* rtblk reservation */
+       case XFS_TRANS_DQ_RES_RTBLKS:
                 qtrx->qt_rtblk_res += delta;
                 break;
  
-               /*
-                * rtblk count
-                */
-             case XFS_TRANS_DQ_RTBCOUNT:
+       /* rtblk count */
+       case XFS_TRANS_DQ_RTBCOUNT:
                 if (qtrx->qt_rtblk_res && delta > 0) {
                         qtrx->qt_rtblk_res_used += delta;
                         ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
@@ -276,11 +263,11 @@ xfs_trans_mod_dquot(
                 qtrx->qt_rtbcount_delta += delta;
                 break;
  
-             case XFS_TRANS_DQ_DELRTBCOUNT:
+       case XFS_TRANS_DQ_DELRTBCOUNT:
                 qtrx->qt_delrtb_delta += delta;
                 break;
  
-             default:
+       default:
                 ASSERT(0);
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 19 Oct 2020 21:38:46 +0000 (14:38 -0700)
Documentation/admin-guide/xfs.rst		patch \| blob \| history
fs/xfs/Kconfig		patch \| blob \| history
fs/xfs/libxfs/xfs_attr_remote.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_da_format.h		patch \| blob \| history
fs/xfs/libxfs/xfs_defer.c		patch \| blob \| history
fs/xfs/libxfs/xfs_defer.h		patch \| blob \| history
fs/xfs/libxfs/xfs_inode_buf.h		patch \| blob \| history
fs/xfs/libxfs/xfs_rmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_rtbitmap.c		patch \| blob \| history
fs/xfs/scrub/dabtree.c		patch \| blob \| history
fs/xfs/xfs_bmap_item.c		patch \| blob \| history
fs/xfs/xfs_buf_item_recover.c		patch \| blob \| history
fs/xfs/xfs_dquot.c		patch \| blob \| history
fs/xfs/xfs_extfree_item.c		patch \| blob \| history
fs/xfs/xfs_filestream.c		patch \| blob \| history
fs/xfs/xfs_fsmap.c		patch \| blob \| history
fs/xfs/xfs_fsmap.h		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| history
fs/xfs/xfs_iops.c		patch \| blob \| history
fs/xfs/xfs_linux.h		patch \| blob \| history
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_log.h		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_qm.c		patch \| blob \| history
fs/xfs/xfs_refcount_item.c		patch \| blob \| history
fs/xfs/xfs_rmap_item.c		patch \| blob \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| history
fs/xfs/xfs_stats.c		patch \| blob \| history
fs/xfs/xfs_stats.h		patch \| blob \| history
fs/xfs/xfs_super.c		patch \| blob \| history
fs/xfs/xfs_sysctl.c		patch \| blob \| history
fs/xfs/xfs_trace.h		patch \| blob \| history
fs/xfs/xfs_trans.c		patch \| blob \| history
fs/xfs/xfs_trans.h		patch \| blob \| history
fs/xfs/xfs_trans_dquot.c		patch \| blob \| history