Merge branch 'akpm' (patches from Andrew)
[linux-2.6-microblaze.git] / fs / gfs2 / bmap.c
index 01b97c0..4d810be 100644 (file)
@@ -38,11 +38,6 @@ struct metapath {
        __u16 mp_list[GFS2_MAX_META_HEIGHT];
 };
 
-struct strip_mine {
-       int sm_first;
-       unsigned int sm_height;
-};
-
 /**
  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  * @ip: the inode
@@ -252,6 +247,19 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp)
        return 1;
 }
 
+/**
+ * metaptr1 - Return the first possible metadata pointer in a metaath buffer
+ * @height: The metadata height (0 = dinode)
+ * @mp: The metapath
+ */
+static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
+{
+       struct buffer_head *bh = mp->mp_bh[height];
+       if (height == 0)
+               return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
+       return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
+}
+
 /**
  * metapointer - Return pointer to start of metadata in a buffer
  * @height: The metadata height (0 = dinode)
@@ -264,10 +272,8 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp)
 
 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 {
-       struct buffer_head *bh = mp->mp_bh[height];
-       unsigned int head_size = (height > 0) ?
-               sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
-       return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
+       __be64 *p = metaptr1(height, mp);
+       return p + mp->mp_list[height];
 }
 
 static void gfs2_metapath_ra(struct gfs2_glock *gl,
@@ -295,6 +301,23 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
        }
 }
 
+/**
+ * lookup_mp_height - helper function for lookup_metapath
+ * @ip: the inode
+ * @mp: the metapath
+ * @h: the height which needs looking up
+ */
+static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
+{
+       __be64 *ptr = metapointer(h, mp);
+       u64 dblock = be64_to_cpu(*ptr);
+
+       if (!dblock)
+               return h + 1;
+
+       return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]);
+}
+
 /**
  * lookup_metapath - Walk the metadata tree to a specific point
  * @ip: The inode
@@ -316,17 +339,10 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 {
        unsigned int end_of_metadata = ip->i_height - 1;
        unsigned int x;
-       __be64 *ptr;
-       u64 dblock;
        int ret;
 
        for (x = 0; x < end_of_metadata; x++) {
-               ptr = metapointer(x, mp);
-               dblock = be64_to_cpu(*ptr);
-               if (!dblock)
-                       return x + 1;
-
-               ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]);
+               ret = lookup_mp_height(ip, mp, x);
                if (ret)
                        return ret;
        }
@@ -334,6 +350,35 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
        return ip->i_height;
 }
 
+/**
+ * fillup_metapath - fill up buffers for the metadata path to a specific height
+ * @ip: The inode
+ * @mp: The metapath
+ * @h: The height to which it should be mapped
+ *
+ * Similar to lookup_metapath, but does lookups for a range of heights
+ *
+ * Returns: error or height of metadata tree
+ */
+
+static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
+{
+       unsigned int start_h = h - 1;
+       int ret;
+
+       if (h) {
+               /* find the first buffer we need to look up. */
+               while (start_h > 0 && mp->mp_bh[start_h] == NULL)
+                       start_h--;
+               for (; start_h < h; start_h++) {
+                       ret = lookup_mp_height(ip, mp, start_h);
+                       if (ret)
+                               return ret;
+               }
+       }
+       return ip->i_height;
+}
+
 static inline void release_metapath(struct metapath *mp)
 {
        int i;
@@ -422,6 +467,13 @@ enum alloc_state {
        /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 };
 
+static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
+{
+       if (hgt)
+               return sdp->sd_inptrs;
+       return sdp->sd_diptrs;
+}
+
 /**
  * gfs2_bmap_alloc - Build a metadata tree of the requested height
  * @inode: The GFS2 inode
@@ -620,7 +672,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 
        BUG_ON(maxlen == 0);
 
-       memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
+       memset(&mp, 0, sizeof(mp));
        bmap_lock(ip, create);
        clear_buffer_mapped(bh_map);
        clear_buffer_new(bh_map);
@@ -701,252 +753,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
        return ret;
 }
 
-/**
- * do_strip - Look for a layer a particular layer of the file and strip it off
- * @ip: the inode
- * @dibh: the dinode buffer
- * @bh: A buffer of pointers
- * @top: The first pointer in the buffer
- * @bottom: One more than the last pointer
- * @height: the height this buffer is at
- * @sm: a pointer to a struct strip_mine
- *
- * Returns: errno
- */
-
-static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
-                   struct buffer_head *bh, __be64 *top, __be64 *bottom,
-                   unsigned int height, struct strip_mine *sm)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_rgrp_list rlist;
-       struct gfs2_trans *tr;
-       u64 bn, bstart;
-       u32 blen, btotal;
-       __be64 *p;
-       unsigned int rg_blocks = 0;
-       int metadata;
-       unsigned int revokes = 0;
-       int x;
-       int error;
-       int jblocks_rqsted;
-
-       error = gfs2_rindex_update(sdp);
-       if (error)
-               return error;
-
-       if (!*top)
-               sm->sm_first = 0;
-
-       if (height != sm->sm_height)
-               return 0;
-
-       if (sm->sm_first) {
-               top++;
-               sm->sm_first = 0;
-       }
-
-       metadata = (height != ip->i_height - 1);
-       if (metadata)
-               revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
-       else if (ip->i_depth)
-               revokes = sdp->sd_inptrs;
-
-       memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
-       bstart = 0;
-       blen = 0;
-
-       for (p = top; p < bottom; p++) {
-               if (!*p)
-                       continue;
-
-               bn = be64_to_cpu(*p);
-
-               if (bstart + blen == bn)
-                       blen++;
-               else {
-                       if (bstart)
-                               gfs2_rlist_add(ip, &rlist, bstart);
-
-                       bstart = bn;
-                       blen = 1;
-               }
-       }
-
-       if (bstart)
-               gfs2_rlist_add(ip, &rlist, bstart);
-       else
-               goto out; /* Nothing to do */
-
-       gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
-
-       for (x = 0; x < rlist.rl_rgrps; x++) {
-               struct gfs2_rgrpd *rgd;
-               rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-               rg_blocks += rgd->rd_length;
-       }
-
-       error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
-       if (error)
-               goto out_rlist;
-
-       if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
-               gfs2_rs_deltree(&ip->i_res);
-
-restart:
-       jblocks_rqsted = rg_blocks + RES_DINODE +
-               RES_INDIRECT + RES_STATFS + RES_QUOTA +
-               gfs2_struct2blk(sdp, revokes, sizeof(u64));
-       if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
-               jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
-       error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
-       if (error)
-               goto out_rg_gunlock;
-
-       tr = current->journal_info;
-       down_write(&ip->i_rw_mutex);
-
-       gfs2_trans_add_meta(ip->i_gl, dibh);
-       gfs2_trans_add_meta(ip->i_gl, bh);
-
-       bstart = 0;
-       blen = 0;
-       btotal = 0;
-
-       for (p = top; p < bottom; p++) {
-               if (!*p)
-                       continue;
-
-               /* check for max reasonable journal transaction blocks */
-               if (tr->tr_num_buf_new + RES_STATFS +
-                   RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
-                       if (rg_blocks >= tr->tr_num_buf_new)
-                               rg_blocks -= tr->tr_num_buf_new;
-                       else
-                               rg_blocks = 0;
-                       break;
-               }
-
-               bn = be64_to_cpu(*p);
-
-               if (bstart + blen == bn)
-                       blen++;
-               else {
-                       if (bstart) {
-                               __gfs2_free_blocks(ip, bstart, blen, metadata);
-                               btotal += blen;
-                       }
-
-                       bstart = bn;
-                       blen = 1;
-               }
-
-               *p = 0;
-               gfs2_add_inode_blocks(&ip->i_inode, -1);
-       }
-       if (p == bottom)
-               rg_blocks = 0;
-
-       if (bstart) {
-               __gfs2_free_blocks(ip, bstart, blen, metadata);
-               btotal += blen;
-       }
-
-       gfs2_statfs_change(sdp, 0, +btotal, 0);
-       gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
-                         ip->i_inode.i_gid);
-
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
-
-       gfs2_dinode_out(ip, dibh->b_data);
-
-       up_write(&ip->i_rw_mutex);
-
-       gfs2_trans_end(sdp);
-
-       if (rg_blocks)
-               goto restart;
-
-out_rg_gunlock:
-       gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
-out_rlist:
-       gfs2_rlist_free(&rlist);
-out:
-       return error;
-}
-
-/**
- * recursive_scan - recursively scan through the end of a file
- * @ip: the inode
- * @dibh: the dinode buffer
- * @mp: the path through the metadata to the point to start
- * @height: the height the recursion is at
- * @block: the indirect block to look at
- * @first: 1 if this is the first block
- * @sm: data opaque to this function to pass to @bc
- *
- * When this is first called @height and @block should be zero and
- * @first should be 1.
- *
- * Returns: errno
- */
-
-static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
-                         struct metapath *mp, unsigned int height,
-                         u64 block, int first, struct strip_mine *sm)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct buffer_head *bh = NULL;
-       __be64 *top, *bottom;
-       u64 bn;
-       int error;
-       int mh_size = sizeof(struct gfs2_meta_header);
-
-       if (!height) {
-               error = gfs2_meta_inode_buffer(ip, &bh);
-               if (error)
-                       return error;
-               dibh = bh;
-
-               top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
-               bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
-       } else {
-               error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
-               if (error)
-                       return error;
-
-               top = (__be64 *)(bh->b_data + mh_size) +
-                                 (first ? mp->mp_list[height] : 0);
-
-               bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
-       }
-
-       error = do_strip(ip, dibh, bh, top, bottom, height, sm);
-       if (error)
-               goto out;
-
-       if (height < ip->i_height - 1) {
-
-               gfs2_metapath_ra(ip->i_gl, bh, top);
-
-               for (; top < bottom; top++, first = 0) {
-                       if (!*top)
-                               continue;
-
-                       bn = be64_to_cpu(*top);
-
-                       error = recursive_scan(ip, dibh, mp, height + 1, bn,
-                                              first, sm);
-                       if (error)
-                               break;
-               }
-       }
-out:
-       brelse(bh);
-       return error;
-}
-
-
 /**
  * gfs2_block_truncate_page - Deal with zeroing out data for truncate
  *
@@ -1106,41 +912,406 @@ out:
        return error;
 }
 
-static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
+/**
+ * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
+ * @ip: inode
+ * @rg_gh: holder of resource group glock
+ * @mp: current metapath fully populated with buffers
+ * @btotal: place to keep count of total blocks freed
+ * @hgt: height we're processing
+ * @first: true if this is the first call to this function for this height
+ *
+ * We sweep a metadata buffer (provided by the metapath) for blocks we need to
+ * free, and free them all. However, we do it one rgrp at a time. If this
+ * block has references to multiple rgrps, we break it into individual
+ * transactions. This allows other processes to use the rgrps while we're
+ * focused on a single one, for better concurrency / performance.
+ * At every transaction boundary, we rewrite the inode into the journal.
+ * That way the bitmaps are kept consistent with the inode and we can recover
+ * if we're interrupted by power-outages.
+ *
+ * Returns: 0, or return code if an error occurred.
+ *          *btotal has the total number of blocks freed
+ */
+static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
+                             const struct metapath *mp, u32 *btotal, int hgt,
+                             bool preserve1)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       unsigned int height = ip->i_height;
-       u64 lblock;
-       struct metapath mp;
-       int error;
+       struct gfs2_rgrpd *rgd;
+       struct gfs2_trans *tr;
+       struct buffer_head *bh = mp->mp_bh[hgt];
+       __be64 *top, *bottom, *p;
+       int blks_outside_rgrp;
+       u64 bn, bstart, isize_blks;
+       s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
+       int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
+       int ret = 0;
+       bool buf_in_tr = false; /* buffer was added to transaction */
+
+       if (gfs2_metatype_check(sdp, bh,
+                               (hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))
+               return -EIO;
+
+more_rgrps:
+       blks_outside_rgrp = 0;
+       bstart = 0;
+       blen = 0;
+       top = metapointer(hgt, mp); /* first ptr from metapath */
+       /* If we're keeping some data at the truncation point, we've got to
+          preserve the metadata tree by adding 1 to the starting metapath. */
+       if (preserve1)
+               top++;
+
+       bottom = (__be64 *)(bh->b_data + bh->b_size);
+
+       for (p = top; p < bottom; p++) {
+               if (!*p)
+                       continue;
+               bn = be64_to_cpu(*p);
+               if (gfs2_holder_initialized(rd_gh)) {
+                       rgd = (struct gfs2_rgrpd *)rd_gh->gh_gl->gl_object;
+                       gfs2_assert_withdraw(sdp,
+                                    gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
+               } else {
+                       rgd = gfs2_blk2rgrpd(sdp, bn, false);
+                       ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                                                0, rd_gh);
+                       if (ret)
+                               goto out;
+
+                       /* Must be done with the rgrp glock held: */
+                       if (gfs2_rs_active(&ip->i_res) &&
+                           rgd == ip->i_res.rs_rbm.rgd)
+                               gfs2_rs_deltree(&ip->i_res);
+               }
+
+               if (!rgrp_contains_block(rgd, bn)) {
+                       blks_outside_rgrp++;
+                       continue;
+               }
+
+               /* The size of our transactions will be unknown until we
+                  actually process all the metadata blocks that relate to
+                  the rgrp. So we estimate. We know it can't be more than
+                  the dinode's i_blocks and we don't want to exceed the
+                  journal flush threshold, sd_log_thresh2. */
+               if (current->journal_info == NULL) {
+                       unsigned int jblocks_rqsted, revokes;
+
+                       jblocks_rqsted = rgd->rd_length + RES_DINODE +
+                               RES_INDIRECT;
+                       isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
+                       if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
+                               jblocks_rqsted +=
+                                       atomic_read(&sdp->sd_log_thresh2);
+                       else
+                               jblocks_rqsted += isize_blks;
+                       revokes = jblocks_rqsted;
+                       if (meta)
+                               revokes += hptrs(sdp, hgt);
+                       else if (ip->i_depth)
+                               revokes += sdp->sd_inptrs;
+                       ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
+                       if (ret)
+                               goto out_unlock;
+                       down_write(&ip->i_rw_mutex);
+               }
+               /* check if we will exceed the transaction blocks requested */
+               tr = current->journal_info;
+               if (tr->tr_num_buf_new + RES_STATFS +
+                   RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
+                       /* We set blks_outside_rgrp to ensure the loop will
+                          be repeated for the same rgrp, but with a new
+                          transaction. */
+                       blks_outside_rgrp++;
+                       /* This next part is tricky. If the buffer was added
+                          to the transaction, we've already set some block
+                          pointers to 0, so we better follow through and free
+                          them, or we will introduce corruption (so break).
+                          This may be impossible, or at least rare, but I
+                          decided to cover the case regardless.
+
+                          If the buffer was not added to the transaction
+                          (this call), doing so would exceed our transaction
+                          size, so we need to end the transaction and start a
+                          new one (so goto). */
+
+                       if (buf_in_tr)
+                               break;
+                       goto out_unlock;
+               }
+
+               gfs2_trans_add_meta(ip->i_gl, bh);
+               buf_in_tr = true;
+               *p = 0;
+               if (bstart + blen == bn) {
+                       blen++;
+                       continue;
+               }
+               if (bstart) {
+                       __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
+                       (*btotal) += blen;
+                       gfs2_add_inode_blocks(&ip->i_inode, -blen);
+               }
+               bstart = bn;
+               blen = 1;
+       }
+       if (bstart) {
+               __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
+               (*btotal) += blen;
+               gfs2_add_inode_blocks(&ip->i_inode, -blen);
+       }
+out_unlock:
+       if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
+                                           outside the rgrp we just processed,
+                                           do it all over again. */
+               if (current->journal_info) {
+                       struct buffer_head *dibh = mp->mp_bh[0];
+
+                       /* Every transaction boundary, we rewrite the dinode
+                          to keep its di_blocks current in case of failure. */
+                       ip->i_inode.i_mtime = ip->i_inode.i_ctime =
+                               current_time(&ip->i_inode);
+                       gfs2_trans_add_meta(ip->i_gl, dibh);
+                       gfs2_dinode_out(ip, dibh->b_data);
+                       up_write(&ip->i_rw_mutex);
+                       gfs2_trans_end(sdp);
+               }
+               gfs2_glock_dq_uninit(rd_gh);
+               cond_resched();
+               goto more_rgrps;
+       }
+out:
+       return ret;
+}
+
+/**
+ * find_nonnull_ptr - find a non-null pointer given a metapath and height
+ * assumes the metapath is valid (with buffers) out to height h
+ * @mp: starting metapath
+ * @h: desired height to search
+ *
+ * Returns: true if a non-null pointer was found in the metapath buffer
+ *          false if all remaining pointers are NULL in the buffer
+ */
+static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
+                            unsigned int h)
+{
+       __be64 *ptr;
+       unsigned int ptrs = hptrs(sdp, h) - 1;
+
+       while (true) {
+               ptr = metapointer(h, mp);
+               if (*ptr) /* if we have a non-null pointer */
+                       return true;
+
+               if (mp->mp_list[h] < ptrs)
+                       mp->mp_list[h]++;
+               else
+                       return false; /* no more pointers in this buffer */
+       }
+}
+
+enum dealloc_states {
+       DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
+       DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
+       DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
+       DEALLOC_DONE = 3,       /* process complete */
+};
 
-       if (!size)
+/**
+ * trunc_dealloc - truncate a file down to a desired size
+ * @ip: inode to truncate
+ * @newsize: The desired size of the file
+ *
+ * This function truncates a file to newsize. It works from the
+ * bottom up, and from the right to the left. In other words, it strips off
+ * the highest layer (data) before stripping any of the metadata. Doing it
+ * this way is best in case the operation is interrupted by power failure, etc.
+ * The dinode is rewritten in every transaction to guarantee integrity.
+ */
+static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
+{
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       struct metapath mp;
+       struct buffer_head *dibh, *bh;
+       struct gfs2_holder rd_gh;
+       u64 lblock;
+       __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
+       unsigned int strip_h = ip->i_height - 1;
+       u32 btotal = 0;
+       int ret, state;
+       int mp_h; /* metapath buffers are read in to this height */
+       sector_t last_ra = 0;
+       u64 prev_bnr = 0;
+       bool preserve1; /* need to preserve the first meta pointer? */
+
+       if (!newsize)
                lblock = 0;
        else
-               lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
+               lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
 
+       memset(&mp, 0, sizeof(mp));
        find_metapath(sdp, lblock, &mp, ip->i_height);
-       error = gfs2_rindex_update(sdp);
-       if (error)
-               return error;
 
-       error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
-       if (error)
-               return error;
+       memcpy(&nbof, &mp.mp_list, sizeof(nbof));
+
+       ret = gfs2_meta_inode_buffer(ip, &dibh);
+       if (ret)
+               return ret;
 
-       while (height--) {
-               struct strip_mine sm;
-               sm.sm_first = !!size;
-               sm.sm_height = height;
+       mp.mp_bh[0] = dibh;
+       ret = lookup_metapath(ip, &mp);
+       if (ret == ip->i_height)
+               state = DEALLOC_MP_FULL; /* We have a complete metapath */
+       else
+               state = DEALLOC_FILL_MP; /* deal with partial metapath */
 
-               error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
-               if (error)
+       ret = gfs2_rindex_update(sdp);
+       if (ret)
+               goto out_metapath;
+
+       ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
+       if (ret)
+               goto out_metapath;
+       gfs2_holder_mark_uninitialized(&rd_gh);
+
+       mp_h = strip_h;
+
+       while (state != DEALLOC_DONE) {
+               switch (state) {
+               /* Truncate a full metapath at the given strip height.
+                * Note that strip_h == mp_h in order to be in this state. */
+               case DEALLOC_MP_FULL:
+                       if (mp_h > 0) { /* issue read-ahead on metadata */
+                               __be64 *top;
+
+                               bh = mp.mp_bh[mp_h - 1];
+                               if (bh->b_blocknr != last_ra) {
+                                       last_ra = bh->b_blocknr;
+                                       top = metaptr1(mp_h - 1, &mp);
+                                       gfs2_metapath_ra(ip->i_gl, bh, top);
+                               }
+                       }
+                       /* If we're truncating to a non-zero size and the mp is
+                          at the beginning of file for the strip height, we
+                          need to preserve the first metadata pointer. */
+                       preserve1 = (newsize &&
+                                    (mp.mp_list[mp_h] == nbof[mp_h]));
+                       bh = mp.mp_bh[mp_h];
+                       gfs2_assert_withdraw(sdp, bh);
+                       if (gfs2_assert_withdraw(sdp,
+                                                prev_bnr != bh->b_blocknr)) {
+                               printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
+                                      "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
+                                      sdp->sd_fsname,
+                                      (unsigned long long)ip->i_no_addr,
+                                      prev_bnr, ip->i_height, strip_h, mp_h);
+                       }
+                       prev_bnr = bh->b_blocknr;
+                       ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
+                                                mp_h, preserve1);
+                       /* If we hit an error or just swept dinode buffer,
+                          just exit. */
+                       if (ret || !mp_h) {
+                               state = DEALLOC_DONE;
+                               break;
+                       }
+                       state = DEALLOC_MP_LOWER;
+                       break;
+
+               /* lower the metapath strip height */
+               case DEALLOC_MP_LOWER:
+                       /* We're done with the current buffer, so release it,
+                          unless it's the dinode buffer. Then back up to the
+                          previous pointer. */
+                       if (mp_h) {
+                               brelse(mp.mp_bh[mp_h]);
+                               mp.mp_bh[mp_h] = NULL;
+                       }
+                       /* If we can't get any lower in height, we've stripped
+                          off all we can. Next step is to back up and start
+                          stripping the previous level of metadata. */
+                       if (mp_h == 0) {
+                               strip_h--;
+                               memcpy(&mp.mp_list, &nbof, sizeof(nbof));
+                               mp_h = strip_h;
+                               state = DEALLOC_FILL_MP;
+                               break;
+                       }
+                       mp.mp_list[mp_h] = 0;
+                       mp_h--; /* search one metadata height down */
+                       if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
+                               break; /* loop around in the same state */
+                       mp.mp_list[mp_h]++;
+                       /* Here we've found a part of the metapath that is not
+                        * allocated. We need to search at that height for the
+                        * next non-null pointer. */
+                       if (find_nonnull_ptr(sdp, &mp, mp_h)) {
+                               state = DEALLOC_FILL_MP;
+                               mp_h++;
+                       }
+                       /* No more non-null pointers at this height. Back up
+                          to the previous height and try again. */
+                       break; /* loop around in the same state */
+
+               /* Fill the metapath with buffers to the given height. */
+               case DEALLOC_FILL_MP:
+                       /* Fill the buffers out to the current height. */
+                       ret = fillup_metapath(ip, &mp, mp_h);
+                       if (ret < 0)
+                               goto out;
+
+                       /* If buffers found for the entire strip height */
+                       if ((ret == ip->i_height) && (mp_h == strip_h)) {
+                               state = DEALLOC_MP_FULL;
+                               break;
+                       }
+                       if (ret < ip->i_height) /* We have a partial height */
+                               mp_h = ret - 1;
+
+                       /* If we find a non-null block pointer, crawl a bit
+                          higher up in the metapath and try again, otherwise
+                          we need to look lower for a new starting point. */
+                       if (find_nonnull_ptr(sdp, &mp, mp_h))
+                               mp_h++;
+                       else
+                               state = DEALLOC_MP_LOWER;
                        break;
+               }
        }
 
-       gfs2_quota_unhold(ip);
+       if (btotal) {
+               if (current->journal_info == NULL) {
+                       ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
+                                              RES_QUOTA, 0);
+                       if (ret)
+                               goto out;
+                       down_write(&ip->i_rw_mutex);
+               }
+               gfs2_statfs_change(sdp, 0, +btotal, 0);
+               gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
+                                 ip->i_inode.i_gid);
+               ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+               gfs2_trans_add_meta(ip->i_gl, dibh);
+               gfs2_dinode_out(ip, dibh->b_data);
+               up_write(&ip->i_rw_mutex);
+               gfs2_trans_end(sdp);
+       }
 
-       return error;
+out:
+       if (gfs2_holder_initialized(&rd_gh))
+               gfs2_glock_dq_uninit(&rd_gh);
+       if (current->journal_info) {
+               up_write(&ip->i_rw_mutex);
+               gfs2_trans_end(sdp);
+               cond_resched();
+       }
+       gfs2_quota_unhold(ip);
+out_metapath:
+       release_metapath(&mp);
+       return ret;
 }
 
 static int trunc_end(struct gfs2_inode *ip)