fs/gfs2/bmap.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/spinlock.h>
  11 #include <linux/completion.h>
  12 #include <linux/buffer_head.h>
  13 #include <linux/blkdev.h>
  14 #include <linux/gfs2_ondisk.h>
  15 #include <linux/crc32.h>
  16 #include <linux/iomap.h>
  17
  18 #include "gfs2.h"
  19 #include "incore.h"
  20 #include "bmap.h"
  21 #include "glock.h"
  22 #include "inode.h"
  23 #include "meta_io.h"
  24 #include "quota.h"
  25 #include "rgrp.h"
  26 #include "log.h"
  27 #include "super.h"
  28 #include "trans.h"
  29 #include "dir.h"
  30 #include "util.h"
  31 #include "trace_gfs2.h"
  32
  33 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  34  * block is 512, so __u16 is fine for that. It saves stack space to
  35  * keep it small.
  36  */
  37 struct metapath {
  38         struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
  39         __u16 mp_list[GFS2_MAX_META_HEIGHT];
  40         int mp_fheight; /* find_metapath height */
  41         int mp_aheight; /* actual height (lookup height) */
  42 };
  43
  44 /**
  45  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  46  * @ip: the inode
  47  * @dibh: the dinode buffer
  48  * @block: the block number that was allocated
  49  * @page: The (optional) page. This is looked up if @page is NULL
  50  *
  51  * Returns: errno
  52  */
  53
  54 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  55                                u64 block, struct page *page)
  56 {
  57         struct inode *inode = &ip->i_inode;
  58         struct buffer_head *bh;
  59         int release = 0;
  60
  61         if (!page || page->index) {
  62                 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
  63                 if (!page)
  64                         return -ENOMEM;
  65                 release = 1;
  66         }
  67
  68         if (!PageUptodate(page)) {
  69                 void *kaddr = kmap(page);
  70                 u64 dsize = i_size_read(inode);
  71
  72                 if (dsize > gfs2_max_stuffed_size(ip))
  73                         dsize = gfs2_max_stuffed_size(ip);
  74
  75                 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  76                 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  77                 kunmap(page);
  78
  79                 SetPageUptodate(page);
  80         }
  81
  82         if (!page_has_buffers(page))
  83                 create_empty_buffers(page, BIT(inode->i_blkbits),
  84                                      BIT(BH_Uptodate));
  85
  86         bh = page_buffers(page);
  87
  88         if (!buffer_mapped(bh))
  89                 map_bh(bh, inode->i_sb, block);
  90
  91         set_buffer_uptodate(bh);
  92         if (gfs2_is_jdata(ip))
  93                 gfs2_trans_add_data(ip->i_gl, bh);
  94         else {
  95                 mark_buffer_dirty(bh);
  96                 gfs2_ordered_add_inode(ip);
  97         }
  98
  99         if (release) {
 100                 unlock_page(page);
 101                 put_page(page);
 102         }
 103
 104         return 0;
 105 }
 106
 107 /**
 108  * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 109  * @ip: The GFS2 inode to unstuff
 110  * @page: The (optional) page. This is looked up if the @page is NULL
 111  *
 112  * This routine unstuffs a dinode and returns it to a "normal" state such
 113  * that the height can be grown in the traditional way.
 114  *
 115  * Returns: errno
 116  */
 117
 118 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 119 {
 120         struct buffer_head *bh, *dibh;
 121         struct gfs2_dinode *di;
 122         u64 block = 0;
 123         int isdir = gfs2_is_dir(ip);
 124         int error;
 125
 126         down_write(&ip->i_rw_mutex);
 127
 128         error = gfs2_meta_inode_buffer(ip, &dibh);
 129         if (error)
 130                 goto out;
 131
 132         if (i_size_read(&ip->i_inode)) {
 133                 /* Get a free block, fill it with the stuffed data,
 134                    and write it out to disk */
 135
 136                 unsigned int n = 1;
 137                 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 138                 if (error)
 139                         goto out_brelse;
 140                 if (isdir) {
 141                         gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 142                         error = gfs2_dir_get_new_buffer(ip, block, &bh);
 143                         if (error)
 144                                 goto out_brelse;
 145                         gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 146                                               dibh, sizeof(struct gfs2_dinode));
 147                         brelse(bh);
 148                 } else {
 149                         error = gfs2_unstuffer_page(ip, dibh, block, page);
 150                         if (error)
 151                                 goto out_brelse;
 152                 }
 153         }
 154
 155         /*  Set up the pointer to the new block  */
 156
 157         gfs2_trans_add_meta(ip->i_gl, dibh);
 158         di = (struct gfs2_dinode *)dibh->b_data;
 159         gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 160
 161         if (i_size_read(&ip->i_inode)) {
 162                 *(__be64 *)(di + 1) = cpu_to_be64(block);
 163                 gfs2_add_inode_blocks(&ip->i_inode, 1);
 164                 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 165         }
 166
 167         ip->i_height = 1;
 168         di->di_height = cpu_to_be16(1);
 169
 170 out_brelse:
 171         brelse(dibh);
 172 out:
 173         up_write(&ip->i_rw_mutex);
 174         return error;
 175 }
 176
 177
 178 /**
 179  * find_metapath - Find path through the metadata tree
 180  * @sdp: The superblock
 181  * @block: The disk block to look up
 182  * @mp: The metapath to return the result in
 183  * @height: The pre-calculated height of the metadata tree
 184  *
 185  *   This routine returns a struct metapath structure that defines a path
 186  *   through the metadata of inode "ip" to get to block "block".
 187  *
 188  *   Example:
 189  *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 190  *   filesystem with a blocksize of 4096.
 191  *
 192  *   find_metapath() would return a struct metapath structure set to:
 193  *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 194  *
 195  *   That means that in order to get to the block containing the byte at
 196  *   offset 101342453, we would load the indirect block pointed to by pointer
 197  *   0 in the dinode.  We would then load the indirect block pointed to by
 198  *   pointer 48 in that indirect block.  We would then load the data block
 199  *   pointed to by pointer 165 in that indirect block.
 200  *
 201  *             ----------------------------------------
 202  *             | Dinode |                             |
 203  *             |        |                            4|
 204  *             |        |0 1 2 3 4 5                 9|
 205  *             |        |                            6|
 206  *             ----------------------------------------
 207  *                       |
 208  *                       |
 209  *                       V
 210  *             ----------------------------------------
 211  *             | Indirect Block                       |
 212  *             |                                     5|
 213  *             |            4 4 4 4 4 5 5            1|
 214  *             |0           5 6 7 8 9 0 1            2|
 215  *             ----------------------------------------
 216  *                                |
 217  *                                |
 218  *                                V
 219  *             ----------------------------------------
 220  *             | Indirect Block                       |
 221  *             |                         1 1 1 1 1   5|
 222  *             |                         6 6 6 6 6   1|
 223  *             |0                        3 4 5 6 7   2|
 224  *             ----------------------------------------
 225  *                                           |
 226  *                                           |
 227  *                                           V
 228  *             ----------------------------------------
 229  *             | Data block containing offset         |
 230  *             |            101342453                 |
 231  *             |                                      |
 232  *             |                                      |
 233  *             ----------------------------------------
 234  *
 235  */
 236
 237 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 238                           struct metapath *mp, unsigned int height)
 239 {
 240         unsigned int i;
 241
 242         mp->mp_fheight = height;
 243         for (i = height; i--;)
 244                 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 245 }
 246
 247 static inline unsigned int metapath_branch_start(const struct metapath *mp)
 248 {
 249         if (mp->mp_list[0] == 0)
 250                 return 2;
 251         return 1;
 252 }
 253
 254 /**
 255  * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 256  * @height: The metadata height (0 = dinode)
 257  * @mp: The metapath
 258  */
 259 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 260 {
 261         struct buffer_head *bh = mp->mp_bh[height];
 262         if (height == 0)
 263                 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 264         return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 265 }
 266
 267 /**
 268  * metapointer - Return pointer to start of metadata in a buffer
 269  * @height: The metadata height (0 = dinode)
 270  * @mp: The metapath
 271  *
 272  * Return a pointer to the block number of the next height of the metadata
 273  * tree given a buffer containing the pointer to the current height of the
 274  * metadata tree.
 275  */
 276
 277 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 278 {
 279         __be64 *p = metaptr1(height, mp);
 280         return p + mp->mp_list[height];
 281 }
 282
 283 static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
 284 {
 285         const struct buffer_head *bh = mp->mp_bh[height];
 286         return (const __be64 *)(bh->b_data + bh->b_size);
 287 }
 288
 289 static void clone_metapath(struct metapath *clone, struct metapath *mp)
 290 {
 291         unsigned int hgt;
 292
 293         *clone = *mp;
 294         for (hgt = 0; hgt < mp->mp_aheight; hgt++)
 295                 get_bh(clone->mp_bh[hgt]);
 296 }
 297
 298 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 299 {
 300         const __be64 *t;
 301
 302         for (t = start; t < end; t++) {
 303                 struct buffer_head *rabh;
 304
 305                 if (!*t)
 306                         continue;
 307
 308                 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 309                 if (trylock_buffer(rabh)) {
 310                         if (!buffer_uptodate(rabh)) {
 311                                 rabh->b_end_io = end_buffer_read_sync;
 312                                 submit_bh(REQ_OP_READ,
 313                                           REQ_RAHEAD | REQ_META | REQ_PRIO,
 314                                           rabh);
 315                                 continue;
 316                         }
 317                         unlock_buffer(rabh);
 318                 }
 319                 brelse(rabh);
 320         }
 321 }
 322
 323 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 324                              unsigned int x, unsigned int h)
 325 {
 326         for (; x < h; x++) {
 327                 __be64 *ptr = metapointer(x, mp);
 328                 u64 dblock = be64_to_cpu(*ptr);
 329                 int ret;
 330
 331                 if (!dblock)
 332                         break;
 333                 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
 334                 if (ret)
 335                         return ret;
 336         }
 337         mp->mp_aheight = x + 1;
 338         return 0;
 339 }
 340
 341 /**
 342  * lookup_metapath - Walk the metadata tree to a specific point
 343  * @ip: The inode
 344  * @mp: The metapath
 345  *
 346  * Assumes that the inode's buffer has already been looked up and
 347  * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 348  * by find_metapath().
 349  *
 350  * If this function encounters part of the tree which has not been
 351  * allocated, it returns the current height of the tree at the point
 352  * at which it found the unallocated block. Blocks which are found are
 353  * added to the mp->mp_bh[] list.
 354  *
 355  * Returns: error
 356  */
 357
 358 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 359 {
 360         return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 361 }
 362
 363 /**
 364  * fillup_metapath - fill up buffers for the metadata path to a specific height
 365  * @ip: The inode
 366  * @mp: The metapath
 367  * @h: The height to which it should be mapped
 368  *
 369  * Similar to lookup_metapath, but does lookups for a range of heights
 370  *
 371  * Returns: error or the number of buffers filled
 372  */
 373
 374 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 375 {
 376         unsigned int x = 0;
 377         int ret;
 378
 379         if (h) {
 380                 /* find the first buffer we need to look up. */
 381                 for (x = h - 1; x > 0; x--) {
 382                         if (mp->mp_bh[x])
 383                                 break;
 384                 }
 385         }
 386         ret = __fillup_metapath(ip, mp, x, h);
 387         if (ret)
 388                 return ret;
 389         return mp->mp_aheight - x - 1;
 390 }
 391
 392 static inline void release_metapath(struct metapath *mp)
 393 {
 394         int i;
 395
 396         for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 397                 if (mp->mp_bh[i] == NULL)
 398                         break;
 399                 brelse(mp->mp_bh[i]);
 400         }
 401 }
 402
 403 /**
 404  * gfs2_extent_length - Returns length of an extent of blocks
 405  * @start: Start of the buffer
 406  * @len: Length of the buffer in bytes
 407  * @ptr: Current position in the buffer
 408  * @limit: Max extent length to return (0 = unlimited)
 409  * @eob: Set to 1 if we hit "end of block"
 410  *
 411  * If the first block is zero (unallocated) it will return the number of
 412  * unallocated blocks in the extent, otherwise it will return the number
 413  * of contiguous blocks in the extent.
 414  *
 415  * Returns: The length of the extent (minimum of one block)
 416  */
 417
 418 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
 419 {
 420         const __be64 *end = (start + len);
 421         const __be64 *first = ptr;
 422         u64 d = be64_to_cpu(*ptr);
 423
 424         *eob = 0;
 425         do {
 426                 ptr++;
 427                 if (ptr >= end)
 428                         break;
 429                 if (limit && --limit == 0)
 430                         break;
 431                 if (d)
 432                         d++;
 433         } while(be64_to_cpu(*ptr) == d);
 434         if (ptr >= end)
 435                 *eob = 1;
 436         return (ptr - first);
 437 }
 438
 439 typedef const __be64 *(*gfs2_metadata_walker)(
 440                 struct metapath *mp,
 441                 const __be64 *start, const __be64 *end,
 442                 u64 factor, void *data);
 443
 444 #define WALK_STOP ((__be64 *)0)
 445 #define WALK_NEXT ((__be64 *)1)
 446
 447 static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
 448                 u64 len, struct metapath *mp, gfs2_metadata_walker walker,
 449                 void *data)
 450 {
 451         struct metapath clone;
 452         struct gfs2_inode *ip = GFS2_I(inode);
 453         struct gfs2_sbd *sdp = GFS2_SB(inode);
 454         const __be64 *start, *end, *ptr;
 455         u64 factor = 1;
 456         unsigned int hgt;
 457         int ret = 0;
 458
 459         for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
 460                 factor *= sdp->sd_inptrs;
 461
 462         for (;;) {
 463                 u64 step;
 464
 465                 /* Walk indirect block. */
 466                 start = metapointer(hgt, mp);
 467                 end = metaend(hgt, mp);
 468
 469                 step = (end - start) * factor;
 470                 if (step > len)
 471                         end = start + DIV_ROUND_UP_ULL(len, factor);
 472
 473                 ptr = walker(mp, start, end, factor, data);
 474                 if (ptr == WALK_STOP)
 475                         break;
 476                 if (step >= len)
 477                         break;
 478                 len -= step;
 479                 if (ptr != WALK_NEXT) {
 480                         BUG_ON(!*ptr);
 481                         mp->mp_list[hgt] += ptr - start;
 482                         goto fill_up_metapath;
 483                 }
 484
 485 lower_metapath:
 486                 /* Decrease height of metapath. */
 487                 if (mp != &clone) {
 488                         clone_metapath(&clone, mp);
 489                         mp = &clone;
 490                 }
 491                 brelse(mp->mp_bh[hgt]);
 492                 mp->mp_bh[hgt] = NULL;
 493                 if (!hgt)
 494                         break;
 495                 hgt--;
 496                 factor *= sdp->sd_inptrs;
 497
 498                 /* Advance in metadata tree. */
 499                 (mp->mp_list[hgt])++;
 500                 start = metapointer(hgt, mp);
 501                 end = metaend(hgt, mp);
 502                 if (start >= end) {
 503                         mp->mp_list[hgt] = 0;
 504                         if (!hgt)
 505                                 break;
 506                         goto lower_metapath;
 507                 }
 508
 509 fill_up_metapath:
 510                 /* Increase height of metapath. */
 511                 if (mp != &clone) {
 512                         clone_metapath(&clone, mp);
 513                         mp = &clone;
 514                 }
 515                 ret = fillup_metapath(ip, mp, ip->i_height - 1);
 516                 if (ret < 0)
 517                         break;
 518                 hgt += ret;
 519                 for (; ret; ret--)
 520                         do_div(factor, sdp->sd_inptrs);
 521                 mp->mp_aheight = hgt + 1;
 522         }
 523         if (mp == &clone)
 524                 release_metapath(mp);
 525         return ret;
 526 }
 527
 528 struct gfs2_hole_walker_args {
 529         u64 blocks;
 530 };
 531
 532 static const __be64 *gfs2_hole_walker(struct metapath *mp,
 533                 const __be64 *start, const __be64 *end,
 534                 u64 factor, void *data)
 535 {
 536         struct gfs2_hole_walker_args *args = data;
 537         const __be64 *ptr;
 538
 539         for (ptr = start; ptr < end; ptr++) {
 540                 if (*ptr) {
 541                         args->blocks += (ptr - start) * factor;
 542                         if (mp->mp_aheight == mp->mp_fheight)
 543                                 return WALK_STOP;
 544                         return ptr;  /* increase height */
 545                 }
 546         }
 547         args->blocks += (end - start) * factor;
 548         return WALK_NEXT;
 549 }
 550
 551 /**
 552  * gfs2_hole_size - figure out the size of a hole
 553  * @inode: The inode
 554  * @lblock: The logical starting block number
 555  * @len: How far to look (in blocks)
 556  * @mp: The metapath at lblock
 557  * @iomap: The iomap to store the hole size in
 558  *
 559  * This function modifies @mp.
 560  *
 561  * Returns: errno on error
 562  */
 563 static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
 564                           struct metapath *mp, struct iomap *iomap)
 565 {
 566         struct gfs2_hole_walker_args args = { };
 567         int ret = 0;
 568
 569         ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
 570         if (!ret)
 571                 iomap->length = args.blocks << inode->i_blkbits;
 572         return ret;
 573 }
 574
 575 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 576                                          struct gfs2_glock *gl, unsigned int i,
 577                                          unsigned offset, u64 bn)
 578 {
 579         __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 580                        ((i > 1) ? sizeof(struct gfs2_meta_header) :
 581                                  sizeof(struct gfs2_dinode)));
 582         BUG_ON(i < 1);
 583         BUG_ON(mp->mp_bh[i] != NULL);
 584         mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 585         gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 586         gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 587         gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 588         ptr += offset;
 589         *ptr = cpu_to_be64(bn);
 590         return ptr;
 591 }
 592
 593 enum alloc_state {
 594         ALLOC_DATA = 0,
 595         ALLOC_GROW_DEPTH = 1,
 596         ALLOC_GROW_HEIGHT = 2,
 597         /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 598 };
 599
 600 /**
 601  * gfs2_iomap_alloc - Build a metadata tree of the requested height
 602  * @inode: The GFS2 inode
 603  * @iomap: The iomap structure
 604  * @flags: iomap flags
 605  * @mp: The metapath, with proper height information calculated
 606  *
 607  * In this routine we may have to alloc:
 608  *   i) Indirect blocks to grow the metadata tree height
 609  *  ii) Indirect blocks to fill in lower part of the metadata tree
 610  * iii) Data blocks
 611  *
 612  * The function is in two parts. The first part works out the total
 613  * number of blocks which we need. The second part does the actual
 614  * allocation asking for an extent at a time (if enough contiguous free
 615  * blocks are available, there will only be one request per bmap call)
 616  * and uses the state machine to initialise the blocks in order.
 617  *
 618  * Right now, this function will allocate at most one indirect block
 619  * worth of data -- with a default block size of 4K, that's slightly
 620  * less than 2M.  If this limitation is ever removed to allow huge
 621  * allocations, we would probably still want to limit the iomap size we
 622  * return to avoid stalling other tasks during huge writes; the next
 623  * iomap iteration would then find the blocks already allocated.
 624  *
 625  * Returns: errno on error
 626  */
 627
 628 static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 629                             unsigned flags, struct metapath *mp)
 630 {
 631         struct gfs2_inode *ip = GFS2_I(inode);
 632         struct gfs2_sbd *sdp = GFS2_SB(inode);
 633         struct buffer_head *dibh = mp->mp_bh[0];
 634         u64 bn;
 635         unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 636         unsigned dblks = 0;
 637         unsigned ptrs_per_blk;
 638         const unsigned end_of_metadata = mp->mp_fheight - 1;
 639         int ret;
 640         enum alloc_state state;
 641         __be64 *ptr;
 642         __be64 zero_bn = 0;
 643         size_t maxlen = iomap->length >> inode->i_blkbits;
 644
 645         BUG_ON(mp->mp_aheight < 1);
 646         BUG_ON(dibh == NULL);
 647
 648         gfs2_trans_add_meta(ip->i_gl, dibh);
 649
 650         down_write(&ip->i_rw_mutex);
 651
 652         if (mp->mp_fheight == mp->mp_aheight) {
 653                 struct buffer_head *bh;
 654                 int eob;
 655
 656                 /* Bottom indirect block exists, find unalloced extent size */
 657                 ptr = metapointer(end_of_metadata, mp);
 658                 bh = mp->mp_bh[end_of_metadata];
 659                 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
 660                                            maxlen, &eob);
 661                 BUG_ON(dblks < 1);
 662                 state = ALLOC_DATA;
 663         } else {
 664                 /* Need to allocate indirect blocks */
 665                 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
 666                         sdp->sd_diptrs;
 667                 dblks = min(maxlen, (size_t)(ptrs_per_blk -
 668                                              mp->mp_list[end_of_metadata]));
 669                 if (mp->mp_fheight == ip->i_height) {
 670                         /* Writing into existing tree, extend tree down */
 671                         iblks = mp->mp_fheight - mp->mp_aheight;
 672                         state = ALLOC_GROW_DEPTH;
 673                 } else {
 674                         /* Building up tree height */
 675                         state = ALLOC_GROW_HEIGHT;
 676                         iblks = mp->mp_fheight - ip->i_height;
 677                         branch_start = metapath_branch_start(mp);
 678                         iblks += (mp->mp_fheight - branch_start);
 679                 }
 680         }
 681
 682         /* start of the second part of the function (state machine) */
 683
 684         blks = dblks + iblks;
 685         i = mp->mp_aheight;
 686         do {
 687                 n = blks - alloced;
 688                 ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 689                 if (ret)
 690                         goto out;
 691                 alloced += n;
 692                 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 693                         gfs2_trans_add_unrevoke(sdp, bn, n);
 694                 switch (state) {
 695                 /* Growing height of tree */
 696                 case ALLOC_GROW_HEIGHT:
 697                         if (i == 1) {
 698                                 ptr = (__be64 *)(dibh->b_data +
 699                                                  sizeof(struct gfs2_dinode));
 700                                 zero_bn = *ptr;
 701                         }
 702                         for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
 703                              i++, n--)
 704                                 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
 705                         if (i - 1 == mp->mp_fheight - ip->i_height) {
 706                                 i--;
 707                                 gfs2_buffer_copy_tail(mp->mp_bh[i],
 708                                                 sizeof(struct gfs2_meta_header),
 709                                                 dibh, sizeof(struct gfs2_dinode));
 710                                 gfs2_buffer_clear_tail(dibh,
 711                                                 sizeof(struct gfs2_dinode) +
 712                                                 sizeof(__be64));
 713                                 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
 714                                         sizeof(struct gfs2_meta_header));
 715                                 *ptr = zero_bn;
 716                                 state = ALLOC_GROW_DEPTH;
 717                                 for(i = branch_start; i < mp->mp_fheight; i++) {
 718                                         if (mp->mp_bh[i] == NULL)
 719                                                 break;
 720                                         brelse(mp->mp_bh[i]);
 721                                         mp->mp_bh[i] = NULL;
 722                                 }
 723                                 i = branch_start;
 724                         }
 725                         if (n == 0)
 726                                 break;
 727                 /* Branching from existing tree */
 728                 case ALLOC_GROW_DEPTH:
 729                         if (i > 1 && i < mp->mp_fheight)
 730                                 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
 731                         for (; i < mp->mp_fheight && n > 0; i++, n--)
 732                                 gfs2_indirect_init(mp, ip->i_gl, i,
 733                                                    mp->mp_list[i-1], bn++);
 734                         if (i == mp->mp_fheight)
 735                                 state = ALLOC_DATA;
 736                         if (n == 0)
 737                                 break;
 738                 /* Tree complete, adding data blocks */
 739                 case ALLOC_DATA:
 740                         BUG_ON(n > dblks);
 741                         BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
 742                         gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
 743                         dblks = n;
 744                         ptr = metapointer(end_of_metadata, mp);
 745                         iomap->addr = bn << inode->i_blkbits;
 746                         iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
 747                         while (n-- > 0)
 748                                 *ptr++ = cpu_to_be64(bn++);
 749                         break;
 750                 }
 751         } while (iomap->addr == IOMAP_NULL_ADDR);
 752
 753         iomap->length = (u64)dblks << inode->i_blkbits;
 754         ip->i_height = mp->mp_fheight;
 755         gfs2_add_inode_blocks(&ip->i_inode, alloced);
 756         gfs2_dinode_out(ip, dibh->b_data);
 757 out:
 758         up_write(&ip->i_rw_mutex);
 759         return ret;
 760 }
 761
 762 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 763 {
 764         struct gfs2_inode *ip = GFS2_I(inode);
 765
 766         iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 767                       sizeof(struct gfs2_dinode);
 768         iomap->offset = 0;
 769         iomap->length = i_size_read(inode);
 770         iomap->type = IOMAP_INLINE;
 771 }
 772
 773 #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
 774
 775 /**
 776  * gfs2_iomap_get - Map blocks from an inode to disk blocks
 777  * @inode: The inode
 778  * @pos: Starting position in bytes
 779  * @length: Length to map, in bytes
 780  * @flags: iomap flags
 781  * @iomap: The iomap structure
 782  * @mp: The metapath
 783  *
 784  * Returns: errno
 785  */
 786 static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 787                           unsigned flags, struct iomap *iomap,
 788                           struct metapath *mp)
 789 {
 790         struct gfs2_inode *ip = GFS2_I(inode);
 791         struct gfs2_sbd *sdp = GFS2_SB(inode);
 792         __be64 *ptr;
 793         sector_t lblock;
 794         sector_t lblock_stop;
 795         int ret;
 796         int eob;
 797         u64 len;
 798         struct buffer_head *bh;
 799         u8 height;
 800
 801         if (!length)
 802                 return -EINVAL;
 803
 804         if (gfs2_is_stuffed(ip)) {
 805                 if (flags & IOMAP_REPORT) {
 806                         if (pos >= i_size_read(inode))
 807                                 return -ENOENT;
 808                         gfs2_stuffed_iomap(inode, iomap);
 809                         return 0;
 810                 }
 811                 BUG_ON(!(flags & IOMAP_WRITE));
 812         }
 813         lblock = pos >> inode->i_blkbits;
 814         iomap->offset = lblock << inode->i_blkbits;
 815         lblock_stop = (pos + length - 1) >> inode->i_blkbits;
 816         len = lblock_stop - lblock + 1;
 817
 818         down_read(&ip->i_rw_mutex);
 819
 820         ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
 821         if (ret)
 822                 goto unlock;
 823
 824         height = ip->i_height;
 825         while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 826                 height++;
 827         find_metapath(sdp, lblock, mp, height);
 828         if (height > ip->i_height || gfs2_is_stuffed(ip))
 829                 goto do_alloc;
 830
 831         ret = lookup_metapath(ip, mp);
 832         if (ret)
 833                 goto unlock;
 834
 835         if (mp->mp_aheight != ip->i_height)
 836                 goto do_alloc;
 837
 838         ptr = metapointer(ip->i_height - 1, mp);
 839         if (*ptr == 0)
 840                 goto do_alloc;
 841
 842         bh = mp->mp_bh[ip->i_height - 1];
 843         len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
 844
 845         iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 846         iomap->length = len << inode->i_blkbits;
 847         iomap->type = IOMAP_MAPPED;
 848         iomap->flags = IOMAP_F_MERGED;
 849         if (eob)
 850                 iomap->flags |= IOMAP_F_GFS2_BOUNDARY;
 851
 852 out:
 853         iomap->bdev = inode->i_sb->s_bdev;
 854 unlock:
 855         up_read(&ip->i_rw_mutex);
 856         return ret;
 857
 858 do_alloc:
 859         iomap->addr = IOMAP_NULL_ADDR;
 860         iomap->length = len << inode->i_blkbits;
 861         iomap->type = IOMAP_HOLE;
 862         iomap->flags = 0;
 863         if (flags & IOMAP_REPORT) {
 864                 loff_t size = i_size_read(inode);
 865                 if (pos >= size)
 866                         ret = -ENOENT;
 867                 else if (height == ip->i_height)
 868                         ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 869                 else
 870                         iomap->length = size - pos;
 871         }
 872         goto out;
 873 }
 874
 875 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 876                             unsigned flags, struct iomap *iomap)
 877 {
 878         struct gfs2_inode *ip = GFS2_I(inode);
 879         struct metapath mp = { .mp_aheight = 1, };
 880         int ret;
 881
 882         trace_gfs2_iomap_start(ip, pos, length, flags);
 883         if (flags & IOMAP_WRITE) {
 884                 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
 885                 if (!ret && iomap->type == IOMAP_HOLE)
 886                         ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
 887                 release_metapath(&mp);
 888         } else {
 889                 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
 890                 release_metapath(&mp);
 891         }
 892         trace_gfs2_iomap_end(ip, iomap, ret);
 893         return ret;
 894 }
 895
 896 const struct iomap_ops gfs2_iomap_ops = {
 897         .iomap_begin = gfs2_iomap_begin,
 898 };
 899
 900 /**
 901  * gfs2_block_map - Map one or more blocks of an inode to a disk block
 902  * @inode: The inode
 903  * @lblock: The logical block number
 904  * @bh_map: The bh to be mapped
 905  * @create: True if its ok to alloc blocks to satify the request
 906  *
 907  * The size of the requested mapping is defined in bh_map->b_size.
 908  *
 909  * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 910  * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 911  * bh_map->b_size to indicate the size of the mapping when @lblock and
 912  * successive blocks are mapped, up to the requested size.
 913  *
 914  * Sets buffer_boundary() if a read of metadata will be required
 915  * before the next block can be mapped. Sets buffer_new() if new
 916  * blocks were allocated.
 917  *
 918  * Returns: errno
 919  */
 920
 921 int gfs2_block_map(struct inode *inode, sector_t lblock,
 922                    struct buffer_head *bh_map, int create)
 923 {
 924         struct gfs2_inode *ip = GFS2_I(inode);
 925         loff_t pos = (loff_t)lblock << inode->i_blkbits;
 926         loff_t length = bh_map->b_size;
 927         struct metapath mp = { .mp_aheight = 1, };
 928         struct iomap iomap = { };
 929         int ret;
 930
 931         clear_buffer_mapped(bh_map);
 932         clear_buffer_new(bh_map);
 933         clear_buffer_boundary(bh_map);
 934         trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 935
 936         if (create) {
 937                 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
 938                 if (!ret && iomap.type == IOMAP_HOLE)
 939                         ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
 940                 release_metapath(&mp);
 941         } else {
 942                 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
 943                 release_metapath(&mp);
 944
 945                 /* Return unmapped buffer beyond the end of file. */
 946                 if (ret == -ENOENT) {
 947                         ret = 0;
 948                         goto out;
 949                 }
 950         }
 951         if (ret)
 952                 goto out;
 953
 954         if (iomap.length > bh_map->b_size) {
 955                 iomap.length = bh_map->b_size;
 956                 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
 957         }
 958         if (iomap.addr != IOMAP_NULL_ADDR)
 959                 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 960         bh_map->b_size = iomap.length;
 961         if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
 962                 set_buffer_boundary(bh_map);
 963         if (iomap.flags & IOMAP_F_NEW)
 964                 set_buffer_new(bh_map);
 965
 966 out:
 967         trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 968         return ret;
 969 }
 970
 971 /*
 972  * Deprecated: do not use in new code
 973  */
 974 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 975 {
 976         struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 977         int ret;
 978         int create = *new;
 979
 980         BUG_ON(!extlen);
 981         BUG_ON(!dblock);
 982         BUG_ON(!new);
 983
 984         bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 985         ret = gfs2_block_map(inode, lblock, &bh, create);
 986         *extlen = bh.b_size >> inode->i_blkbits;
 987         *dblock = bh.b_blocknr;
 988         if (buffer_new(&bh))
 989                 *new = 1;
 990         else
 991                 *new = 0;
 992         return ret;
 993 }
 994
 995 /**
 996  * gfs2_block_zero_range - Deal with zeroing out data
 997  *
 998  * This is partly borrowed from ext3.
 999  */
1000 static int gfs2_block_zero_range(struct inode *inode, loff_t from,
1001                                  unsigned int length)
1002 {
1003         struct address_space *mapping = inode->i_mapping;
1004         struct gfs2_inode *ip = GFS2_I(inode);
1005         unsigned long index = from >> PAGE_SHIFT;
1006         unsigned offset = from & (PAGE_SIZE-1);
1007         unsigned blocksize, iblock, pos;
1008         struct buffer_head *bh;
1009         struct page *page;
1010         int err;
1011
1012         page = find_or_create_page(mapping, index, GFP_NOFS);
1013         if (!page)
1014                 return 0;
1015
1016         blocksize = inode->i_sb->s_blocksize;
1017         iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
1018
1019         if (!page_has_buffers(page))
1020                 create_empty_buffers(page, blocksize, 0);
1021
1022         /* Find the buffer that contains "offset" */
1023         bh = page_buffers(page);
1024         pos = blocksize;
1025         while (offset >= pos) {
1026                 bh = bh->b_this_page;
1027                 iblock++;
1028                 pos += blocksize;
1029         }
1030
1031         err = 0;
1032
1033         if (!buffer_mapped(bh)) {
1034                 gfs2_block_map(inode, iblock, bh, 0);
1035                 /* unmapped? It's a hole - nothing to do */
1036                 if (!buffer_mapped(bh))
1037                         goto unlock;
1038         }
1039
1040         /* Ok, it's mapped. Make sure it's up-to-date */
1041         if (PageUptodate(page))
1042                 set_buffer_uptodate(bh);
1043
1044         if (!buffer_uptodate(bh)) {
1045                 err = -EIO;
1046                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
1047                 wait_on_buffer(bh);
1048                 /* Uhhuh. Read error. Complain and punt. */
1049                 if (!buffer_uptodate(bh))
1050                         goto unlock;
1051                 err = 0;
1052         }
1053
1054         if (gfs2_is_jdata(ip))
1055                 gfs2_trans_add_data(ip->i_gl, bh);
1056         else
1057                 gfs2_ordered_add_inode(ip);
1058
1059         zero_user(page, offset, length);
1060         mark_buffer_dirty(bh);
1061 unlock:
1062         unlock_page(page);
1063         put_page(page);
1064         return err;
1065 }
1066
1067 #define GFS2_JTRUNC_REVOKES 8192
1068
1069 /**
1070  * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1071  * @inode: The inode being truncated
1072  * @oldsize: The original (larger) size
1073  * @newsize: The new smaller size
1074  *
1075  * With jdata files, we have to journal a revoke for each block which is
1076  * truncated. As a result, we need to split this into separate transactions
1077  * if the number of pages being truncated gets too large.
1078  */
1079
1080 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1081 {
1082         struct gfs2_sbd *sdp = GFS2_SB(inode);
1083         u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1084         u64 chunk;
1085         int error;
1086
1087         while (oldsize != newsize) {
1088                 struct gfs2_trans *tr;
1089                 unsigned int offs;
1090
1091                 chunk = oldsize - newsize;
1092                 if (chunk > max_chunk)
1093                         chunk = max_chunk;
1094
1095                 offs = oldsize & ~PAGE_MASK;
1096                 if (offs && chunk > PAGE_SIZE)
1097                         chunk = offs + ((chunk - offs) & PAGE_MASK);
1098
1099                 truncate_pagecache(inode, oldsize - chunk);
1100                 oldsize -= chunk;
1101
1102                 tr = current->journal_info;
1103                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1104                         continue;
1105
1106                 gfs2_trans_end(sdp);
1107                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1108                 if (error)
1109                         return error;
1110         }
1111
1112         return 0;
1113 }
1114
1115 static int trunc_start(struct inode *inode, u64 newsize)
1116 {
1117         struct gfs2_inode *ip = GFS2_I(inode);
1118         struct gfs2_sbd *sdp = GFS2_SB(inode);
1119         struct buffer_head *dibh = NULL;
1120         int journaled = gfs2_is_jdata(ip);
1121         u64 oldsize = inode->i_size;
1122         int error;
1123
1124         if (journaled)
1125                 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1126         else
1127                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1128         if (error)
1129                 return error;
1130
1131         error = gfs2_meta_inode_buffer(ip, &dibh);
1132         if (error)
1133                 goto out;
1134
1135         gfs2_trans_add_meta(ip->i_gl, dibh);
1136
1137         if (gfs2_is_stuffed(ip)) {
1138                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1139         } else {
1140                 unsigned int blocksize = i_blocksize(inode);
1141                 unsigned int offs = newsize & (blocksize - 1);
1142                 if (offs) {
1143                         error = gfs2_block_zero_range(inode, newsize,
1144                                                       blocksize - offs);
1145                         if (error)
1146                                 goto out;
1147                 }
1148                 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1149         }
1150
1151         i_size_write(inode, newsize);
1152         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1153         gfs2_dinode_out(ip, dibh->b_data);
1154
1155         if (journaled)
1156                 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1157         else
1158                 truncate_pagecache(inode, newsize);
1159
1160 out:
1161         brelse(dibh);
1162         if (current->journal_info)
1163                 gfs2_trans_end(sdp);
1164         return error;
1165 }
1166
1167 int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1168                          struct iomap *iomap)
1169 {
1170         struct metapath mp = { .mp_aheight = 1, };
1171         int ret;
1172
1173         ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1174         if (!ret && iomap->type == IOMAP_HOLE)
1175                 ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
1176         release_metapath(&mp);
1177         return ret;
1178 }
1179
1180 /**
1181  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1182  * @ip: inode
1183  * @rg_gh: holder of resource group glock
1184  * @bh: buffer head to sweep
1185  * @start: starting point in bh
1186  * @end: end point in bh
1187  * @meta: true if bh points to metadata (rather than data)
1188  * @btotal: place to keep count of total blocks freed
1189  *
1190  * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1191  * free, and free them all. However, we do it one rgrp at a time. If this
1192  * block has references to multiple rgrps, we break it into individual
1193  * transactions. This allows other processes to use the rgrps while we're
1194  * focused on a single one, for better concurrency / performance.
1195  * At every transaction boundary, we rewrite the inode into the journal.
1196  * That way the bitmaps are kept consistent with the inode and we can recover
1197  * if we're interrupted by power-outages.
1198  *
1199  * Returns: 0, or return code if an error occurred.
1200  *          *btotal has the total number of blocks freed
1201  */
1202 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1203                               struct buffer_head *bh, __be64 *start, __be64 *end,
1204                               bool meta, u32 *btotal)
1205 {
1206         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1207         struct gfs2_rgrpd *rgd;
1208         struct gfs2_trans *tr;
1209         __be64 *p;
1210         int blks_outside_rgrp;
1211         u64 bn, bstart, isize_blks;
1212         s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1213         int ret = 0;
1214         bool buf_in_tr = false; /* buffer was added to transaction */
1215
1216 more_rgrps:
1217         rgd = NULL;
1218         if (gfs2_holder_initialized(rd_gh)) {
1219                 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1220                 gfs2_assert_withdraw(sdp,
1221                              gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1222         }
1223         blks_outside_rgrp = 0;
1224         bstart = 0;
1225         blen = 0;
1226
1227         for (p = start; p < end; p++) {
1228                 if (!*p)
1229                         continue;
1230                 bn = be64_to_cpu(*p);
1231
1232                 if (rgd) {
1233                         if (!rgrp_contains_block(rgd, bn)) {
1234                                 blks_outside_rgrp++;
1235                                 continue;
1236                         }
1237                 } else {
1238                         rgd = gfs2_blk2rgrpd(sdp, bn, true);
1239                         if (unlikely(!rgd)) {
1240                                 ret = -EIO;
1241                                 goto out;
1242                         }
1243                         ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1244                                                  0, rd_gh);
1245                         if (ret)
1246                                 goto out;
1247
1248                         /* Must be done with the rgrp glock held: */
1249                         if (gfs2_rs_active(&ip->i_res) &&
1250                             rgd == ip->i_res.rs_rbm.rgd)
1251                                 gfs2_rs_deltree(&ip->i_res);
1252                 }
1253
1254                 /* The size of our transactions will be unknown until we
1255                    actually process all the metadata blocks that relate to
1256                    the rgrp. So we estimate. We know it can't be more than
1257                    the dinode's i_blocks and we don't want to exceed the
1258                    journal flush threshold, sd_log_thresh2. */
1259                 if (current->journal_info == NULL) {
1260                         unsigned int jblocks_rqsted, revokes;
1261
1262                         jblocks_rqsted = rgd->rd_length + RES_DINODE +
1263                                 RES_INDIRECT;
1264                         isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1265                         if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1266                                 jblocks_rqsted +=
1267                                         atomic_read(&sdp->sd_log_thresh2);
1268                         else
1269                                 jblocks_rqsted += isize_blks;
1270                         revokes = jblocks_rqsted;
1271                         if (meta)
1272                                 revokes += end - start;
1273                         else if (ip->i_depth)
1274                                 revokes += sdp->sd_inptrs;
1275                         ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1276                         if (ret)
1277                                 goto out_unlock;
1278                         down_write(&ip->i_rw_mutex);
1279                 }
1280                 /* check if we will exceed the transaction blocks requested */
1281                 tr = current->journal_info;
1282                 if (tr->tr_num_buf_new + RES_STATFS +
1283                     RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1284                         /* We set blks_outside_rgrp to ensure the loop will
1285                            be repeated for the same rgrp, but with a new
1286                            transaction. */
1287                         blks_outside_rgrp++;
1288                         /* This next part is tricky. If the buffer was added
1289                            to the transaction, we've already set some block
1290                            pointers to 0, so we better follow through and free
1291                            them, or we will introduce corruption (so break).
1292                            This may be impossible, or at least rare, but I
1293                            decided to cover the case regardless.
1294
1295                            If the buffer was not added to the transaction
1296                            (this call), doing so would exceed our transaction
1297                            size, so we need to end the transaction and start a
1298                            new one (so goto). */
1299
1300                         if (buf_in_tr)
1301                                 break;
1302                         goto out_unlock;
1303                 }
1304
1305                 gfs2_trans_add_meta(ip->i_gl, bh);
1306                 buf_in_tr = true;
1307                 *p = 0;
1308                 if (bstart + blen == bn) {
1309                         blen++;
1310                         continue;
1311                 }
1312                 if (bstart) {
1313                         __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1314                         (*btotal) += blen;
1315                         gfs2_add_inode_blocks(&ip->i_inode, -blen);
1316                 }
1317                 bstart = bn;
1318                 blen = 1;
1319         }
1320         if (bstart) {
1321                 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1322                 (*btotal) += blen;
1323                 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1324         }
1325 out_unlock:
1326         if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1327                                             outside the rgrp we just processed,
1328                                             do it all over again. */
1329                 if (current->journal_info) {
1330                         struct buffer_head *dibh;
1331
1332                         ret = gfs2_meta_inode_buffer(ip, &dibh);
1333                         if (ret)
1334                                 goto out;
1335
1336                         /* Every transaction boundary, we rewrite the dinode
1337                            to keep its di_blocks current in case of failure. */
1338                         ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1339                                 current_time(&ip->i_inode);
1340                         gfs2_trans_add_meta(ip->i_gl, dibh);
1341                         gfs2_dinode_out(ip, dibh->b_data);
1342                         brelse(dibh);
1343                         up_write(&ip->i_rw_mutex);
1344                         gfs2_trans_end(sdp);
1345                 }
1346                 gfs2_glock_dq_uninit(rd_gh);
1347                 cond_resched();
1348                 goto more_rgrps;
1349         }
1350 out:
1351         return ret;
1352 }
1353
1354 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1355 {
1356         if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1357                 return false;
1358         return true;
1359 }
1360
1361 /**
1362  * find_nonnull_ptr - find a non-null pointer given a metapath and height
1363  * @mp: starting metapath
1364  * @h: desired height to search
1365  *
1366  * Assumes the metapath is valid (with buffers) out to height h.
1367  * Returns: true if a non-null pointer was found in the metapath buffer
1368  *          false if all remaining pointers are NULL in the buffer
1369  */
1370 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1371                              unsigned int h,
1372                              __u16 *end_list, unsigned int end_aligned)
1373 {
1374         struct buffer_head *bh = mp->mp_bh[h];
1375         __be64 *first, *ptr, *end;
1376
1377         first = metaptr1(h, mp);
1378         ptr = first + mp->mp_list[h];
1379         end = (__be64 *)(bh->b_data + bh->b_size);
1380         if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1381                 bool keep_end = h < end_aligned;
1382                 end = first + end_list[h] + keep_end;
1383         }
1384
1385         while (ptr < end) {
1386                 if (*ptr) { /* if we have a non-null pointer */
1387                         mp->mp_list[h] = ptr - first;
1388                         h++;
1389                         if (h < GFS2_MAX_META_HEIGHT)
1390                                 mp->mp_list[h] = 0;
1391                         return true;
1392                 }
1393                 ptr++;
1394         }
1395         return false;
1396 }
1397
1398 enum dealloc_states {
1399         DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
1400         DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
1401         DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
1402         DEALLOC_DONE = 3,       /* process complete */
1403 };
1404
1405 static inline void
1406 metapointer_range(struct metapath *mp, int height,
1407                   __u16 *start_list, unsigned int start_aligned,
1408                   __u16 *end_list, unsigned int end_aligned,
1409                   __be64 **start, __be64 **end)
1410 {
1411         struct buffer_head *bh = mp->mp_bh[height];
1412         __be64 *first;
1413
1414         first = metaptr1(height, mp);
1415         *start = first;
1416         if (mp_eq_to_hgt(mp, start_list, height)) {
1417                 bool keep_start = height < start_aligned;
1418                 *start = first + start_list[height] + keep_start;
1419         }
1420         *end = (__be64 *)(bh->b_data + bh->b_size);
1421         if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1422                 bool keep_end = height < end_aligned;
1423                 *end = first + end_list[height] + keep_end;
1424         }
1425 }
1426
1427 static inline bool walk_done(struct gfs2_sbd *sdp,
1428                              struct metapath *mp, int height,
1429                              __u16 *end_list, unsigned int end_aligned)
1430 {
1431         __u16 end;
1432
1433         if (end_list) {
1434                 bool keep_end = height < end_aligned;
1435                 if (!mp_eq_to_hgt(mp, end_list, height))
1436                         return false;
1437                 end = end_list[height] + keep_end;
1438         } else
1439                 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1440         return mp->mp_list[height] >= end;
1441 }
1442
1443 /**
1444  * punch_hole - deallocate blocks in a file
1445  * @ip: inode to truncate
1446  * @offset: the start of the hole
1447  * @length: the size of the hole (or 0 for truncate)
1448  *
1449  * Punch a hole into a file or truncate a file at a given position.  This
1450  * function operates in whole blocks (@offset and @length are rounded
1451  * accordingly); partially filled blocks must be cleared otherwise.
1452  *
1453  * This function works from the bottom up, and from the right to the left. In
1454  * other words, it strips off the highest layer (data) before stripping any of
1455  * the metadata. Doing it this way is best in case the operation is interrupted
1456  * by power failure, etc.  The dinode is rewritten in every transaction to
1457  * guarantee integrity.
1458  */
1459 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1460 {
1461         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1462         u64 maxsize = sdp->sd_heightsize[ip->i_height];
1463         struct metapath mp = {};
1464         struct buffer_head *dibh, *bh;
1465         struct gfs2_holder rd_gh;
1466         unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1467         u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1468         __u16 start_list[GFS2_MAX_META_HEIGHT];
1469         __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1470         unsigned int start_aligned, uninitialized_var(end_aligned);
1471         unsigned int strip_h = ip->i_height - 1;
1472         u32 btotal = 0;
1473         int ret, state;
1474         int mp_h; /* metapath buffers are read in to this height */
1475         u64 prev_bnr = 0;
1476         __be64 *start, *end;
1477
1478         if (offset >= maxsize) {
1479                 /*
1480                  * The starting point lies beyond the allocated meta-data;
1481                  * there are no blocks do deallocate.
1482                  */
1483                 return 0;
1484         }
1485
1486         /*
1487          * The start position of the hole is defined by lblock, start_list, and
1488          * start_aligned.  The end position of the hole is defined by lend,
1489          * end_list, and end_aligned.
1490          *
1491          * start_aligned and end_aligned define down to which height the start
1492          * and end positions are aligned to the metadata tree (i.e., the
1493          * position is a multiple of the metadata granularity at the height
1494          * above).  This determines at which heights additional meta pointers
1495          * needs to be preserved for the remaining data.
1496          */
1497
1498         if (length) {
1499                 u64 end_offset = offset + length;
1500                 u64 lend;
1501
1502                 /*
1503                  * Clip the end at the maximum file size for the given height:
1504                  * that's how far the metadata goes; files bigger than that
1505                  * will have additional layers of indirection.
1506                  */
1507                 if (end_offset > maxsize)
1508                         end_offset = maxsize;
1509                 lend = end_offset >> bsize_shift;
1510
1511                 if (lblock >= lend)
1512                         return 0;
1513
1514                 find_metapath(sdp, lend, &mp, ip->i_height);
1515                 end_list = __end_list;
1516                 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1517
1518                 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1519                         if (end_list[mp_h])
1520                                 break;
1521                 }
1522                 end_aligned = mp_h;
1523         }
1524
1525         find_metapath(sdp, lblock, &mp, ip->i_height);
1526         memcpy(start_list, mp.mp_list, sizeof(start_list));
1527
1528         for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1529                 if (start_list[mp_h])
1530                         break;
1531         }
1532         start_aligned = mp_h;
1533
1534         ret = gfs2_meta_inode_buffer(ip, &dibh);
1535         if (ret)
1536                 return ret;
1537
1538         mp.mp_bh[0] = dibh;
1539         ret = lookup_metapath(ip, &mp);
1540         if (ret)
1541                 goto out_metapath;
1542
1543         /* issue read-ahead on metadata */
1544         for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1545                 metapointer_range(&mp, mp_h, start_list, start_aligned,
1546                                   end_list, end_aligned, &start, &end);
1547                 gfs2_metapath_ra(ip->i_gl, start, end);
1548         }
1549
1550         if (mp.mp_aheight == ip->i_height)
1551                 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1552         else
1553                 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1554
1555         ret = gfs2_rindex_update(sdp);
1556         if (ret)
1557                 goto out_metapath;
1558
1559         ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1560         if (ret)
1561                 goto out_metapath;
1562         gfs2_holder_mark_uninitialized(&rd_gh);
1563
1564         mp_h = strip_h;
1565
1566         while (state != DEALLOC_DONE) {
1567                 switch (state) {
1568                 /* Truncate a full metapath at the given strip height.
1569                  * Note that strip_h == mp_h in order to be in this state. */
1570                 case DEALLOC_MP_FULL:
1571                         bh = mp.mp_bh[mp_h];
1572                         gfs2_assert_withdraw(sdp, bh);
1573                         if (gfs2_assert_withdraw(sdp,
1574                                                  prev_bnr != bh->b_blocknr)) {
1575                                 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1576                                        "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1577                                        sdp->sd_fsname,
1578                                        (unsigned long long)ip->i_no_addr,
1579                                        prev_bnr, ip->i_height, strip_h, mp_h);
1580                         }
1581                         prev_bnr = bh->b_blocknr;
1582
1583                         if (gfs2_metatype_check(sdp, bh,
1584                                                 (mp_h ? GFS2_METATYPE_IN :
1585                                                         GFS2_METATYPE_DI))) {
1586                                 ret = -EIO;
1587                                 goto out;
1588                         }
1589
1590                         /*
1591                          * Below, passing end_aligned as 0 gives us the
1592                          * metapointer range excluding the end point: the end
1593                          * point is the first metapath we must not deallocate!
1594                          */
1595
1596                         metapointer_range(&mp, mp_h, start_list, start_aligned,
1597                                           end_list, 0 /* end_aligned */,
1598                                           &start, &end);
1599                         ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1600                                                  start, end,
1601                                                  mp_h != ip->i_height - 1,
1602                                                  &btotal);
1603
1604                         /* If we hit an error or just swept dinode buffer,
1605                            just exit. */
1606                         if (ret || !mp_h) {
1607                                 state = DEALLOC_DONE;
1608                                 break;
1609                         }
1610                         state = DEALLOC_MP_LOWER;
1611                         break;
1612
1613                 /* lower the metapath strip height */
1614                 case DEALLOC_MP_LOWER:
1615                         /* We're done with the current buffer, so release it,
1616                            unless it's the dinode buffer. Then back up to the
1617                            previous pointer. */
1618                         if (mp_h) {
1619                                 brelse(mp.mp_bh[mp_h]);
1620                                 mp.mp_bh[mp_h] = NULL;
1621                         }
1622                         /* If we can't get any lower in height, we've stripped
1623                            off all we can. Next step is to back up and start
1624                            stripping the previous level of metadata. */
1625                         if (mp_h == 0) {
1626                                 strip_h--;
1627                                 memcpy(mp.mp_list, start_list, sizeof(start_list));
1628                                 mp_h = strip_h;
1629                                 state = DEALLOC_FILL_MP;
1630                                 break;
1631                         }
1632                         mp.mp_list[mp_h] = 0;
1633                         mp_h--; /* search one metadata height down */
1634                         mp.mp_list[mp_h]++;
1635                         if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1636                                 break;
1637                         /* Here we've found a part of the metapath that is not
1638                          * allocated. We need to search at that height for the
1639                          * next non-null pointer. */
1640                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1641                                 state = DEALLOC_FILL_MP;
1642                                 mp_h++;
1643                         }
1644                         /* No more non-null pointers at this height. Back up
1645                            to the previous height and try again. */
1646                         break; /* loop around in the same state */
1647
1648                 /* Fill the metapath with buffers to the given height. */
1649                 case DEALLOC_FILL_MP:
1650                         /* Fill the buffers out to the current height. */
1651                         ret = fillup_metapath(ip, &mp, mp_h);
1652                         if (ret < 0)
1653                                 goto out;
1654
1655                         /* issue read-ahead on metadata */
1656                         if (mp.mp_aheight > 1) {
1657                                 for (; ret > 1; ret--) {
1658                                         metapointer_range(&mp, mp.mp_aheight - ret,
1659                                                           start_list, start_aligned,
1660                                                           end_list, end_aligned,
1661                                                           &start, &end);
1662                                         gfs2_metapath_ra(ip->i_gl, start, end);
1663                                 }
1664                         }
1665
1666                         /* If buffers found for the entire strip height */
1667                         if (mp.mp_aheight - 1 == strip_h) {
1668                                 state = DEALLOC_MP_FULL;
1669                                 break;
1670                         }
1671                         if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1672                                 mp_h = mp.mp_aheight - 1;
1673
1674                         /* If we find a non-null block pointer, crawl a bit
1675                            higher up in the metapath and try again, otherwise
1676                            we need to look lower for a new starting point. */
1677                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1678                                 mp_h++;
1679                         else
1680                                 state = DEALLOC_MP_LOWER;
1681                         break;
1682                 }
1683         }
1684
1685         if (btotal) {
1686                 if (current->journal_info == NULL) {
1687                         ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1688                                                RES_QUOTA, 0);
1689                         if (ret)
1690                                 goto out;
1691                         down_write(&ip->i_rw_mutex);
1692                 }
1693                 gfs2_statfs_change(sdp, 0, +btotal, 0);
1694                 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1695                                   ip->i_inode.i_gid);
1696                 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1697                 gfs2_trans_add_meta(ip->i_gl, dibh);
1698                 gfs2_dinode_out(ip, dibh->b_data);
1699                 up_write(&ip->i_rw_mutex);
1700                 gfs2_trans_end(sdp);
1701         }
1702
1703 out:
1704         if (gfs2_holder_initialized(&rd_gh))
1705                 gfs2_glock_dq_uninit(&rd_gh);
1706         if (current->journal_info) {
1707                 up_write(&ip->i_rw_mutex);
1708                 gfs2_trans_end(sdp);
1709                 cond_resched();
1710         }
1711         gfs2_quota_unhold(ip);
1712 out_metapath:
1713         release_metapath(&mp);
1714         return ret;
1715 }
1716
1717 static int trunc_end(struct gfs2_inode *ip)
1718 {
1719         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1720         struct buffer_head *dibh;
1721         int error;
1722
1723         error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1724         if (error)
1725                 return error;
1726
1727         down_write(&ip->i_rw_mutex);
1728
1729         error = gfs2_meta_inode_buffer(ip, &dibh);
1730         if (error)
1731                 goto out;
1732
1733         if (!i_size_read(&ip->i_inode)) {
1734                 ip->i_height = 0;
1735                 ip->i_goal = ip->i_no_addr;
1736                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1737                 gfs2_ordered_del_inode(ip);
1738         }
1739         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1740         ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1741
1742         gfs2_trans_add_meta(ip->i_gl, dibh);
1743         gfs2_dinode_out(ip, dibh->b_data);
1744         brelse(dibh);
1745
1746 out:
1747         up_write(&ip->i_rw_mutex);
1748         gfs2_trans_end(sdp);
1749         return error;
1750 }
1751
1752 /**
1753  * do_shrink - make a file smaller
1754  * @inode: the inode
1755  * @newsize: the size to make the file
1756  *
1757  * Called with an exclusive lock on @inode. The @size must
1758  * be equal to or smaller than the current inode size.
1759  *
1760  * Returns: errno
1761  */
1762
1763 static int do_shrink(struct inode *inode, u64 newsize)
1764 {
1765         struct gfs2_inode *ip = GFS2_I(inode);
1766         int error;
1767
1768         error = trunc_start(inode, newsize);
1769         if (error < 0)
1770                 return error;
1771         if (gfs2_is_stuffed(ip))
1772                 return 0;
1773
1774         error = punch_hole(ip, newsize, 0);
1775         if (error == 0)
1776                 error = trunc_end(ip);
1777
1778         return error;
1779 }
1780
1781 void gfs2_trim_blocks(struct inode *inode)
1782 {
1783         int ret;
1784
1785         ret = do_shrink(inode, inode->i_size);
1786         WARN_ON(ret != 0);
1787 }
1788
1789 /**
1790  * do_grow - Touch and update inode size
1791  * @inode: The inode
1792  * @size: The new size
1793  *
1794  * This function updates the timestamps on the inode and
1795  * may also increase the size of the inode. This function
1796  * must not be called with @size any smaller than the current
1797  * inode size.
1798  *
1799  * Although it is not strictly required to unstuff files here,
1800  * earlier versions of GFS2 have a bug in the stuffed file reading
1801  * code which will result in a buffer overrun if the size is larger
1802  * than the max stuffed file size. In order to prevent this from
1803  * occurring, such files are unstuffed, but in other cases we can
1804  * just update the inode size directly.
1805  *
1806  * Returns: 0 on success, or -ve on error
1807  */
1808
1809 static int do_grow(struct inode *inode, u64 size)
1810 {
1811         struct gfs2_inode *ip = GFS2_I(inode);
1812         struct gfs2_sbd *sdp = GFS2_SB(inode);
1813         struct gfs2_alloc_parms ap = { .target = 1, };
1814         struct buffer_head *dibh;
1815         int error;
1816         int unstuff = 0;
1817
1818         if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1819                 error = gfs2_quota_lock_check(ip, &ap);
1820                 if (error)
1821                         return error;
1822
1823                 error = gfs2_inplace_reserve(ip, &ap);
1824                 if (error)
1825                         goto do_grow_qunlock;
1826                 unstuff = 1;
1827         }
1828
1829         error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1830                                  (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1831                                   0 : RES_QUOTA), 0);
1832         if (error)
1833                 goto do_grow_release;
1834
1835         if (unstuff) {
1836                 error = gfs2_unstuff_dinode(ip, NULL);
1837                 if (error)
1838                         goto do_end_trans;
1839         }
1840
1841         error = gfs2_meta_inode_buffer(ip, &dibh);
1842         if (error)
1843                 goto do_end_trans;
1844
1845         i_size_write(inode, size);
1846         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1847         gfs2_trans_add_meta(ip->i_gl, dibh);
1848         gfs2_dinode_out(ip, dibh->b_data);
1849         brelse(dibh);
1850
1851 do_end_trans:
1852         gfs2_trans_end(sdp);
1853 do_grow_release:
1854         if (unstuff) {
1855                 gfs2_inplace_release(ip);
1856 do_grow_qunlock:
1857                 gfs2_quota_unlock(ip);
1858         }
1859         return error;
1860 }
1861
1862 /**
1863  * gfs2_setattr_size - make a file a given size
1864  * @inode: the inode
1865  * @newsize: the size to make the file
1866  *
1867  * The file size can grow, shrink, or stay the same size. This
1868  * is called holding i_rwsem and an exclusive glock on the inode
1869  * in question.
1870  *
1871  * Returns: errno
1872  */
1873
1874 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1875 {
1876         struct gfs2_inode *ip = GFS2_I(inode);
1877         int ret;
1878
1879         BUG_ON(!S_ISREG(inode->i_mode));
1880
1881         ret = inode_newsize_ok(inode, newsize);
1882         if (ret)
1883                 return ret;
1884
1885         inode_dio_wait(inode);
1886
1887         ret = gfs2_rsqa_alloc(ip);
1888         if (ret)
1889                 goto out;
1890
1891         if (newsize >= inode->i_size) {
1892                 ret = do_grow(inode, newsize);
1893                 goto out;
1894         }
1895
1896         ret = do_shrink(inode, newsize);
1897 out:
1898         gfs2_rsqa_delete(ip, NULL);
1899         return ret;
1900 }
1901
1902 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1903 {
1904         int error;
1905         error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1906         if (!error)
1907                 error = trunc_end(ip);
1908         return error;
1909 }
1910
1911 int gfs2_file_dealloc(struct gfs2_inode *ip)
1912 {
1913         return punch_hole(ip, 0, 0);
1914 }
1915
1916 /**
1917  * gfs2_free_journal_extents - Free cached journal bmap info
1918  * @jd: The journal
1919  *
1920  */
1921
1922 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1923 {
1924         struct gfs2_journal_extent *jext;
1925
1926         while(!list_empty(&jd->extent_list)) {
1927                 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1928                 list_del(&jext->list);
1929                 kfree(jext);
1930         }
1931 }
1932
1933 /**
1934  * gfs2_add_jextent - Add or merge a new extent to extent cache
1935  * @jd: The journal descriptor
1936  * @lblock: The logical block at start of new extent
1937  * @dblock: The physical block at start of new extent
1938  * @blocks: Size of extent in fs blocks
1939  *
1940  * Returns: 0 on success or -ENOMEM
1941  */
1942
1943 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1944 {
1945         struct gfs2_journal_extent *jext;
1946
1947         if (!list_empty(&jd->extent_list)) {
1948                 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1949                 if ((jext->dblock + jext->blocks) == dblock) {
1950                         jext->blocks += blocks;
1951                         return 0;
1952                 }
1953         }
1954
1955         jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1956         if (jext == NULL)
1957                 return -ENOMEM;
1958         jext->dblock = dblock;
1959         jext->lblock = lblock;
1960         jext->blocks = blocks;
1961         list_add_tail(&jext->list, &jd->extent_list);
1962         jd->nr_extents++;
1963         return 0;
1964 }
1965
1966 /**
1967  * gfs2_map_journal_extents - Cache journal bmap info
1968  * @sdp: The super block
1969  * @jd: The journal to map
1970  *
1971  * Create a reusable "extent" mapping from all logical
1972  * blocks to all physical blocks for the given journal.  This will save
1973  * us time when writing journal blocks.  Most journals will have only one
1974  * extent that maps all their logical blocks.  That's because gfs2.mkfs
1975  * arranges the journal blocks sequentially to maximize performance.
1976  * So the extent would map the first block for the entire file length.
1977  * However, gfs2_jadd can happen while file activity is happening, so
1978  * those journals may not be sequential.  Less likely is the case where
1979  * the users created their own journals by mounting the metafs and
1980  * laying it out.  But it's still possible.  These journals might have
1981  * several extents.
1982  *
1983  * Returns: 0 on success, or error on failure
1984  */
1985
1986 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1987 {
1988         u64 lblock = 0;
1989         u64 lblock_stop;
1990         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1991         struct buffer_head bh;
1992         unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1993         u64 size;
1994         int rc;
1995
1996         lblock_stop = i_size_read(jd->jd_inode) >> shift;
1997         size = (lblock_stop - lblock) << shift;
1998         jd->nr_extents = 0;
1999         WARN_ON(!list_empty(&jd->extent_list));
2000
2001         do {
2002                 bh.b_state = 0;
2003                 bh.b_blocknr = 0;
2004                 bh.b_size = size;
2005                 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2006                 if (rc || !buffer_mapped(&bh))
2007                         goto fail;
2008                 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2009                 if (rc)
2010                         goto fail;
2011                 size -= bh.b_size;
2012                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2013         } while(size > 0);
2014
2015         fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
2016                 jd->nr_extents);
2017         return 0;
2018
2019 fail:
2020         fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2021                 rc, jd->jd_jid,
2022                 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2023                 jd->nr_extents);
2024         fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2025                 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2026                 bh.b_state, (unsigned long long)bh.b_size);
2027         gfs2_free_journal_extents(jd);
2028         return rc;
2029 }
2030
2031 /**
2032  * gfs2_write_alloc_required - figure out if a write will require an allocation
2033  * @ip: the file being written to
2034  * @offset: the offset to write to
2035  * @len: the number of bytes being written
2036  *
2037  * Returns: 1 if an alloc is required, 0 otherwise
2038  */
2039
2040 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2041                               unsigned int len)
2042 {
2043         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2044         struct buffer_head bh;
2045         unsigned int shift;
2046         u64 lblock, lblock_stop, size;
2047         u64 end_of_file;
2048
2049         if (!len)
2050                 return 0;
2051
2052         if (gfs2_is_stuffed(ip)) {
2053                 if (offset + len > gfs2_max_stuffed_size(ip))
2054                         return 1;
2055                 return 0;
2056         }
2057
2058         shift = sdp->sd_sb.sb_bsize_shift;
2059         BUG_ON(gfs2_is_dir(ip));
2060         end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
2061         lblock = offset >> shift;
2062         lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2063         if (lblock_stop > end_of_file)
2064                 return 1;
2065
2066         size = (lblock_stop - lblock) << shift;
2067         do {
2068                 bh.b_state = 0;
2069                 bh.b_size = size;
2070                 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2071                 if (!buffer_mapped(&bh))
2072                         return 1;
2073                 size -= bh.b_size;
2074                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2075         } while(size > 0);
2076
2077         return 0;
2078 }
2079
2080 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2081 {
2082         struct gfs2_inode *ip = GFS2_I(inode);
2083         struct buffer_head *dibh;
2084         int error;
2085
2086         if (offset >= inode->i_size)
2087                 return 0;
2088         if (offset + length > inode->i_size)
2089                 length = inode->i_size - offset;
2090
2091         error = gfs2_meta_inode_buffer(ip, &dibh);
2092         if (error)
2093                 return error;
2094         gfs2_trans_add_meta(ip->i_gl, dibh);
2095         memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2096                length);
2097         brelse(dibh);
2098         return 0;
2099 }
2100
2101 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2102                                          loff_t length)
2103 {
2104         struct gfs2_sbd *sdp = GFS2_SB(inode);
2105         loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2106         int error;
2107
2108         while (length) {
2109                 struct gfs2_trans *tr;
2110                 loff_t chunk;
2111                 unsigned int offs;
2112
2113                 chunk = length;
2114                 if (chunk > max_chunk)
2115                         chunk = max_chunk;
2116
2117                 offs = offset & ~PAGE_MASK;
2118                 if (offs && chunk > PAGE_SIZE)
2119                         chunk = offs + ((chunk - offs) & PAGE_MASK);
2120
2121                 truncate_pagecache_range(inode, offset, chunk);
2122                 offset += chunk;
2123                 length -= chunk;
2124
2125                 tr = current->journal_info;
2126                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2127                         continue;
2128
2129                 gfs2_trans_end(sdp);
2130                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2131                 if (error)
2132                         return error;
2133         }
2134         return 0;
2135 }
2136
2137 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2138 {
2139         struct inode *inode = file_inode(file);
2140         struct gfs2_inode *ip = GFS2_I(inode);
2141         struct gfs2_sbd *sdp = GFS2_SB(inode);
2142         int error;
2143
2144         if (gfs2_is_jdata(ip))
2145                 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2146                                          GFS2_JTRUNC_REVOKES);
2147         else
2148                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2149         if (error)
2150                 return error;
2151
2152         if (gfs2_is_stuffed(ip)) {
2153                 error = stuffed_zero_range(inode, offset, length);
2154                 if (error)
2155                         goto out;
2156         } else {
2157                 unsigned int start_off, end_off, blocksize;
2158
2159                 blocksize = i_blocksize(inode);
2160                 start_off = offset & (blocksize - 1);
2161                 end_off = (offset + length) & (blocksize - 1);
2162                 if (start_off) {
2163                         unsigned int len = length;
2164                         if (length > blocksize - start_off)
2165                                 len = blocksize - start_off;
2166                         error = gfs2_block_zero_range(inode, offset, len);
2167                         if (error)
2168                                 goto out;
2169                         if (start_off + length < blocksize)
2170                                 end_off = 0;
2171                 }
2172                 if (end_off) {
2173                         error = gfs2_block_zero_range(inode,
2174                                 offset + length - end_off, end_off);
2175                         if (error)
2176                                 goto out;
2177                 }
2178         }
2179
2180         if (gfs2_is_jdata(ip)) {
2181                 BUG_ON(!current->journal_info);
2182                 gfs2_journaled_truncate_range(inode, offset, length);
2183         } else
2184                 truncate_pagecache_range(inode, offset, offset + length - 1);
2185
2186         file_update_time(file);
2187         mark_inode_dirty(inode);
2188
2189         if (current->journal_info)
2190                 gfs2_trans_end(sdp);
2191
2192         if (!gfs2_is_stuffed(ip))
2193                 error = punch_hole(ip, offset, length);
2194
2195 out:
2196         if (current->journal_info)
2197                 gfs2_trans_end(sdp);
2198         return error;
2199 }