Merge branch 'kvm-fixes' into 'next'
[linux-2.6-microblaze.git] / fs / xfs / libxfs / xfs_bmap.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_dir2.h"
17 #include "xfs_inode.h"
18 #include "xfs_btree.h"
19 #include "xfs_trans.h"
20 #include "xfs_alloc.h"
21 #include "xfs_bmap.h"
22 #include "xfs_bmap_util.h"
23 #include "xfs_bmap_btree.h"
24 #include "xfs_rtalloc.h"
25 #include "xfs_errortag.h"
26 #include "xfs_error.h"
27 #include "xfs_quota.h"
28 #include "xfs_trans_space.h"
29 #include "xfs_buf_item.h"
30 #include "xfs_trace.h"
31 #include "xfs_attr_leaf.h"
32 #include "xfs_filestream.h"
33 #include "xfs_rmap.h"
34 #include "xfs_ag_resv.h"
35 #include "xfs_refcount.h"
36 #include "xfs_icache.h"
37 #include "xfs_iomap.h"
38
39
/*
 * Allocation zone for the struct xfs_extent_free_item entries that
 * __xfs_bmap_add_free() allocates and queues for deferred freeing.
 * NOTE(review): this file only declares and uses the pointer; it is presumably
 * set up elsewhere -- confirm.
 */
40 kmem_zone_t             *xfs_bmap_free_item_zone;
41
42 /*
43  * Miscellaneous helper functions
44  */
45
46 /*
47  * Compute and fill in the value of the maximum depth of a bmap btree
48  * in this filesystem.  Done once, during mount.
49  */
50 void
51 xfs_bmap_compute_maxlevels(
52         xfs_mount_t     *mp,            /* file system mount structure */
53         int             whichfork)      /* data or attr fork */
54 {
55         int             level;          /* btree level */
56         uint            maxblocks;      /* max blocks at this level */
57         uint            maxleafents;    /* max leaf entries possible */
58         int             maxrootrecs;    /* max records in root block */
59         int             minleafrecs;    /* min records in leaf block */
60         int             minnoderecs;    /* min records in node block */
61         int             sz;             /* root block size */
62
63         /*
64          * The maximum number of extents in a file, hence the maximum number of
65          * leaf entries, is controlled by the size of the on-disk extent count,
66          * either a signed 32-bit number for the data fork, or a signed 16-bit
67          * number for the attr fork.
68          *
69          * Note that we can no longer assume that if we are in ATTR1 that
70          * the fork offset of all the inodes will be
71          * (xfs_default_attroffset(ip) >> 3) because we could have mounted
72          * with ATTR2 and then mounted back with ATTR1, keeping the
73          * di_forkoff's fixed but probably at various positions. Therefore,
74          * for both ATTR1 and ATTR2 we have to assume the worst case scenario
75          * of a minimum size available.
76          */
77         if (whichfork == XFS_DATA_FORK) {
78                 maxleafents = MAXEXTNUM;
79                 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
80         } else {
81                 maxleafents = MAXAEXTNUM;
82                 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
83         }
84         maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
85         minleafrecs = mp->m_bmap_dmnr[0];
86         minnoderecs = mp->m_bmap_dmnr[1];
87         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
88         for (level = 1; maxblocks > 1; level++) {
89                 if (maxblocks <= maxrootrecs)
90                         maxblocks = 1;
91                 else
92                         maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
93         }
94         mp->m_bm_maxlevels[whichfork] = level;
95 }
96
97 STATIC int                              /* error */
98 xfs_bmbt_lookup_eq(
99         struct xfs_btree_cur    *cur,
100         struct xfs_bmbt_irec    *irec,
101         int                     *stat)  /* success/failure */
102 {
103         cur->bc_rec.b = *irec;
104         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
105 }
106
107 STATIC int                              /* error */
108 xfs_bmbt_lookup_first(
109         struct xfs_btree_cur    *cur,
110         int                     *stat)  /* success/failure */
111 {
112         cur->bc_rec.b.br_startoff = 0;
113         cur->bc_rec.b.br_startblock = 0;
114         cur->bc_rec.b.br_blockcount = 0;
115         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
116 }
117
118 /*
119  * Check if the inode needs to be converted to btree format.
120  */
121 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
122 {
123         struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
124
125         return whichfork != XFS_COW_FORK &&
126                 ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
127                 ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
128 }
129
130 /*
131  * Check if the inode should be converted to extent format.
132  */
133 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
134 {
135         struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
136
137         return whichfork != XFS_COW_FORK &&
138                 ifp->if_format == XFS_DINODE_FMT_BTREE &&
139                 ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
140 }
141
142 /*
143  * Update the record referred to by cur to the value given by irec
144  * This either works (return 0) or gets an EFSCORRUPTED error.
145  */
146 STATIC int
147 xfs_bmbt_update(
148         struct xfs_btree_cur    *cur,
149         struct xfs_bmbt_irec    *irec)
150 {
151         union xfs_btree_rec     rec;
152
153         xfs_bmbt_disk_set_all(&rec.bmbt, irec);
154         return xfs_btree_update(cur, &rec);
155 }
156
157 /*
158  * Compute the worst-case number of indirect blocks that will be used
159  * for ip's delayed extent of length "len".
160  */
161 STATIC xfs_filblks_t
162 xfs_bmap_worst_indlen(
163         xfs_inode_t     *ip,            /* incore inode pointer */
164         xfs_filblks_t   len)            /* delayed extent length */
165 {
166         int             level;          /* btree level number */
167         int             maxrecs;        /* maximum record count at this level */
168         xfs_mount_t     *mp;            /* mount structure */
169         xfs_filblks_t   rval;           /* return value */
170
171         mp = ip->i_mount;
172         maxrecs = mp->m_bmap_dmxr[0];
173         for (level = 0, rval = 0;
174              level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
175              level++) {
176                 len += maxrecs - 1;
177                 do_div(len, maxrecs);
178                 rval += len;
179                 if (len == 1)
180                         return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
181                                 level - 1;
182                 if (level == 0)
183                         maxrecs = mp->m_bmap_dmxr[1];
184         }
185         return rval;
186 }
187
188 /*
189  * Calculate the default attribute fork offset for newly created inodes.
190  */
191 uint
192 xfs_default_attroffset(
193         struct xfs_inode        *ip)
194 {
195         struct xfs_mount        *mp = ip->i_mount;
196         uint                    offset;
197
198         if (mp->m_sb.sb_inodesize == 256)
199                 offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
200         else
201                 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
202
203         ASSERT(offset < XFS_LITINO(mp));
204         return offset;
205 }
206
207 /*
208  * Helper routine to reset inode di_forkoff field when switching
209  * attribute fork from local to extent format - we reset it where
210  * possible to make space available for inline data fork extents.
211  */
212 STATIC void
213 xfs_bmap_forkoff_reset(
214         xfs_inode_t     *ip,
215         int             whichfork)
216 {
217         if (whichfork == XFS_ATTR_FORK &&
218             ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
219             ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
220                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
221
222                 if (dfl_forkoff > ip->i_d.di_forkoff)
223                         ip->i_d.di_forkoff = dfl_forkoff;
224         }
225 }
226
227 #ifdef DEBUG
228 STATIC struct xfs_buf *
229 xfs_bmap_get_bp(
230         struct xfs_btree_cur    *cur,
231         xfs_fsblock_t           bno)
232 {
233         struct xfs_log_item     *lip;
234         int                     i;
235
236         if (!cur)
237                 return NULL;
238
239         for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
240                 if (!cur->bc_bufs[i])
241                         break;
242                 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
243                         return cur->bc_bufs[i];
244         }
245
246         /* Chase down all the log items to see if the bp is there */
247         list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
248                 struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip;
249
250                 if (bip->bli_item.li_type == XFS_LI_BUF &&
251                     XFS_BUF_ADDR(bip->bli_buf) == bno)
252                         return bip->bli_buf;
253         }
254
255         return NULL;
256 }
257
258 STATIC void
259 xfs_check_block(
260         struct xfs_btree_block  *block,
261         xfs_mount_t             *mp,
262         int                     root,
263         short                   sz)
264 {
265         int                     i, j, dmxr;
266         __be64                  *pp, *thispa;   /* pointer to block address */
267         xfs_bmbt_key_t          *prevp, *keyp;
268
269         ASSERT(be16_to_cpu(block->bb_level) > 0);
270
271         prevp = NULL;
272         for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
273                 dmxr = mp->m_bmap_dmxr[0];
274                 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
275
276                 if (prevp) {
277                         ASSERT(be64_to_cpu(prevp->br_startoff) <
278                                be64_to_cpu(keyp->br_startoff));
279                 }
280                 prevp = keyp;
281
282                 /*
283                  * Compare the block numbers to see if there are dups.
284                  */
285                 if (root)
286                         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
287                 else
288                         pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
289
290                 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
291                         if (root)
292                                 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
293                         else
294                                 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
295                         if (*thispa == *pp) {
296                                 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
297                                         __func__, j, i,
298                                         (unsigned long long)be64_to_cpu(*thispa));
299                                 xfs_err(mp, "%s: ptrs are equal in node\n",
300                                         __func__);
301                                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
302                         }
303                 }
304         }
305 }
306
307 /*
308  * Check that the extents for the inode ip are in the right order in all
309  * btree leaves. THis becomes prohibitively expensive for large extent count
310  * files, so don't bother with inodes that have more than 10,000 extents in
311  * them. The btree record ordering checks will still be done, so for such large
312  * bmapbt constructs that is going to catch most corruptions.
313  */
314 STATIC void
315 xfs_bmap_check_leaf_extents(
316         xfs_btree_cur_t         *cur,   /* btree cursor or null */
317         xfs_inode_t             *ip,            /* incore inode pointer */
318         int                     whichfork)      /* data or attr fork */
319 {
320         struct xfs_mount        *mp = ip->i_mount;
321         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
322         struct xfs_btree_block  *block; /* current btree block */
323         xfs_fsblock_t           bno;    /* block # of "block" */
324         xfs_buf_t               *bp;    /* buffer for "block" */
325         int                     error;  /* error return value */
326         xfs_extnum_t            i=0, j; /* index into the extents list */
327         int                     level;  /* btree level, for checking */
328         __be64                  *pp;    /* pointer to block address */
329         xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
330         xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
331         xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
332         int                     bp_release = 0;
333
334         if (ifp->if_format != XFS_DINODE_FMT_BTREE)
335                 return;
336
337         /* skip large extent count inodes */
338         if (ip->i_df.if_nextents > 10000)
339                 return;
340
341         bno = NULLFSBLOCK;
342         block = ifp->if_broot;
343         /*
344          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
345          */
346         level = be16_to_cpu(block->bb_level);
347         ASSERT(level > 0);
348         xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
349         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
350         bno = be64_to_cpu(*pp);
351
352         ASSERT(bno != NULLFSBLOCK);
353         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
354         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
355
356         /*
357          * Go down the tree until leaf level is reached, following the first
358          * pointer (leftmost) at each level.
359          */
360         while (level-- > 0) {
361                 /* See if buf is in cur first */
362                 bp_release = 0;
363                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
364                 if (!bp) {
365                         bp_release = 1;
366                         error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
367                                                 XFS_BMAP_BTREE_REF,
368                                                 &xfs_bmbt_buf_ops);
369                         if (error)
370                                 goto error_norelse;
371                 }
372                 block = XFS_BUF_TO_BLOCK(bp);
373                 if (level == 0)
374                         break;
375
376                 /*
377                  * Check this block for basic sanity (increasing keys and
378                  * no duplicate blocks).
379                  */
380
381                 xfs_check_block(block, mp, 0, 0);
382                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
383                 bno = be64_to_cpu(*pp);
384                 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
385                         error = -EFSCORRUPTED;
386                         goto error0;
387                 }
388                 if (bp_release) {
389                         bp_release = 0;
390                         xfs_trans_brelse(NULL, bp);
391                 }
392         }
393
394         /*
395          * Here with bp and block set to the leftmost leaf node in the tree.
396          */
397         i = 0;
398
399         /*
400          * Loop over all leaf nodes checking that all extents are in the right order.
401          */
402         for (;;) {
403                 xfs_fsblock_t   nextbno;
404                 xfs_extnum_t    num_recs;
405
406
407                 num_recs = xfs_btree_get_numrecs(block);
408
409                 /*
410                  * Read-ahead the next leaf block, if any.
411                  */
412
413                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
414
415                 /*
416                  * Check all the extents to make sure they are OK.
417                  * If we had a previous block, the last entry should
418                  * conform with the first entry in this one.
419                  */
420
421                 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
422                 if (i) {
423                         ASSERT(xfs_bmbt_disk_get_startoff(&last) +
424                                xfs_bmbt_disk_get_blockcount(&last) <=
425                                xfs_bmbt_disk_get_startoff(ep));
426                 }
427                 for (j = 1; j < num_recs; j++) {
428                         nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
429                         ASSERT(xfs_bmbt_disk_get_startoff(ep) +
430                                xfs_bmbt_disk_get_blockcount(ep) <=
431                                xfs_bmbt_disk_get_startoff(nextp));
432                         ep = nextp;
433                 }
434
435                 last = *ep;
436                 i += num_recs;
437                 if (bp_release) {
438                         bp_release = 0;
439                         xfs_trans_brelse(NULL, bp);
440                 }
441                 bno = nextbno;
442                 /*
443                  * If we've reached the end, stop.
444                  */
445                 if (bno == NULLFSBLOCK)
446                         break;
447
448                 bp_release = 0;
449                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
450                 if (!bp) {
451                         bp_release = 1;
452                         error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
453                                                 XFS_BMAP_BTREE_REF,
454                                                 &xfs_bmbt_buf_ops);
455                         if (error)
456                                 goto error_norelse;
457                 }
458                 block = XFS_BUF_TO_BLOCK(bp);
459         }
460
461         return;
462
463 error0:
464         xfs_warn(mp, "%s: at error0", __func__);
465         if (bp_release)
466                 xfs_trans_brelse(NULL, bp);
467 error_norelse:
468         xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
469                 __func__, i);
470         xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
471         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
472         return;
473 }
474
475 /*
476  * Validate that the bmbt_irecs being returned from bmapi are valid
477  * given the caller's original parameters.  Specifically check the
478  * ranges of the returned irecs to ensure that they only extend beyond
479  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
480  */
481 STATIC void
482 xfs_bmap_validate_ret(
483         xfs_fileoff_t           bno,
484         xfs_filblks_t           len,
485         int                     flags,
486         xfs_bmbt_irec_t         *mval,
487         int                     nmap,
488         int                     ret_nmap)
489 {
490         int                     i;              /* index to map values */
491
492         ASSERT(ret_nmap <= nmap);
493
494         for (i = 0; i < ret_nmap; i++) {
495                 ASSERT(mval[i].br_blockcount > 0);
496                 if (!(flags & XFS_BMAPI_ENTIRE)) {
497                         ASSERT(mval[i].br_startoff >= bno);
498                         ASSERT(mval[i].br_blockcount <= len);
499                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
500                                bno + len);
501                 } else {
502                         ASSERT(mval[i].br_startoff < bno + len);
503                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
504                                bno);
505                 }
506                 ASSERT(i == 0 ||
507                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
508                        mval[i].br_startoff);
509                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
510                        mval[i].br_startblock != HOLESTARTBLOCK);
511                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
512                        mval[i].br_state == XFS_EXT_UNWRITTEN);
513         }
514 }
515
516 #else
/*
 * Without DEBUG the two consistency checkers are not built; calls to them
 * expand to empty statements and their arguments are not evaluated.
 */
517 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
518 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)    do { } while (0)
519 #endif /* DEBUG */
520
521 /*
522  * bmap free list manipulation functions
523  */
524
525 /*
526  * Add the extent to the list of extents to be free at transaction end.
527  * The list is maintained sorted (by block number).
528  */
529 void
530 __xfs_bmap_add_free(
531         struct xfs_trans                *tp,
532         xfs_fsblock_t                   bno,
533         xfs_filblks_t                   len,
534         const struct xfs_owner_info     *oinfo,
535         bool                            skip_discard)
536 {
537         struct xfs_extent_free_item     *new;           /* new element */
538 #ifdef DEBUG
539         struct xfs_mount                *mp = tp->t_mountp;
540         xfs_agnumber_t                  agno;
541         xfs_agblock_t                   agbno;
542
543         ASSERT(bno != NULLFSBLOCK);
544         ASSERT(len > 0);
545         ASSERT(len <= MAXEXTLEN);
546         ASSERT(!isnullstartblock(bno));
547         agno = XFS_FSB_TO_AGNO(mp, bno);
548         agbno = XFS_FSB_TO_AGBNO(mp, bno);
549         ASSERT(agno < mp->m_sb.sb_agcount);
550         ASSERT(agbno < mp->m_sb.sb_agblocks);
551         ASSERT(len < mp->m_sb.sb_agblocks);
552         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
553 #endif
554         ASSERT(xfs_bmap_free_item_zone != NULL);
555
556         new = kmem_cache_alloc(xfs_bmap_free_item_zone,
557                                GFP_KERNEL | __GFP_NOFAIL);
558         new->xefi_startblock = bno;
559         new->xefi_blockcount = (xfs_extlen_t)len;
560         if (oinfo)
561                 new->xefi_oinfo = *oinfo;
562         else
563                 new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
564         new->xefi_skip_discard = skip_discard;
565         trace_xfs_bmap_free_defer(tp->t_mountp,
566                         XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
567                         XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
568         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
569 }
570
571 /*
572  * Inode fork format manipulation functions
573  */
574
575 /*
576  * Convert the inode format to extent format if it currently is in btree format,
577  * but the extent list is small enough that it fits into the extent format.
578  *
579  * Since the extents are already in-core, all we have to do is give up the space
580  * for the btree root and pitch the leaf block.
581  */
582 STATIC int                              /* error */
583 xfs_bmap_btree_to_extents(
584         struct xfs_trans        *tp,    /* transaction pointer */
585         struct xfs_inode        *ip,    /* incore inode pointer */
586         struct xfs_btree_cur    *cur,   /* btree cursor */
587         int                     *logflagsp, /* inode logging flags */
588         int                     whichfork)  /* data or attr fork */
589 {
590         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
591         struct xfs_mount        *mp = ip->i_mount;
592         struct xfs_btree_block  *rblock = ifp->if_broot;
593         struct xfs_btree_block  *cblock;/* child btree block */
594         xfs_fsblock_t           cbno;   /* child block number */
595         xfs_buf_t               *cbp;   /* child block's buffer */
596         int                     error;  /* error return value */
597         __be64                  *pp;    /* ptr to block address */
598         struct xfs_owner_info   oinfo;
599
600         /* check if we actually need the extent format first: */
601         if (!xfs_bmap_wants_extents(ip, whichfork))
602                 return 0;
603
604         ASSERT(cur);
605         ASSERT(whichfork != XFS_COW_FORK);
606         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
607         ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
608         ASSERT(be16_to_cpu(rblock->bb_level) == 1);
609         ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
610         ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
611
612         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
613         cbno = be64_to_cpu(*pp);
614 #ifdef DEBUG
615         if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
616                 return -EFSCORRUPTED;
617 #endif
618         error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
619                                 &xfs_bmbt_buf_ops);
620         if (error)
621                 return error;
622         cblock = XFS_BUF_TO_BLOCK(cbp);
623         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
624                 return error;
625         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
626         xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
627         ip->i_d.di_nblocks--;
628         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
629         xfs_trans_binval(tp, cbp);
630         if (cur->bc_bufs[0] == cbp)
631                 cur->bc_bufs[0] = NULL;
632         xfs_iroot_realloc(ip, -1, whichfork);
633         ASSERT(ifp->if_broot == NULL);
634         ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
635         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
636         *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
637         return 0;
638 }
639
640 /*
641  * Convert an extents-format file into a btree-format file.
642  * The new file will have a root block (in the inode) and a single child block.
643  */
644 STATIC int                                      /* error */
645 xfs_bmap_extents_to_btree(
646         struct xfs_trans        *tp,            /* transaction pointer */
647         struct xfs_inode        *ip,            /* incore inode pointer */
648         struct xfs_btree_cur    **curp,         /* cursor returned to caller */
649         int                     wasdel,         /* converting a delayed alloc */
650         int                     *logflagsp,     /* inode logging flags */
651         int                     whichfork)      /* data or attr fork */
652 {
653         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
654         struct xfs_buf          *abp;           /* buffer for ablock */
655         struct xfs_alloc_arg    args;           /* allocation arguments */
656         struct xfs_bmbt_rec     *arp;           /* child record pointer */
657         struct xfs_btree_block  *block;         /* btree root block */
658         struct xfs_btree_cur    *cur;           /* bmap btree cursor */
659         int                     error;          /* error return value */
660         struct xfs_ifork        *ifp;           /* inode fork pointer */
661         struct xfs_bmbt_key     *kp;            /* root block key pointer */
662         struct xfs_mount        *mp;            /* mount structure */
663         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
664         struct xfs_iext_cursor  icur;
665         struct xfs_bmbt_irec    rec;
666         xfs_extnum_t            cnt = 0;
667
668         mp = ip->i_mount;
669         ASSERT(whichfork != XFS_COW_FORK);
670         ifp = XFS_IFORK_PTR(ip, whichfork);
671         ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
672
673         /*
674          * Make space in the inode incore. This needs to be undone if we fail
675          * to expand the root.
676          */
677         xfs_iroot_realloc(ip, 1, whichfork);
678         ifp->if_flags |= XFS_IFBROOT;
679
680         /*
681          * Fill in the root.
682          */
683         block = ifp->if_broot;
684         xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
685                                  XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
686                                  XFS_BTREE_LONG_PTRS);
687         /*
688          * Need a cursor.  Can't allocate until bb_level is filled in.
689          */
690         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
691         cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
692         /*
693          * Convert to a btree with two levels, one record in root.
694          */
695         ifp->if_format = XFS_DINODE_FMT_BTREE;
696         memset(&args, 0, sizeof(args));
697         args.tp = tp;
698         args.mp = mp;
699         xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
700         if (tp->t_firstblock == NULLFSBLOCK) {
701                 args.type = XFS_ALLOCTYPE_START_BNO;
702                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
703         } else if (tp->t_flags & XFS_TRANS_LOWMODE) {
704                 args.type = XFS_ALLOCTYPE_START_BNO;
705                 args.fsbno = tp->t_firstblock;
706         } else {
707                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
708                 args.fsbno = tp->t_firstblock;
709         }
710         args.minlen = args.maxlen = args.prod = 1;
711         args.wasdel = wasdel;
712         *logflagsp = 0;
713         error = xfs_alloc_vextent(&args);
714         if (error)
715                 goto out_root_realloc;
716
717         if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
718                 error = -ENOSPC;
719                 goto out_root_realloc;
720         }
721
722         /*
723          * Allocation can't fail, the space was reserved.
724          */
725         ASSERT(tp->t_firstblock == NULLFSBLOCK ||
726                args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
727         tp->t_firstblock = args.fsbno;
728         cur->bc_ino.allocated++;
729         ip->i_d.di_nblocks++;
730         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
731         error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
732                         XFS_FSB_TO_DADDR(mp, args.fsbno),
733                         mp->m_bsize, 0, &abp);
734         if (error)
735                 goto out_unreserve_dquot;
736
737         /*
738          * Fill in the child block.
739          */
740         abp->b_ops = &xfs_bmbt_buf_ops;
741         ablock = XFS_BUF_TO_BLOCK(abp);
742         xfs_btree_init_block_int(mp, ablock, abp->b_bn,
743                                 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
744                                 XFS_BTREE_LONG_PTRS);
745
746         for_each_xfs_iext(ifp, &icur, &rec) {
747                 if (isnullstartblock(rec.br_startblock))
748                         continue;
749                 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
750                 xfs_bmbt_disk_set_all(arp, &rec);
751                 cnt++;
752         }
753         ASSERT(cnt == ifp->if_nextents);
754         xfs_btree_set_numrecs(ablock, cnt);
755
756         /*
757          * Fill in the root key and pointer.
758          */
759         kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
760         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
761         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
762         pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
763                                                 be16_to_cpu(block->bb_level)));
764         *pp = cpu_to_be64(args.fsbno);
765
766         /*
767          * Do all this logging at the end so that
768          * the root is at the right level.
769          */
770         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
771         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
772         ASSERT(*curp == NULL);
773         *curp = cur;
774         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
775         return 0;
776
777 out_unreserve_dquot:
778         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
779 out_root_realloc:
780         xfs_iroot_realloc(ip, -1, whichfork);
781         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
782         ASSERT(ifp->if_broot == NULL);
783         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
784
785         return error;
786 }
787
788 /*
789  * Convert a local file to an extents file.
790  * This code is out of bounds for data forks of regular files,
791  * since the file data needs to get logged so things will stay consistent.
792  * (The bmap-level manipulations are ok, though).
793  */
794 void
795 xfs_bmap_local_to_extents_empty(
796         struct xfs_trans        *tp,
797         struct xfs_inode        *ip,
798         int                     whichfork)
799 {
800         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
801
802         ASSERT(whichfork != XFS_COW_FORK);
803         ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
804         ASSERT(ifp->if_bytes == 0);
805         ASSERT(ifp->if_nextents == 0);
806
807         xfs_bmap_forkoff_reset(ip, whichfork);
808         ifp->if_flags &= ~XFS_IFINLINE;
809         ifp->if_flags |= XFS_IFEXTENTS;
810         ifp->if_u1.if_root = NULL;
811         ifp->if_height = 0;
812         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
813         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
814 }
815
816
817 STATIC int                              /* error */
818 xfs_bmap_local_to_extents(
819         xfs_trans_t     *tp,            /* transaction pointer */
820         xfs_inode_t     *ip,            /* incore inode pointer */
821         xfs_extlen_t    total,          /* total blocks needed by transaction */
822         int             *logflagsp,     /* inode logging flags */
823         int             whichfork,
824         void            (*init_fn)(struct xfs_trans *tp,
825                                    struct xfs_buf *bp,
826                                    struct xfs_inode *ip,
827                                    struct xfs_ifork *ifp))
828 {
829         int             error = 0;
830         int             flags;          /* logging flags returned */
831         struct xfs_ifork *ifp;          /* inode fork pointer */
832         xfs_alloc_arg_t args;           /* allocation arguments */
833         xfs_buf_t       *bp;            /* buffer for extent block */
834         struct xfs_bmbt_irec rec;
835         struct xfs_iext_cursor icur;
836
837         /*
838          * We don't want to deal with the case of keeping inode data inline yet.
839          * So sending the data fork of a regular inode is invalid.
840          */
841         ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
842         ifp = XFS_IFORK_PTR(ip, whichfork);
843         ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
844
845         if (!ifp->if_bytes) {
846                 xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
847                 flags = XFS_ILOG_CORE;
848                 goto done;
849         }
850
851         flags = 0;
852         error = 0;
853         ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
854         memset(&args, 0, sizeof(args));
855         args.tp = tp;
856         args.mp = ip->i_mount;
857         xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
858         /*
859          * Allocate a block.  We know we need only one, since the
860          * file currently fits in an inode.
861          */
862         if (tp->t_firstblock == NULLFSBLOCK) {
863                 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
864                 args.type = XFS_ALLOCTYPE_START_BNO;
865         } else {
866                 args.fsbno = tp->t_firstblock;
867                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
868         }
869         args.total = total;
870         args.minlen = args.maxlen = args.prod = 1;
871         error = xfs_alloc_vextent(&args);
872         if (error)
873                 goto done;
874
875         /* Can't fail, the space was reserved. */
876         ASSERT(args.fsbno != NULLFSBLOCK);
877         ASSERT(args.len == 1);
878         tp->t_firstblock = args.fsbno;
879         error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
880                         XFS_FSB_TO_DADDR(args.mp, args.fsbno),
881                         args.mp->m_bsize, 0, &bp);
882         if (error)
883                 goto done;
884
885         /*
886          * Initialize the block, copy the data and log the remote buffer.
887          *
888          * The callout is responsible for logging because the remote format
889          * might differ from the local format and thus we don't know how much to
890          * log here. Note that init_fn must also set the buffer log item type
891          * correctly.
892          */
893         init_fn(tp, bp, ip, ifp);
894
895         /* account for the change in fork size */
896         xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
897         xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
898         flags |= XFS_ILOG_CORE;
899
900         ifp->if_u1.if_root = NULL;
901         ifp->if_height = 0;
902
903         rec.br_startoff = 0;
904         rec.br_startblock = args.fsbno;
905         rec.br_blockcount = 1;
906         rec.br_state = XFS_EXT_NORM;
907         xfs_iext_first(ifp, &icur);
908         xfs_iext_insert(ip, &icur, &rec, 0);
909
910         ifp->if_nextents = 1;
911         ip->i_d.di_nblocks = 1;
912         xfs_trans_mod_dquot_byino(tp, ip,
913                 XFS_TRANS_DQ_BCOUNT, 1L);
914         flags |= xfs_ilog_fext(whichfork);
915
916 done:
917         *logflagsp = flags;
918         return error;
919 }
920
921 /*
922  * Called from xfs_bmap_add_attrfork to handle btree format files.
923  */
924 STATIC int                                      /* error */
925 xfs_bmap_add_attrfork_btree(
926         xfs_trans_t             *tp,            /* transaction pointer */
927         xfs_inode_t             *ip,            /* incore inode pointer */
928         int                     *flags)         /* inode logging flags */
929 {
930         xfs_btree_cur_t         *cur;           /* btree cursor */
931         int                     error;          /* error return value */
932         xfs_mount_t             *mp;            /* file system mount struct */
933         int                     stat;           /* newroot status */
934
935         mp = ip->i_mount;
936         if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
937                 *flags |= XFS_ILOG_DBROOT;
938         else {
939                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
940                 error = xfs_bmbt_lookup_first(cur, &stat);
941                 if (error)
942                         goto error0;
943                 /* must be at least one entry */
944                 if (XFS_IS_CORRUPT(mp, stat != 1)) {
945                         error = -EFSCORRUPTED;
946                         goto error0;
947                 }
948                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
949                         goto error0;
950                 if (stat == 0) {
951                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
952                         return -ENOSPC;
953                 }
954                 cur->bc_ino.allocated = 0;
955                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
956         }
957         return 0;
958 error0:
959         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
960         return error;
961 }
962
963 /*
964  * Called from xfs_bmap_add_attrfork to handle extents format files.
965  */
966 STATIC int                                      /* error */
967 xfs_bmap_add_attrfork_extents(
968         struct xfs_trans        *tp,            /* transaction pointer */
969         struct xfs_inode        *ip,            /* incore inode pointer */
970         int                     *flags)         /* inode logging flags */
971 {
972         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
973         int                     error;          /* error return value */
974
975         if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
976             XFS_IFORK_DSIZE(ip))
977                 return 0;
978         cur = NULL;
979         error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
980                                           XFS_DATA_FORK);
981         if (cur) {
982                 cur->bc_ino.allocated = 0;
983                 xfs_btree_del_cursor(cur, error);
984         }
985         return error;
986 }
987
988 /*
989  * Called from xfs_bmap_add_attrfork to handle local format files. Each
990  * different data fork content type needs a different callout to do the
991  * conversion. Some are basic and only require special block initialisation
992  * callouts for the data formating, others (directories) are so specialised they
993  * handle everything themselves.
994  *
995  * XXX (dgc): investigate whether directory conversion can use the generic
996  * formatting callout. It should be possible - it's just a very complex
997  * formatter.
998  */
999 STATIC int                                      /* error */
1000 xfs_bmap_add_attrfork_local(
1001         struct xfs_trans        *tp,            /* transaction pointer */
1002         struct xfs_inode        *ip,            /* incore inode pointer */
1003         int                     *flags)         /* inode logging flags */
1004 {
1005         struct xfs_da_args      dargs;          /* args for dir/attr code */
1006
1007         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1008                 return 0;
1009
1010         if (S_ISDIR(VFS_I(ip)->i_mode)) {
1011                 memset(&dargs, 0, sizeof(dargs));
1012                 dargs.geo = ip->i_mount->m_dir_geo;
1013                 dargs.dp = ip;
1014                 dargs.total = dargs.geo->fsbcount;
1015                 dargs.whichfork = XFS_DATA_FORK;
1016                 dargs.trans = tp;
1017                 return xfs_dir2_sf_to_block(&dargs);
1018         }
1019
1020         if (S_ISLNK(VFS_I(ip)->i_mode))
1021                 return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1022                                                  XFS_DATA_FORK,
1023                                                  xfs_symlink_local_to_remote);
1024
1025         /* should only be called for types that support local format data */
1026         ASSERT(0);
1027         return -EFSCORRUPTED;
1028 }
1029
1030 /* Set an inode attr fork off based on the format */
1031 int
1032 xfs_bmap_set_attrforkoff(
1033         struct xfs_inode        *ip,
1034         int                     size,
1035         int                     *version)
1036 {
1037         switch (ip->i_df.if_format) {
1038         case XFS_DINODE_FMT_DEV:
1039                 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1040                 break;
1041         case XFS_DINODE_FMT_LOCAL:
1042         case XFS_DINODE_FMT_EXTENTS:
1043         case XFS_DINODE_FMT_BTREE:
1044                 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1045                 if (!ip->i_d.di_forkoff)
1046                         ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1047                 else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1048                         *version = 2;
1049                 break;
1050         default:
1051                 ASSERT(0);
1052                 return -EINVAL;
1053         }
1054
1055         return 0;
1056 }
1057
1058 /*
1059  * Convert inode from non-attributed to attributed.
1060  * Must not be in a transaction, ip must not be locked.
1061  */
1062 int                                             /* error code */
1063 xfs_bmap_add_attrfork(
1064         xfs_inode_t             *ip,            /* incore inode pointer */
1065         int                     size,           /* space new attribute needs */
1066         int                     rsvd)           /* xact may use reserved blks */
1067 {
1068         xfs_mount_t             *mp;            /* mount structure */
1069         xfs_trans_t             *tp;            /* transaction pointer */
1070         int                     blks;           /* space reservation */
1071         int                     version = 1;    /* superblock attr version */
1072         int                     logflags;       /* logging flags */
1073         int                     error;          /* error return value */
1074
1075         ASSERT(XFS_IFORK_Q(ip) == 0);
1076
1077         mp = ip->i_mount;
1078         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1079
1080         blks = XFS_ADDAFORK_SPACE_RES(mp);
1081
1082         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1083                         rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1084         if (error)
1085                 return error;
1086
1087         xfs_ilock(ip, XFS_ILOCK_EXCL);
1088         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1089                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1090                         XFS_QMOPT_RES_REGBLKS);
1091         if (error)
1092                 goto trans_cancel;
1093         if (XFS_IFORK_Q(ip))
1094                 goto trans_cancel;
1095
1096         xfs_trans_ijoin(tp, ip, 0);
1097         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1098         error = xfs_bmap_set_attrforkoff(ip, size, &version);
1099         if (error)
1100                 goto trans_cancel;
1101         ASSERT(ip->i_afp == NULL);
1102
1103         ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
1104                                       GFP_KERNEL | __GFP_NOFAIL);
1105
1106         ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1107         ip->i_afp->if_flags = XFS_IFEXTENTS;
1108         logflags = 0;
1109         switch (ip->i_df.if_format) {
1110         case XFS_DINODE_FMT_LOCAL:
1111                 error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1112                 break;
1113         case XFS_DINODE_FMT_EXTENTS:
1114                 error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1115                 break;
1116         case XFS_DINODE_FMT_BTREE:
1117                 error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1118                 break;
1119         default:
1120                 error = 0;
1121                 break;
1122         }
1123         if (logflags)
1124                 xfs_trans_log_inode(tp, ip, logflags);
1125         if (error)
1126                 goto trans_cancel;
1127         if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1128            (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1129                 bool log_sb = false;
1130
1131                 spin_lock(&mp->m_sb_lock);
1132                 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1133                         xfs_sb_version_addattr(&mp->m_sb);
1134                         log_sb = true;
1135                 }
1136                 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1137                         xfs_sb_version_addattr2(&mp->m_sb);
1138                         log_sb = true;
1139                 }
1140                 spin_unlock(&mp->m_sb_lock);
1141                 if (log_sb)
1142                         xfs_log_sb(tp);
1143         }
1144
1145         error = xfs_trans_commit(tp);
1146         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147         return error;
1148
1149 trans_cancel:
1150         xfs_trans_cancel(tp);
1151         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1152         return error;
1153 }
1154
1155 /*
1156  * Internal and external extent tree search functions.
1157  */
1158
1159 struct xfs_iread_state {
1160         struct xfs_iext_cursor  icur;
1161         xfs_extnum_t            loaded;
1162 };
1163
1164 /* Stuff every bmbt record from this block into the incore extent map. */
1165 static int
1166 xfs_iread_bmbt_block(
1167         struct xfs_btree_cur    *cur,
1168         int                     level,
1169         void                    *priv)
1170 {
1171         struct xfs_iread_state  *ir = priv;
1172         struct xfs_mount        *mp = cur->bc_mp;
1173         struct xfs_inode        *ip = cur->bc_ino.ip;
1174         struct xfs_btree_block  *block;
1175         struct xfs_buf          *bp;
1176         struct xfs_bmbt_rec     *frp;
1177         xfs_extnum_t            num_recs;
1178         xfs_extnum_t            j;
1179         int                     whichfork = cur->bc_ino.whichfork;
1180         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1181
1182         block = xfs_btree_get_block(cur, level, &bp);
1183
1184         /* Abort if we find more records than nextents. */
1185         num_recs = xfs_btree_get_numrecs(block);
1186         if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1187                 xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1188                                 (unsigned long long)ip->i_ino);
1189                 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1190                                 sizeof(*block), __this_address);
1191                 return -EFSCORRUPTED;
1192         }
1193
1194         /* Copy records into the incore cache. */
1195         frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1196         for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1197                 struct xfs_bmbt_irec    new;
1198                 xfs_failaddr_t          fa;
1199
1200                 xfs_bmbt_disk_get_all(frp, &new);
1201                 fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1202                 if (fa) {
1203                         xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1204                                         "xfs_iread_extents(2)", frp,
1205                                         sizeof(*frp), fa);
1206                         return -EFSCORRUPTED;
1207                 }
1208                 xfs_iext_insert(ip, &ir->icur, &new,
1209                                 xfs_bmap_fork_to_state(whichfork));
1210                 trace_xfs_read_extent(ip, &ir->icur,
1211                                 xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1212                 xfs_iext_next(ifp, &ir->icur);
1213         }
1214
1215         return 0;
1216 }
1217
1218 /*
1219  * Read in extents from a btree-format inode.
1220  */
1221 int
1222 xfs_iread_extents(
1223         struct xfs_trans        *tp,
1224         struct xfs_inode        *ip,
1225         int                     whichfork)
1226 {
1227         struct xfs_iread_state  ir;
1228         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1229         struct xfs_mount        *mp = ip->i_mount;
1230         struct xfs_btree_cur    *cur;
1231         int                     error;
1232
1233         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1234
1235         if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1236                 error = -EFSCORRUPTED;
1237                 goto out;
1238         }
1239
1240         ir.loaded = 0;
1241         xfs_iext_first(ifp, &ir.icur);
1242         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1243         error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1244                         XFS_BTREE_VISIT_RECORDS, &ir);
1245         xfs_btree_del_cursor(cur, error);
1246         if (error)
1247                 goto out;
1248
1249         if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1250                 error = -EFSCORRUPTED;
1251                 goto out;
1252         }
1253         ASSERT(ir.loaded == xfs_iext_count(ifp));
1254
1255         ifp->if_flags |= XFS_IFEXTENTS;
1256         return 0;
1257 out:
1258         xfs_iext_destroy(ifp);
1259         return error;
1260 }
1261
1262 /*
1263  * Returns the relative block number of the first unused block(s) in the given
1264  * fork with at least "len" logically contiguous blocks free.  This is the
1265  * lowest-address hole if the fork has holes, else the first block past the end
1266  * of fork.  Return 0 if the fork is currently local (in-inode).
1267  */
1268 int                                             /* error */
1269 xfs_bmap_first_unused(
1270         struct xfs_trans        *tp,            /* transaction pointer */
1271         struct xfs_inode        *ip,            /* incore inode */
1272         xfs_extlen_t            len,            /* size of hole to find */
1273         xfs_fileoff_t           *first_unused,  /* unused block */
1274         int                     whichfork)      /* data or attr fork */
1275 {
1276         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1277         struct xfs_bmbt_irec    got;
1278         struct xfs_iext_cursor  icur;
1279         xfs_fileoff_t           lastaddr = 0;
1280         xfs_fileoff_t           lowest, max;
1281         int                     error;
1282
1283         if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1284                 *first_unused = 0;
1285                 return 0;
1286         }
1287
1288         ASSERT(xfs_ifork_has_extents(ifp));
1289
1290         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1291                 error = xfs_iread_extents(tp, ip, whichfork);
1292                 if (error)
1293                         return error;
1294         }
1295
1296         lowest = max = *first_unused;
1297         for_each_xfs_iext(ifp, &icur, &got) {
1298                 /*
1299                  * See if the hole before this extent will work.
1300                  */
1301                 if (got.br_startoff >= lowest + len &&
1302                     got.br_startoff - max >= len)
1303                         break;
1304                 lastaddr = got.br_startoff + got.br_blockcount;
1305                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1306         }
1307
1308         *first_unused = max;
1309         return 0;
1310 }
1311
1312 /*
1313  * Returns the file-relative block number of the last block - 1 before
1314  * last_block (input value) in the file.
1315  * This is not based on i_size, it is based on the extent records.
1316  * Returns 0 for local files, as they do not have extent records.
1317  */
1318 int                                             /* error */
1319 xfs_bmap_last_before(
1320         struct xfs_trans        *tp,            /* transaction pointer */
1321         struct xfs_inode        *ip,            /* incore inode */
1322         xfs_fileoff_t           *last_block,    /* last block */
1323         int                     whichfork)      /* data or attr fork */
1324 {
1325         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1326         struct xfs_bmbt_irec    got;
1327         struct xfs_iext_cursor  icur;
1328         int                     error;
1329
1330         switch (ifp->if_format) {
1331         case XFS_DINODE_FMT_LOCAL:
1332                 *last_block = 0;
1333                 return 0;
1334         case XFS_DINODE_FMT_BTREE:
1335         case XFS_DINODE_FMT_EXTENTS:
1336                 break;
1337         default:
1338                 ASSERT(0);
1339                 return -EFSCORRUPTED;
1340         }
1341
1342         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1343                 error = xfs_iread_extents(tp, ip, whichfork);
1344                 if (error)
1345                         return error;
1346         }
1347
1348         if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1349                 *last_block = 0;
1350         return 0;
1351 }
1352
1353 int
1354 xfs_bmap_last_extent(
1355         struct xfs_trans        *tp,
1356         struct xfs_inode        *ip,
1357         int                     whichfork,
1358         struct xfs_bmbt_irec    *rec,
1359         int                     *is_empty)
1360 {
1361         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1362         struct xfs_iext_cursor  icur;
1363         int                     error;
1364
1365         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1366                 error = xfs_iread_extents(tp, ip, whichfork);
1367                 if (error)
1368                         return error;
1369         }
1370
1371         xfs_iext_last(ifp, &icur);
1372         if (!xfs_iext_get_extent(ifp, &icur, rec))
1373                 *is_empty = 1;
1374         else
1375                 *is_empty = 0;
1376         return 0;
1377 }
1378
1379 /*
1380  * Check the last inode extent to determine whether this allocation will result
1381  * in blocks being allocated at the end of the file. When we allocate new data
1382  * blocks at the end of the file which do not start at the previous data block,
1383  * we will try to align the new blocks at stripe unit boundaries.
1384  *
1385  * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1386  * at, or past the EOF.
1387  */
1388 STATIC int
1389 xfs_bmap_isaeof(
1390         struct xfs_bmalloca     *bma,
1391         int                     whichfork)
1392 {
1393         struct xfs_bmbt_irec    rec;
1394         int                     is_empty;
1395         int                     error;
1396
1397         bma->aeof = false;
1398         error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1399                                      &is_empty);
1400         if (error)
1401                 return error;
1402
1403         if (is_empty) {
1404                 bma->aeof = true;
1405                 return 0;
1406         }
1407
1408         /*
1409          * Check if we are allocation or past the last extent, or at least into
1410          * the last delayed allocated extent.
1411          */
1412         bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1413                 (bma->offset >= rec.br_startoff &&
1414                  isnullstartblock(rec.br_startblock));
1415         return 0;
1416 }
1417
1418 /*
1419  * Returns the file-relative block number of the first block past eof in
1420  * the file.  This is not based on i_size, it is based on the extent records.
1421  * Returns 0 for local files, as they do not have extent records.
1422  */
1423 int
1424 xfs_bmap_last_offset(
1425         struct xfs_inode        *ip,
1426         xfs_fileoff_t           *last_block,
1427         int                     whichfork)
1428 {
1429         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1430         struct xfs_bmbt_irec    rec;
1431         int                     is_empty;
1432         int                     error;
1433
1434         *last_block = 0;
1435
1436         if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1437                 return 0;
1438
1439         if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1440                 return -EFSCORRUPTED;
1441
1442         error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1443         if (error || is_empty)
1444                 return error;
1445
1446         *last_block = rec.br_startoff + rec.br_blockcount;
1447         return 0;
1448 }
1449
1450 /*
1451  * Returns whether the selected fork of the inode has exactly one
1452  * block or not.  For the data fork we check this matches di_size,
1453  * implying the file's range is 0..bsize-1.
1454  */
1455 int                                     /* 1=>1 block, 0=>otherwise */
1456 xfs_bmap_one_block(
1457         struct xfs_inode        *ip,            /* incore inode */
1458         int                     whichfork)      /* data or attr fork */
1459 {
1460         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1461         int                     rval;           /* return value */
1462         struct xfs_bmbt_irec    s;              /* internal version of extent */
1463         struct xfs_iext_cursor icur;
1464
1465 #ifndef DEBUG
1466         if (whichfork == XFS_DATA_FORK)
1467                 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1468 #endif  /* !DEBUG */
1469         if (ifp->if_nextents != 1)
1470                 return 0;
1471         if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1472                 return 0;
1473         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1474         xfs_iext_first(ifp, &icur);
1475         xfs_iext_get_extent(ifp, &icur, &s);
1476         rval = s.br_startoff == 0 && s.br_blockcount == 1;
1477         if (rval && whichfork == XFS_DATA_FORK)
1478                 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1479         return rval;
1480 }
1481
1482 /*
1483  * Extent tree manipulation functions used during allocation.
1484  */
1485
1486 /*
1487  * Convert a delayed allocation to a real allocation.
1488  */
1489 STATIC int                              /* error */
1490 xfs_bmap_add_extent_delay_real(
1491         struct xfs_bmalloca     *bma,
1492         int                     whichfork)
1493 {
1494         struct xfs_mount        *mp = bma->ip->i_mount;
1495         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1496         struct xfs_bmbt_irec    *new = &bma->got;
1497         int                     error;  /* error return value */
1498         int                     i;      /* temp state */
1499         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1500         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1501                                         /* left is 0, right is 1, prev is 2 */
1502         int                     rval=0; /* return value (logging flags) */
1503         int                     state = xfs_bmap_fork_to_state(whichfork);
1504         xfs_filblks_t           da_new; /* new count del alloc blocks used */
1505         xfs_filblks_t           da_old; /* old count del alloc blocks used */
1506         xfs_filblks_t           temp=0; /* value for da_new calculations */
1507         int                     tmp_rval;       /* partial logging flags */
1508         struct xfs_bmbt_irec    old;
1509
1510         ASSERT(whichfork != XFS_ATTR_FORK);
1511         ASSERT(!isnullstartblock(new->br_startblock));
1512         ASSERT(!bma->cur ||
1513                (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1514
1515         XFS_STATS_INC(mp, xs_add_exlist);
1516
1517 #define LEFT            r[0]
1518 #define RIGHT           r[1]
1519 #define PREV            r[2]
1520
1521         /*
1522          * Set up a bunch of variables to make the tests simpler.
1523          */
1524         xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1525         new_endoff = new->br_startoff + new->br_blockcount;
1526         ASSERT(isnullstartblock(PREV.br_startblock));
1527         ASSERT(PREV.br_startoff <= new->br_startoff);
1528         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1529
1530         da_old = startblockval(PREV.br_startblock);
1531         da_new = 0;
1532
1533         /*
1534          * Set flags determining what part of the previous delayed allocation
1535          * extent is being replaced by a real allocation.
1536          */
1537         if (PREV.br_startoff == new->br_startoff)
1538                 state |= BMAP_LEFT_FILLING;
1539         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1540                 state |= BMAP_RIGHT_FILLING;
1541
1542         /*
1543          * Check and set flags if this segment has a left neighbor.
1544          * Don't set contiguous if the combined extent would be too large.
1545          */
1546         if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1547                 state |= BMAP_LEFT_VALID;
1548                 if (isnullstartblock(LEFT.br_startblock))
1549                         state |= BMAP_LEFT_DELAY;
1550         }
1551
1552         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1553             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1554             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1555             LEFT.br_state == new->br_state &&
1556             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1557                 state |= BMAP_LEFT_CONTIG;
1558
1559         /*
1560          * Check and set flags if this segment has a right neighbor.
1561          * Don't set contiguous if the combined extent would be too large.
1562          * Also check for all-three-contiguous being too large.
1563          */
1564         if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1565                 state |= BMAP_RIGHT_VALID;
1566                 if (isnullstartblock(RIGHT.br_startblock))
1567                         state |= BMAP_RIGHT_DELAY;
1568         }
1569
1570         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1571             new_endoff == RIGHT.br_startoff &&
1572             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1573             new->br_state == RIGHT.br_state &&
1574             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1575             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1576                        BMAP_RIGHT_FILLING)) !=
1577                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1578                        BMAP_RIGHT_FILLING) ||
1579              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1580                         <= MAXEXTLEN))
1581                 state |= BMAP_RIGHT_CONTIG;
1582
1583         error = 0;
1584         /*
1585          * Switch out based on the FILLING and CONTIG state bits.
1586          */
1587         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1588                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1589         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1590              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1591                 /*
1592                  * Filling in all of a previously delayed allocation extent.
1593                  * The left and right neighbors are both contiguous with new.
1594                  */
1595                 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1596
1597                 xfs_iext_remove(bma->ip, &bma->icur, state);
1598                 xfs_iext_remove(bma->ip, &bma->icur, state);
1599                 xfs_iext_prev(ifp, &bma->icur);
1600                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1601                 ifp->if_nextents--;
1602
1603                 if (bma->cur == NULL)
1604                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1605                 else {
1606                         rval = XFS_ILOG_CORE;
1607                         error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1608                         if (error)
1609                                 goto done;
1610                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1611                                 error = -EFSCORRUPTED;
1612                                 goto done;
1613                         }
1614                         error = xfs_btree_delete(bma->cur, &i);
1615                         if (error)
1616                                 goto done;
1617                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1618                                 error = -EFSCORRUPTED;
1619                                 goto done;
1620                         }
1621                         error = xfs_btree_decrement(bma->cur, 0, &i);
1622                         if (error)
1623                                 goto done;
1624                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1625                                 error = -EFSCORRUPTED;
1626                                 goto done;
1627                         }
1628                         error = xfs_bmbt_update(bma->cur, &LEFT);
1629                         if (error)
1630                                 goto done;
1631                 }
1632                 break;
1633
1634         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1635                 /*
1636                  * Filling in all of a previously delayed allocation extent.
1637                  * The left neighbor is contiguous, the right is not.
1638                  */
1639                 old = LEFT;
1640                 LEFT.br_blockcount += PREV.br_blockcount;
1641
1642                 xfs_iext_remove(bma->ip, &bma->icur, state);
1643                 xfs_iext_prev(ifp, &bma->icur);
1644                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1645
1646                 if (bma->cur == NULL)
1647                         rval = XFS_ILOG_DEXT;
1648                 else {
1649                         rval = 0;
1650                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1651                         if (error)
1652                                 goto done;
1653                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1654                                 error = -EFSCORRUPTED;
1655                                 goto done;
1656                         }
1657                         error = xfs_bmbt_update(bma->cur, &LEFT);
1658                         if (error)
1659                                 goto done;
1660                 }
1661                 break;
1662
1663         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664                 /*
1665                  * Filling in all of a previously delayed allocation extent.
1666                  * The right neighbor is contiguous, the left is not. Take care
1667                  * with delay -> unwritten extent allocation here because the
1668                  * delalloc record we are overwriting is always written.
1669                  */
1670                 PREV.br_startblock = new->br_startblock;
1671                 PREV.br_blockcount += RIGHT.br_blockcount;
1672                 PREV.br_state = new->br_state;
1673
1674                 xfs_iext_next(ifp, &bma->icur);
1675                 xfs_iext_remove(bma->ip, &bma->icur, state);
1676                 xfs_iext_prev(ifp, &bma->icur);
1677                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1678
1679                 if (bma->cur == NULL)
1680                         rval = XFS_ILOG_DEXT;
1681                 else {
1682                         rval = 0;
1683                         error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1684                         if (error)
1685                                 goto done;
1686                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1687                                 error = -EFSCORRUPTED;
1688                                 goto done;
1689                         }
1690                         error = xfs_bmbt_update(bma->cur, &PREV);
1691                         if (error)
1692                                 goto done;
1693                 }
1694                 break;
1695
1696         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1697                 /*
1698                  * Filling in all of a previously delayed allocation extent.
1699                  * Neither the left nor right neighbors are contiguous with
1700                  * the new one.
1701                  */
1702                 PREV.br_startblock = new->br_startblock;
1703                 PREV.br_state = new->br_state;
1704                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1705                 ifp->if_nextents++;
1706
1707                 if (bma->cur == NULL)
1708                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1709                 else {
1710                         rval = XFS_ILOG_CORE;
1711                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1712                         if (error)
1713                                 goto done;
1714                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1715                                 error = -EFSCORRUPTED;
1716                                 goto done;
1717                         }
1718                         error = xfs_btree_insert(bma->cur, &i);
1719                         if (error)
1720                                 goto done;
1721                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1722                                 error = -EFSCORRUPTED;
1723                                 goto done;
1724                         }
1725                 }
1726                 break;
1727
1728         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1729                 /*
1730                  * Filling in the first part of a previous delayed allocation.
1731                  * The left neighbor is contiguous.
1732                  */
1733                 old = LEFT;
1734                 temp = PREV.br_blockcount - new->br_blockcount;
1735                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1736                                 startblockval(PREV.br_startblock));
1737
1738                 LEFT.br_blockcount += new->br_blockcount;
1739
1740                 PREV.br_blockcount = temp;
1741                 PREV.br_startoff += new->br_blockcount;
1742                 PREV.br_startblock = nullstartblock(da_new);
1743
1744                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1745                 xfs_iext_prev(ifp, &bma->icur);
1746                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1747
1748                 if (bma->cur == NULL)
1749                         rval = XFS_ILOG_DEXT;
1750                 else {
1751                         rval = 0;
1752                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1753                         if (error)
1754                                 goto done;
1755                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1756                                 error = -EFSCORRUPTED;
1757                                 goto done;
1758                         }
1759                         error = xfs_bmbt_update(bma->cur, &LEFT);
1760                         if (error)
1761                                 goto done;
1762                 }
1763                 break;
1764
1765         case BMAP_LEFT_FILLING:
1766                 /*
1767                  * Filling in the first part of a previous delayed allocation.
1768                  * The left neighbor is not contiguous.
1769                  */
1770                 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1771                 ifp->if_nextents++;
1772
1773                 if (bma->cur == NULL)
1774                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1775                 else {
1776                         rval = XFS_ILOG_CORE;
1777                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1778                         if (error)
1779                                 goto done;
1780                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1781                                 error = -EFSCORRUPTED;
1782                                 goto done;
1783                         }
1784                         error = xfs_btree_insert(bma->cur, &i);
1785                         if (error)
1786                                 goto done;
1787                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1788                                 error = -EFSCORRUPTED;
1789                                 goto done;
1790                         }
1791                 }
1792
1793                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1794                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1795                                         &bma->cur, 1, &tmp_rval, whichfork);
1796                         rval |= tmp_rval;
1797                         if (error)
1798                                 goto done;
1799                 }
1800
1801                 temp = PREV.br_blockcount - new->br_blockcount;
1802                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1803                         startblockval(PREV.br_startblock) -
1804                         (bma->cur ? bma->cur->bc_ino.allocated : 0));
1805
1806                 PREV.br_startoff = new_endoff;
1807                 PREV.br_blockcount = temp;
1808                 PREV.br_startblock = nullstartblock(da_new);
1809                 xfs_iext_next(ifp, &bma->icur);
1810                 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1811                 xfs_iext_prev(ifp, &bma->icur);
1812                 break;
1813
1814         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1815                 /*
1816                  * Filling in the last part of a previous delayed allocation.
1817                  * The right neighbor is contiguous with the new allocation.
1818                  */
1819                 old = RIGHT;
1820                 RIGHT.br_startoff = new->br_startoff;
1821                 RIGHT.br_startblock = new->br_startblock;
1822                 RIGHT.br_blockcount += new->br_blockcount;
1823
1824                 if (bma->cur == NULL)
1825                         rval = XFS_ILOG_DEXT;
1826                 else {
1827                         rval = 0;
1828                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1829                         if (error)
1830                                 goto done;
1831                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1832                                 error = -EFSCORRUPTED;
1833                                 goto done;
1834                         }
1835                         error = xfs_bmbt_update(bma->cur, &RIGHT);
1836                         if (error)
1837                                 goto done;
1838                 }
1839
1840                 temp = PREV.br_blockcount - new->br_blockcount;
1841                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1842                         startblockval(PREV.br_startblock));
1843
1844                 PREV.br_blockcount = temp;
1845                 PREV.br_startblock = nullstartblock(da_new);
1846
1847                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1848                 xfs_iext_next(ifp, &bma->icur);
1849                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1850                 break;
1851
1852         case BMAP_RIGHT_FILLING:
1853                 /*
1854                  * Filling in the last part of a previous delayed allocation.
1855                  * The right neighbor is not contiguous.
1856                  */
1857                 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1858                 ifp->if_nextents++;
1859
1860                 if (bma->cur == NULL)
1861                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1862                 else {
1863                         rval = XFS_ILOG_CORE;
1864                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1865                         if (error)
1866                                 goto done;
1867                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1868                                 error = -EFSCORRUPTED;
1869                                 goto done;
1870                         }
1871                         error = xfs_btree_insert(bma->cur, &i);
1872                         if (error)
1873                                 goto done;
1874                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1875                                 error = -EFSCORRUPTED;
1876                                 goto done;
1877                         }
1878                 }
1879
1880                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1881                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1882                                 &bma->cur, 1, &tmp_rval, whichfork);
1883                         rval |= tmp_rval;
1884                         if (error)
1885                                 goto done;
1886                 }
1887
1888                 temp = PREV.br_blockcount - new->br_blockcount;
1889                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1890                         startblockval(PREV.br_startblock) -
1891                         (bma->cur ? bma->cur->bc_ino.allocated : 0));
1892
1893                 PREV.br_startblock = nullstartblock(da_new);
1894                 PREV.br_blockcount = temp;
1895                 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1896                 xfs_iext_next(ifp, &bma->icur);
1897                 break;
1898
1899         case 0:
1900                 /*
1901                  * Filling in the middle part of a previous delayed allocation.
1902                  * Contiguity is impossible here.
1903                  * This case is avoided almost all the time.
1904                  *
1905                  * We start with a delayed allocation:
1906                  *
1907                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908                  *  PREV @ idx
1909                  *
1910                  * and we are allocating:
1911                  *                     +rrrrrrrrrrrrrrrrr+
1912                  *                            new
1913                  *
1914                  * and we set it up for insertion as:
1915                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916                  *                            new
1917                  *  PREV @ idx          LEFT              RIGHT
1918                  *                      inserted at idx + 1
1919                  */
1920                 old = PREV;
1921
1922                 /* LEFT is the new middle */
1923                 LEFT = *new;
1924
1925                 /* RIGHT is the new right */
1926                 RIGHT.br_state = PREV.br_state;
1927                 RIGHT.br_startoff = new_endoff;
1928                 RIGHT.br_blockcount =
1929                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930                 RIGHT.br_startblock =
1931                         nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932                                         RIGHT.br_blockcount));
1933
1934                 /* truncate PREV */
1935                 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936                 PREV.br_startblock =
1937                         nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938                                         PREV.br_blockcount));
1939                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1940
1941                 xfs_iext_next(ifp, &bma->icur);
1942                 xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1943                 xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1944                 ifp->if_nextents++;
1945
1946                 if (bma->cur == NULL)
1947                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948                 else {
1949                         rval = XFS_ILOG_CORE;
1950                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951                         if (error)
1952                                 goto done;
1953                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1954                                 error = -EFSCORRUPTED;
1955                                 goto done;
1956                         }
1957                         error = xfs_btree_insert(bma->cur, &i);
1958                         if (error)
1959                                 goto done;
1960                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1961                                 error = -EFSCORRUPTED;
1962                                 goto done;
1963                         }
1964                 }
1965
1966                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1967                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1968                                         &bma->cur, 1, &tmp_rval, whichfork);
1969                         rval |= tmp_rval;
1970                         if (error)
1971                                 goto done;
1972                 }
1973
1974                 da_new = startblockval(PREV.br_startblock) +
1975                          startblockval(RIGHT.br_startblock);
1976                 break;
1977
1978         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1979         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1980         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1981         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1982         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1983         case BMAP_LEFT_CONTIG:
1984         case BMAP_RIGHT_CONTIG:
1985                 /*
1986                  * These cases are all impossible.
1987                  */
1988                 ASSERT(0);
1989         }
1990
1991         /* add reverse mapping unless caller opted out */
1992         if (!(bma->flags & XFS_BMAPI_NORMAP))
1993                 xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1994
1995         /* convert to a btree if necessary */
1996         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1997                 int     tmp_logflags;   /* partial log flag return val */
1998
1999                 ASSERT(bma->cur == NULL);
2000                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2001                                 &bma->cur, da_old > 0, &tmp_logflags,
2002                                 whichfork);
2003                 bma->logflags |= tmp_logflags;
2004                 if (error)
2005                         goto done;
2006         }
2007
2008         if (da_new != da_old)
2009                 xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
2010
2011         if (bma->cur) {
2012                 da_new += bma->cur->bc_ino.allocated;
2013                 bma->cur->bc_ino.allocated = 0;
2014         }
2015
2016         /* adjust for changes in reserved delayed indirect blocks */
2017         if (da_new != da_old) {
2018                 ASSERT(state == 0 || da_new < da_old);
2019                 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2020                                 false);
2021         }
2022
2023         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2024 done:
2025         if (whichfork != XFS_COW_FORK)
2026                 bma->logflags |= rval;
2027         return error;
2028 #undef  LEFT
2029 #undef  RIGHT
2030 #undef  PREV
2031 }
2032
2033 /*
2034  * Convert an unwritten allocation to a real allocation or vice versa.
2035  */
2036 int                                     /* error */
2037 xfs_bmap_add_extent_unwritten_real(
2038         struct xfs_trans        *tp,
2039         xfs_inode_t             *ip,    /* incore inode pointer */
2040         int                     whichfork,
2041         struct xfs_iext_cursor  *icur,
2042         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2043         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2044         int                     *logflagsp) /* inode logging flags */
2045 {
2046         xfs_btree_cur_t         *cur;   /* btree cursor */
2047         int                     error;  /* error return value */
2048         int                     i;      /* temp state */
2049         struct xfs_ifork        *ifp;   /* inode fork pointer */
2050         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2051         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2052                                         /* left is 0, right is 1, prev is 2 */
2053         int                     rval=0; /* return value (logging flags) */
2054         int                     state = xfs_bmap_fork_to_state(whichfork);
2055         struct xfs_mount        *mp = ip->i_mount;
2056         struct xfs_bmbt_irec    old;
2057
2058         *logflagsp = 0;
2059
2060         cur = *curp;
2061         ifp = XFS_IFORK_PTR(ip, whichfork);
2062
2063         ASSERT(!isnullstartblock(new->br_startblock));
2064
2065         XFS_STATS_INC(mp, xs_add_exlist);
2066
2067 #define LEFT            r[0]
2068 #define RIGHT           r[1]
2069 #define PREV            r[2]
2070
2071         /*
2072          * Set up a bunch of variables to make the tests simpler.
2073          */
2074         error = 0;
2075         xfs_iext_get_extent(ifp, icur, &PREV);
2076         ASSERT(new->br_state != PREV.br_state);
2077         new_endoff = new->br_startoff + new->br_blockcount;
2078         ASSERT(PREV.br_startoff <= new->br_startoff);
2079         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2080
2081         /*
2082          * Set flags determining what part of the previous oldext allocation
2083          * extent is being replaced by a newext allocation.
2084          */
2085         if (PREV.br_startoff == new->br_startoff)
2086                 state |= BMAP_LEFT_FILLING;
2087         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2088                 state |= BMAP_RIGHT_FILLING;
2089
2090         /*
2091          * Check and set flags if this segment has a left neighbor.
2092          * Don't set contiguous if the combined extent would be too large.
2093          */
2094         if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2095                 state |= BMAP_LEFT_VALID;
2096                 if (isnullstartblock(LEFT.br_startblock))
2097                         state |= BMAP_LEFT_DELAY;
2098         }
2099
2100         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2101             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2102             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2103             LEFT.br_state == new->br_state &&
2104             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2105                 state |= BMAP_LEFT_CONTIG;
2106
2107         /*
2108          * Check and set flags if this segment has a right neighbor.
2109          * Don't set contiguous if the combined extent would be too large.
2110          * Also check for all-three-contiguous being too large.
2111          */
2112         if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2113                 state |= BMAP_RIGHT_VALID;
2114                 if (isnullstartblock(RIGHT.br_startblock))
2115                         state |= BMAP_RIGHT_DELAY;
2116         }
2117
2118         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119             new_endoff == RIGHT.br_startoff &&
2120             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121             new->br_state == RIGHT.br_state &&
2122             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124                        BMAP_RIGHT_FILLING)) !=
2125                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126                        BMAP_RIGHT_FILLING) ||
2127              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128                         <= MAXEXTLEN))
2129                 state |= BMAP_RIGHT_CONTIG;
2130
2131         /*
2132          * Switch out based on the FILLING and CONTIG state bits.
2133          */
2134         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138                 /*
2139                  * Setting all of a previous oldext extent to newext.
2140                  * The left and right neighbors are both contiguous with new.
2141                  */
2142                 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2143
2144                 xfs_iext_remove(ip, icur, state);
2145                 xfs_iext_remove(ip, icur, state);
2146                 xfs_iext_prev(ifp, icur);
2147                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2148                 ifp->if_nextents -= 2;
2149                 if (cur == NULL)
2150                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2151                 else {
2152                         rval = XFS_ILOG_CORE;
2153                         error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2154                         if (error)
2155                                 goto done;
2156                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2157                                 error = -EFSCORRUPTED;
2158                                 goto done;
2159                         }
2160                         if ((error = xfs_btree_delete(cur, &i)))
2161                                 goto done;
2162                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2163                                 error = -EFSCORRUPTED;
2164                                 goto done;
2165                         }
2166                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2167                                 goto done;
2168                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2169                                 error = -EFSCORRUPTED;
2170                                 goto done;
2171                         }
2172                         if ((error = xfs_btree_delete(cur, &i)))
2173                                 goto done;
2174                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2175                                 error = -EFSCORRUPTED;
2176                                 goto done;
2177                         }
2178                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2179                                 goto done;
2180                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2181                                 error = -EFSCORRUPTED;
2182                                 goto done;
2183                         }
2184                         error = xfs_bmbt_update(cur, &LEFT);
2185                         if (error)
2186                                 goto done;
2187                 }
2188                 break;
2189
2190         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2191                 /*
2192                  * Setting all of a previous oldext extent to newext.
2193                  * The left neighbor is contiguous, the right is not.
2194                  */
2195                 LEFT.br_blockcount += PREV.br_blockcount;
2196
2197                 xfs_iext_remove(ip, icur, state);
2198                 xfs_iext_prev(ifp, icur);
2199                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2200                 ifp->if_nextents--;
2201                 if (cur == NULL)
2202                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2203                 else {
2204                         rval = XFS_ILOG_CORE;
2205                         error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2206                         if (error)
2207                                 goto done;
2208                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2209                                 error = -EFSCORRUPTED;
2210                                 goto done;
2211                         }
2212                         if ((error = xfs_btree_delete(cur, &i)))
2213                                 goto done;
2214                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2215                                 error = -EFSCORRUPTED;
2216                                 goto done;
2217                         }
2218                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2219                                 goto done;
2220                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2221                                 error = -EFSCORRUPTED;
2222                                 goto done;
2223                         }
2224                         error = xfs_bmbt_update(cur, &LEFT);
2225                         if (error)
2226                                 goto done;
2227                 }
2228                 break;
2229
2230         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2231                 /*
2232                  * Setting all of a previous oldext extent to newext.
2233                  * The right neighbor is contiguous, the left is not.
2234                  */
2235                 PREV.br_blockcount += RIGHT.br_blockcount;
2236                 PREV.br_state = new->br_state;
2237
2238                 xfs_iext_next(ifp, icur);
2239                 xfs_iext_remove(ip, icur, state);
2240                 xfs_iext_prev(ifp, icur);
2241                 xfs_iext_update_extent(ip, state, icur, &PREV);
2242                 ifp->if_nextents--;
2243
2244                 if (cur == NULL)
2245                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2246                 else {
2247                         rval = XFS_ILOG_CORE;
2248                         error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2249                         if (error)
2250                                 goto done;
2251                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2252                                 error = -EFSCORRUPTED;
2253                                 goto done;
2254                         }
2255                         if ((error = xfs_btree_delete(cur, &i)))
2256                                 goto done;
2257                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2258                                 error = -EFSCORRUPTED;
2259                                 goto done;
2260                         }
2261                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2262                                 goto done;
2263                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2264                                 error = -EFSCORRUPTED;
2265                                 goto done;
2266                         }
2267                         error = xfs_bmbt_update(cur, &PREV);
2268                         if (error)
2269                                 goto done;
2270                 }
2271                 break;
2272
2273         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2274                 /*
2275                  * Setting all of a previous oldext extent to newext.
2276                  * Neither the left nor right neighbors are contiguous with
2277                  * the new one.
2278                  */
2279                 PREV.br_state = new->br_state;
2280                 xfs_iext_update_extent(ip, state, icur, &PREV);
2281
2282                 if (cur == NULL)
2283                         rval = XFS_ILOG_DEXT;
2284                 else {
2285                         rval = 0;
2286                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2287                         if (error)
2288                                 goto done;
2289                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2290                                 error = -EFSCORRUPTED;
2291                                 goto done;
2292                         }
2293                         error = xfs_bmbt_update(cur, &PREV);
2294                         if (error)
2295                                 goto done;
2296                 }
2297                 break;
2298
2299         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2300                 /*
2301                  * Setting the first part of a previous oldext extent to newext.
2302                  * The left neighbor is contiguous.
2303                  */
2304                 LEFT.br_blockcount += new->br_blockcount;
2305
2306                 old = PREV;
2307                 PREV.br_startoff += new->br_blockcount;
2308                 PREV.br_startblock += new->br_blockcount;
2309                 PREV.br_blockcount -= new->br_blockcount;
2310
2311                 xfs_iext_update_extent(ip, state, icur, &PREV);
2312                 xfs_iext_prev(ifp, icur);
2313                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2314
2315                 if (cur == NULL)
2316                         rval = XFS_ILOG_DEXT;
2317                 else {
2318                         rval = 0;
2319                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2320                         if (error)
2321                                 goto done;
2322                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2323                                 error = -EFSCORRUPTED;
2324                                 goto done;
2325                         }
2326                         error = xfs_bmbt_update(cur, &PREV);
2327                         if (error)
2328                                 goto done;
2329                         error = xfs_btree_decrement(cur, 0, &i);
2330                         if (error)
2331                                 goto done;
2332                         error = xfs_bmbt_update(cur, &LEFT);
2333                         if (error)
2334                                 goto done;
2335                 }
2336                 break;
2337
2338         case BMAP_LEFT_FILLING:
2339                 /*
2340                  * Setting the first part of a previous oldext extent to newext.
2341                  * The left neighbor is not contiguous.
2342                  */
2343                 old = PREV;
2344                 PREV.br_startoff += new->br_blockcount;
2345                 PREV.br_startblock += new->br_blockcount;
2346                 PREV.br_blockcount -= new->br_blockcount;
2347
2348                 xfs_iext_update_extent(ip, state, icur, &PREV);
2349                 xfs_iext_insert(ip, icur, new, state);
2350                 ifp->if_nextents++;
2351
2352                 if (cur == NULL)
2353                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354                 else {
2355                         rval = XFS_ILOG_CORE;
2356                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2357                         if (error)
2358                                 goto done;
2359                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2360                                 error = -EFSCORRUPTED;
2361                                 goto done;
2362                         }
2363                         error = xfs_bmbt_update(cur, &PREV);
2364                         if (error)
2365                                 goto done;
2366                         cur->bc_rec.b = *new;
2367                         if ((error = xfs_btree_insert(cur, &i)))
2368                                 goto done;
2369                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2370                                 error = -EFSCORRUPTED;
2371                                 goto done;
2372                         }
2373                 }
2374                 break;
2375
2376         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377                 /*
2378                  * Setting the last part of a previous oldext extent to newext.
2379                  * The right neighbor is contiguous with the new allocation.
2380                  */
2381                 old = PREV;
2382                 PREV.br_blockcount -= new->br_blockcount;
2383
2384                 RIGHT.br_startoff = new->br_startoff;
2385                 RIGHT.br_startblock = new->br_startblock;
2386                 RIGHT.br_blockcount += new->br_blockcount;
2387
2388                 xfs_iext_update_extent(ip, state, icur, &PREV);
2389                 xfs_iext_next(ifp, icur);
2390                 xfs_iext_update_extent(ip, state, icur, &RIGHT);
2391
2392                 if (cur == NULL)
2393                         rval = XFS_ILOG_DEXT;
2394                 else {
2395                         rval = 0;
2396                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2397                         if (error)
2398                                 goto done;
2399                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2400                                 error = -EFSCORRUPTED;
2401                                 goto done;
2402                         }
2403                         error = xfs_bmbt_update(cur, &PREV);
2404                         if (error)
2405                                 goto done;
2406                         error = xfs_btree_increment(cur, 0, &i);
2407                         if (error)
2408                                 goto done;
2409                         error = xfs_bmbt_update(cur, &RIGHT);
2410                         if (error)
2411                                 goto done;
2412                 }
2413                 break;
2414
2415         case BMAP_RIGHT_FILLING:
2416                 /*
2417                  * Setting the last part of a previous oldext extent to newext.
2418                  * The right neighbor is not contiguous.
2419                  */
2420                 old = PREV;
2421                 PREV.br_blockcount -= new->br_blockcount;
2422
2423                 xfs_iext_update_extent(ip, state, icur, &PREV);
2424                 xfs_iext_next(ifp, icur);
2425                 xfs_iext_insert(ip, icur, new, state);
2426                 ifp->if_nextents++;
2427
2428                 if (cur == NULL)
2429                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430                 else {
2431                         rval = XFS_ILOG_CORE;
2432                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2433                         if (error)
2434                                 goto done;
2435                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2436                                 error = -EFSCORRUPTED;
2437                                 goto done;
2438                         }
2439                         error = xfs_bmbt_update(cur, &PREV);
2440                         if (error)
2441                                 goto done;
2442                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2443                         if (error)
2444                                 goto done;
2445                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2446                                 error = -EFSCORRUPTED;
2447                                 goto done;
2448                         }
2449                         if ((error = xfs_btree_insert(cur, &i)))
2450                                 goto done;
2451                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2452                                 error = -EFSCORRUPTED;
2453                                 goto done;
2454                         }
2455                 }
2456                 break;
2457
2458         case 0:
2459                 /*
2460                  * Setting the middle part of a previous oldext extent to
2461                  * newext.  Contiguity is impossible here.
2462                  * One extent becomes three extents.
2463                  */
2464                 old = PREV;
2465                 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2466
2467                 r[0] = *new;
2468                 r[1].br_startoff = new_endoff;
2469                 r[1].br_blockcount =
2470                         old.br_startoff + old.br_blockcount - new_endoff;
2471                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2472                 r[1].br_state = PREV.br_state;
2473
2474                 xfs_iext_update_extent(ip, state, icur, &PREV);
2475                 xfs_iext_next(ifp, icur);
2476                 xfs_iext_insert(ip, icur, &r[1], state);
2477                 xfs_iext_insert(ip, icur, &r[0], state);
2478                 ifp->if_nextents += 2;
2479
2480                 if (cur == NULL)
2481                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2482                 else {
2483                         rval = XFS_ILOG_CORE;
2484                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2485                         if (error)
2486                                 goto done;
2487                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2488                                 error = -EFSCORRUPTED;
2489                                 goto done;
2490                         }
2491                         /* new right extent - oldext */
2492                         error = xfs_bmbt_update(cur, &r[1]);
2493                         if (error)
2494                                 goto done;
2495                         /* new left extent - oldext */
2496                         cur->bc_rec.b = PREV;
2497                         if ((error = xfs_btree_insert(cur, &i)))
2498                                 goto done;
2499                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2500                                 error = -EFSCORRUPTED;
2501                                 goto done;
2502                         }
2503                         /*
2504                          * Reset the cursor to the position of the new extent
2505                          * we are about to insert as we can't trust it after
2506                          * the previous insert.
2507                          */
2508                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2509                         if (error)
2510                                 goto done;
2511                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2512                                 error = -EFSCORRUPTED;
2513                                 goto done;
2514                         }
2515                         /* new middle extent - newext */
2516                         if ((error = xfs_btree_insert(cur, &i)))
2517                                 goto done;
2518                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2519                                 error = -EFSCORRUPTED;
2520                                 goto done;
2521                         }
2522                 }
2523                 break;
2524
2525         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2526         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2527         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2528         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2529         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2530         case BMAP_LEFT_CONTIG:
2531         case BMAP_RIGHT_CONTIG:
2532                 /*
2533                  * These cases are all impossible.
2534                  */
2535                 ASSERT(0);
2536         }
2537
2538         /* update reverse mappings */
2539         xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2540
2541         /* convert to a btree if necessary */
2542         if (xfs_bmap_needs_btree(ip, whichfork)) {
2543                 int     tmp_logflags;   /* partial log flag return val */
2544
2545                 ASSERT(cur == NULL);
2546                 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2547                                 &tmp_logflags, whichfork);
2548                 *logflagsp |= tmp_logflags;
2549                 if (error)
2550                         goto done;
2551         }
2552
2553         /* clear out the allocated field, done with it now in any case. */
2554         if (cur) {
2555                 cur->bc_ino.allocated = 0;
2556                 *curp = cur;
2557         }
2558
2559         xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2560 done:
2561         *logflagsp |= rval;
2562         return error;
2563 #undef  LEFT
2564 #undef  RIGHT
2565 #undef  PREV
2566 }
2567
2568 /*
2569  * Convert a hole to a delayed allocation.
2570  */
2571 STATIC void
2572 xfs_bmap_add_extent_hole_delay(
2573         xfs_inode_t             *ip,    /* incore inode pointer */
2574         int                     whichfork,
2575         struct xfs_iext_cursor  *icur,
2576         xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2577 {
2578         struct xfs_ifork        *ifp;   /* inode fork pointer */
2579         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2580         xfs_filblks_t           newlen=0;       /* new indirect size */
2581         xfs_filblks_t           oldlen=0;       /* old indirect size */
2582         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2583         int                     state = xfs_bmap_fork_to_state(whichfork);
2584         xfs_filblks_t           temp;    /* temp for indirect calculations */
2585
2586         ifp = XFS_IFORK_PTR(ip, whichfork);
2587         ASSERT(isnullstartblock(new->br_startblock));
2588
2589         /*
2590          * Check and set flags if this segment has a left neighbor
2591          */
2592         if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2593                 state |= BMAP_LEFT_VALID;
2594                 if (isnullstartblock(left.br_startblock))
2595                         state |= BMAP_LEFT_DELAY;
2596         }
2597
2598         /*
2599          * Check and set flags if the current (right) segment exists.
2600          * If it doesn't exist, we're converting the hole at end-of-file.
2601          */
2602         if (xfs_iext_get_extent(ifp, icur, &right)) {
2603                 state |= BMAP_RIGHT_VALID;
2604                 if (isnullstartblock(right.br_startblock))
2605                         state |= BMAP_RIGHT_DELAY;
2606         }
2607
2608         /*
2609          * Set contiguity flags on the left and right neighbors.
2610          * Don't let extents get too large, even if the pieces are contiguous.
2611          */
2612         if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2613             left.br_startoff + left.br_blockcount == new->br_startoff &&
2614             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2615                 state |= BMAP_LEFT_CONTIG;
2616
2617         if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2618             new->br_startoff + new->br_blockcount == right.br_startoff &&
2619             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2620             (!(state & BMAP_LEFT_CONTIG) ||
2621              (left.br_blockcount + new->br_blockcount +
2622               right.br_blockcount <= MAXEXTLEN)))
2623                 state |= BMAP_RIGHT_CONTIG;
2624
2625         /*
2626          * Switch out based on the contiguity flags.
2627          */
2628         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2629         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2630                 /*
2631                  * New allocation is contiguous with delayed allocations
2632                  * on the left and on the right.
2633                  * Merge all three into a single extent record.
2634                  */
2635                 temp = left.br_blockcount + new->br_blockcount +
2636                         right.br_blockcount;
2637
2638                 oldlen = startblockval(left.br_startblock) +
2639                         startblockval(new->br_startblock) +
2640                         startblockval(right.br_startblock);
2641                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2642                                          oldlen);
2643                 left.br_startblock = nullstartblock(newlen);
2644                 left.br_blockcount = temp;
2645
2646                 xfs_iext_remove(ip, icur, state);
2647                 xfs_iext_prev(ifp, icur);
2648                 xfs_iext_update_extent(ip, state, icur, &left);
2649                 break;
2650
2651         case BMAP_LEFT_CONTIG:
2652                 /*
2653                  * New allocation is contiguous with a delayed allocation
2654                  * on the left.
2655                  * Merge the new allocation with the left neighbor.
2656                  */
2657                 temp = left.br_blockcount + new->br_blockcount;
2658
2659                 oldlen = startblockval(left.br_startblock) +
2660                         startblockval(new->br_startblock);
2661                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2662                                          oldlen);
2663                 left.br_blockcount = temp;
2664                 left.br_startblock = nullstartblock(newlen);
2665
2666                 xfs_iext_prev(ifp, icur);
2667                 xfs_iext_update_extent(ip, state, icur, &left);
2668                 break;
2669
2670         case BMAP_RIGHT_CONTIG:
2671                 /*
2672                  * New allocation is contiguous with a delayed allocation
2673                  * on the right.
2674                  * Merge the new allocation with the right neighbor.
2675                  */
2676                 temp = new->br_blockcount + right.br_blockcount;
2677                 oldlen = startblockval(new->br_startblock) +
2678                         startblockval(right.br_startblock);
2679                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2680                                          oldlen);
2681                 right.br_startoff = new->br_startoff;
2682                 right.br_startblock = nullstartblock(newlen);
2683                 right.br_blockcount = temp;
2684                 xfs_iext_update_extent(ip, state, icur, &right);
2685                 break;
2686
2687         case 0:
2688                 /*
2689                  * New allocation is not contiguous with another
2690                  * delayed allocation.
2691                  * Insert a new entry.
2692                  */
2693                 oldlen = newlen = 0;
2694                 xfs_iext_insert(ip, icur, new, state);
2695                 break;
2696         }
2697         if (oldlen != newlen) {
2698                 ASSERT(oldlen > newlen);
2699                 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2700                                  false);
2701                 /*
2702                  * Nothing to do for disk quota accounting here.
2703                  */
2704                 xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2705         }
2706 }
2707
2708 /*
2709  * Convert a hole to a real allocation.
2710  */
2711 STATIC int                              /* error */
2712 xfs_bmap_add_extent_hole_real(
2713         struct xfs_trans        *tp,
2714         struct xfs_inode        *ip,
2715         int                     whichfork,
2716         struct xfs_iext_cursor  *icur,
2717         struct xfs_btree_cur    **curp,
2718         struct xfs_bmbt_irec    *new,
2719         int                     *logflagsp,
2720         int                     flags)
2721 {
2722         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2723         struct xfs_mount        *mp = ip->i_mount;
2724         struct xfs_btree_cur    *cur = *curp;
2725         int                     error;  /* error return value */
2726         int                     i;      /* temp state */
2727         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2728         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2729         int                     rval=0; /* return value (logging flags) */
2730         int                     state = xfs_bmap_fork_to_state(whichfork);
2731         struct xfs_bmbt_irec    old;
2732
2733         ASSERT(!isnullstartblock(new->br_startblock));
2734         ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2735
2736         XFS_STATS_INC(mp, xs_add_exlist);
2737
2738         /*
2739          * Check and set flags if this segment has a left neighbor.
2740          */
2741         if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2742                 state |= BMAP_LEFT_VALID;
2743                 if (isnullstartblock(left.br_startblock))
2744                         state |= BMAP_LEFT_DELAY;
2745         }
2746
2747         /*
2748          * Check and set flags if this segment has a current value.
2749          * Not true if we're inserting into the "hole" at eof.
2750          */
2751         if (xfs_iext_get_extent(ifp, icur, &right)) {
2752                 state |= BMAP_RIGHT_VALID;
2753                 if (isnullstartblock(right.br_startblock))
2754                         state |= BMAP_RIGHT_DELAY;
2755         }
2756
2757         /*
2758          * We're inserting a real allocation between "left" and "right".
2759          * Set the contiguity flags.  Don't let extents get too large.
2760          */
2761         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2762             left.br_startoff + left.br_blockcount == new->br_startoff &&
2763             left.br_startblock + left.br_blockcount == new->br_startblock &&
2764             left.br_state == new->br_state &&
2765             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2766                 state |= BMAP_LEFT_CONTIG;
2767
2768         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2769             new->br_startoff + new->br_blockcount == right.br_startoff &&
2770             new->br_startblock + new->br_blockcount == right.br_startblock &&
2771             new->br_state == right.br_state &&
2772             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2773             (!(state & BMAP_LEFT_CONTIG) ||
2774              left.br_blockcount + new->br_blockcount +
2775              right.br_blockcount <= MAXEXTLEN))
2776                 state |= BMAP_RIGHT_CONTIG;
2777
2778         error = 0;
2779         /*
2780          * Select which case we're in here, and implement it.
2781          */
2782         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2783         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2784                 /*
2785                  * New allocation is contiguous with real allocations on the
2786                  * left and on the right.
2787                  * Merge all three into a single extent record.
2788                  */
2789                 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2790
2791                 xfs_iext_remove(ip, icur, state);
2792                 xfs_iext_prev(ifp, icur);
2793                 xfs_iext_update_extent(ip, state, icur, &left);
2794                 ifp->if_nextents--;
2795
2796                 if (cur == NULL) {
2797                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2798                 } else {
2799                         rval = XFS_ILOG_CORE;
2800                         error = xfs_bmbt_lookup_eq(cur, &right, &i);
2801                         if (error)
2802                                 goto done;
2803                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2804                                 error = -EFSCORRUPTED;
2805                                 goto done;
2806                         }
2807                         error = xfs_btree_delete(cur, &i);
2808                         if (error)
2809                                 goto done;
2810                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2811                                 error = -EFSCORRUPTED;
2812                                 goto done;
2813                         }
2814                         error = xfs_btree_decrement(cur, 0, &i);
2815                         if (error)
2816                                 goto done;
2817                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2818                                 error = -EFSCORRUPTED;
2819                                 goto done;
2820                         }
2821                         error = xfs_bmbt_update(cur, &left);
2822                         if (error)
2823                                 goto done;
2824                 }
2825                 break;
2826
2827         case BMAP_LEFT_CONTIG:
2828                 /*
2829                  * New allocation is contiguous with a real allocation
2830                  * on the left.
2831                  * Merge the new allocation with the left neighbor.
2832                  */
2833                 old = left;
2834                 left.br_blockcount += new->br_blockcount;
2835
2836                 xfs_iext_prev(ifp, icur);
2837                 xfs_iext_update_extent(ip, state, icur, &left);
2838
2839                 if (cur == NULL) {
2840                         rval = xfs_ilog_fext(whichfork);
2841                 } else {
2842                         rval = 0;
2843                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2844                         if (error)
2845                                 goto done;
2846                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2847                                 error = -EFSCORRUPTED;
2848                                 goto done;
2849                         }
2850                         error = xfs_bmbt_update(cur, &left);
2851                         if (error)
2852                                 goto done;
2853                 }
2854                 break;
2855
2856         case BMAP_RIGHT_CONTIG:
2857                 /*
2858                  * New allocation is contiguous with a real allocation
2859                  * on the right.
2860                  * Merge the new allocation with the right neighbor.
2861                  */
2862                 old = right;
2863
2864                 right.br_startoff = new->br_startoff;
2865                 right.br_startblock = new->br_startblock;
2866                 right.br_blockcount += new->br_blockcount;
2867                 xfs_iext_update_extent(ip, state, icur, &right);
2868
2869                 if (cur == NULL) {
2870                         rval = xfs_ilog_fext(whichfork);
2871                 } else {
2872                         rval = 0;
2873                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2874                         if (error)
2875                                 goto done;
2876                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2877                                 error = -EFSCORRUPTED;
2878                                 goto done;
2879                         }
2880                         error = xfs_bmbt_update(cur, &right);
2881                         if (error)
2882                                 goto done;
2883                 }
2884                 break;
2885
2886         case 0:
2887                 /*
2888                  * New allocation is not contiguous with another
2889                  * real allocation.
2890                  * Insert a new entry.
2891                  */
2892                 xfs_iext_insert(ip, icur, new, state);
2893                 ifp->if_nextents++;
2894
2895                 if (cur == NULL) {
2896                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2897                 } else {
2898                         rval = XFS_ILOG_CORE;
2899                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2900                         if (error)
2901                                 goto done;
2902                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2903                                 error = -EFSCORRUPTED;
2904                                 goto done;
2905                         }
2906                         error = xfs_btree_insert(cur, &i);
2907                         if (error)
2908                                 goto done;
2909                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2910                                 error = -EFSCORRUPTED;
2911                                 goto done;
2912                         }
2913                 }
2914                 break;
2915         }
2916
2917         /* add reverse mapping unless caller opted out */
2918         if (!(flags & XFS_BMAPI_NORMAP))
2919                 xfs_rmap_map_extent(tp, ip, whichfork, new);
2920
2921         /* convert to a btree if necessary */
2922         if (xfs_bmap_needs_btree(ip, whichfork)) {
2923                 int     tmp_logflags;   /* partial log flag return val */
2924
2925                 ASSERT(cur == NULL);
2926                 error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2927                                 &tmp_logflags, whichfork);
2928                 *logflagsp |= tmp_logflags;
2929                 cur = *curp;
2930                 if (error)
2931                         goto done;
2932         }
2933
2934         /* clear out the allocated field, done with it now in any case. */
2935         if (cur)
2936                 cur->bc_ino.allocated = 0;
2937
2938         xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2939 done:
2940         *logflagsp |= rval;
2941         return error;
2942 }
2943
2944 /*
2945  * Functions used in the extent read, allocate and remove paths
2946  */
2947
2948 /*
2949  * Adjust the size of the new extent based on di_extsize and rt extsize.
 *
 * The request described by *offp / *lenp is widened so that it starts and
 * ends on multiples of extsz, then pulled back in so that it does not run
 * into the neighbouring extents described by prevp and gotp.  On success
 * the adjusted values are stored back through offp and lenp.
 *
 * Returns 0 on success; this includes the two early exits (convert set, or
 * a non-delalloc, non-EOF request lying entirely inside *gotp) that leave
 * *offp and *lenp untouched.  Returns -EINVAL only on the realtime path,
 * when the length cannot be made a multiple of sb_rextsize while still
 * covering the original request.
2950  */
2951 int
2952 xfs_bmap_extsize_align(
2953         xfs_mount_t     *mp,
2954         xfs_bmbt_irec_t *gotp,          /* next extent pointer */
2955         xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
2956         xfs_extlen_t    extsz,          /* align to this extent size */
2957         int             rt,             /* is this a realtime inode? */
2958         int             eof,            /* is extent at end-of-file? */
2959         int             delay,          /* creating delalloc extent? */
2960         int             convert,        /* overwriting unwritten extent? */
2961         xfs_fileoff_t   *offp,          /* in/out: aligned offset */
2962         xfs_extlen_t    *lenp)          /* in/out: aligned length */
2963 {
2964         xfs_fileoff_t   orig_off;       /* original offset */
2965         xfs_extlen_t    orig_alen;      /* original length */
2966         xfs_fileoff_t   orig_end;       /* original off+len */
2967         xfs_fileoff_t   nexto;          /* next file offset */
2968         xfs_fileoff_t   prevo;          /* previous file offset */
2969         xfs_fileoff_t   align_off;      /* temp for offset */
2970         xfs_extlen_t    align_alen;     /* temp for length */
2971         xfs_extlen_t    temp;           /* temp for calculations */
2972
             /* Unwritten extent conversion: the request is used exactly as given. */
2973         if (convert)
2974                 return 0;
2975
2976         orig_off = align_off = *offp;
2977         orig_alen = align_alen = *lenp;
2978         orig_end = orig_off + orig_alen;
2979
2980         /*
2981          * If this request overlaps an existing extent, then don't
2982          * attempt to perform any additional alignment.
2983          */
2984         if (!delay && !eof &&
2985             (orig_off >= gotp->br_startoff) &&
2986             (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2987                 return 0;
2988         }
2989
2990         /*
2991          * If the file offset is unaligned vs. the extent size
2992          * we need to align it.  This will be possible unless
2993          * the file was previously written with a kernel that didn't
2994          * perform this alignment, or if a truncate shot us in the
2995          * foot.
          *
          * temp receives the remainder of orig_off / extsz; the start is
          * pulled down by that amount and the length grown by the same
          * amount so the end of the range does not move.
2996          */
2997         div_u64_rem(orig_off, extsz, &temp);
2998         if (temp) {
2999                 align_alen += temp;
3000                 align_off -= temp;
3001         }
3002
3003         /* Same adjustment for the end of the requested area. */
3004         temp = (align_alen % extsz);
3005         if (temp)
3006                 align_alen += extsz - temp;
3007
3008         /*
3009          * For large extent hint sizes, the aligned extent might be larger than
3010          * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3011          * the length back under MAXEXTLEN. The outer allocation loops handle
3012          * short allocation just fine, so it is safe to do this. We only want to
3013          * do it when we are forced to, though, because it means more allocation
3014          * operations are required.
3015          */
3016         while (align_alen > MAXEXTLEN)
3017                 align_alen -= extsz;
3018         ASSERT(align_alen <= MAXEXTLEN);
3019
3020         /*
3021          * If the previous block overlaps with this proposed allocation
3022          * then move the start forward without adjusting the length.
          *
          * prevo is the lowest file offset the aligned start is allowed to
          * take: the start of the previous record when that record is a
          * hole, the end of the previous record otherwise, or 0 when there
          * is no previous record.
3023          */
3024         if (prevp->br_startoff != NULLFILEOFF) {
3025                 if (prevp->br_startblock == HOLESTARTBLOCK)
3026                         prevo = prevp->br_startoff;
3027                 else
3028                         prevo = prevp->br_startoff + prevp->br_blockcount;
3029         } else
3030                 prevo = 0;
3031         if (align_off != orig_off && align_off < prevo)
3032                 align_off = prevo;
3033         /*
3034          * If the next block overlaps with this proposed allocation
3035          * then move the start back without adjusting the length,
3036          * but not before offset 0.
3037          * This may of course make the start overlap previous block,
3038          * and if we hit the offset 0 limit then the next block
3039          * can still overlap too.
          *
          * nexto is the file offset the aligned range must end at or
          * before.  It is left as NULLFILEOFF when at EOF or when there is
          * no next record; the last check below tests for that value
          * explicitly before using nexto as a bound.
3040          */
3041         if (!eof && gotp->br_startoff != NULLFILEOFF) {
3042                 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3043                     (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3044                         nexto = gotp->br_startoff + gotp->br_blockcount;
3045                 else
3046                         nexto = gotp->br_startoff;
3047         } else
3048                 nexto = NULLFILEOFF;
3049         if (!eof &&
3050             align_off + align_alen != orig_end &&
3051             align_off + align_alen > nexto)
3052                 align_off = nexto > align_alen ? nexto - align_alen : 0;
3053         /*
3054          * If we're now overlapping the next or previous extent that
3055          * means we can't fit an extsz piece in this hole.  Just move
3056          * the start forward to the first valid spot and set
3057          * the length so we hit the end.
3058          */
3059         if (align_off != orig_off && align_off < prevo)
3060                 align_off = prevo;
3061         if (align_off + align_alen != orig_end &&
3062             align_off + align_alen > nexto &&
3063             nexto != NULLFILEOFF) {
3064                 ASSERT(nexto > prevo);
3065                 align_alen = nexto - align_off;
3066         }
3067
3068         /*
3069          * If realtime, and the result isn't a multiple of the realtime
3070          * extent size we need to remove blocks until it is.
          *
          * Note the assignment inside the condition: temp becomes the
          * number of blocks by which align_alen exceeds a multiple of
          * sb_rextsize, and the branch is taken only when that is nonzero.
3071          */
3072         if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3073                 /*
3074                  * We're not covering the original request, or
3075                  * we won't be able to once we fix the length.
3076                  */
3077                 if (orig_off < align_off ||
3078                     orig_end > align_off + align_alen ||
3079                     align_alen - temp < orig_alen)
3080                         return -EINVAL;
3081                 /*
3082                  * Try to fix it by moving the start up.
3083                  */
3084                 if (align_off + temp <= orig_off) {
3085                         align_alen -= temp;
3086                         align_off += temp;
3087                 }
3088                 /*
3089                  * Try to fix it by moving the end in.
3090                  */
3091                 else if (align_off + align_alen - temp >= orig_end)
3092                         align_alen -= temp;
3093                 /*
3094                  * Set the start to the minimum then trim the length.
3095                  */
3096                 else {
3097                         align_alen -= orig_off - align_off;
3098                         align_off = orig_off;
3099                         align_alen -= align_alen % mp->m_sb.sb_rextsize;
3100                 }
3101                 /*
3102                  * Result doesn't cover the request, fail it.
3103                  */
3104                 if (orig_off < align_off || orig_end > align_off + align_alen)
3105                         return -EINVAL;
3106         } else {
3107                 ASSERT(orig_off >= align_off);
3108                 /* see MAXEXTLEN handling above */
3109                 ASSERT(orig_end <= align_off + align_alen ||
3110                        align_alen + extsz > MAXEXTLEN);
3111         }
3112
             /*
              * DEBUG-only sanity checks: the final range must end at or before
              * the start of the next record (when not at EOF) and must start at
              * or after the end of the previous record.
              */
3113 #ifdef DEBUG
3114         if (!eof && gotp->br_startoff != NULLFILEOFF)
3115                 ASSERT(align_off + align_alen <= gotp->br_startoff);
3116         if (prevp->br_startoff != NULLFILEOFF)
3117                 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3118 #endif
3119
             /* Publish the aligned range to the caller. */
3120         *lenp = align_alen;
3121         *offp = align_off;
3122         return 0;
3123 }
3124
/*
 * Gap heuristic used by xfs_bmap_adjacent(): a file-offset gap to a
 * neighbouring extent is only mirrored in the block number when it is no
 * larger than this many times the length being allocated.
 */
3125 #define XFS_ALLOC_GAP_UNITS     4
3126
/*
 * Choose a target start block for a new allocation by looking at the
 * extents on either side of it in the file.
 *
 * Reads ap->prev, ap->got, ap->offset, ap->length, ap->eof, ap->datatype
 * and ap->tp->t_firstblock; the only field written is ap->blkno.  If no
 * neighbour yields a usable block number, ap->blkno keeps whatever value
 * the caller put there.
 */
3127 void
3128 xfs_bmap_adjacent(
3129         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3130 {
3131         xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3132         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3133         xfs_mount_t     *mp;            /* mount point structure */
3134         int             nullfb;         /* true if ap->firstblock isn't set */
3135         int             rt;             /* true if inode is realtime */
3136
             /*
              * ISVALID(x, y): is block number x an acceptable target, given a
              * known-good block y?  For realtime allocations x only has to be
              * below sb_rblocks.  Otherwise x must map to the same AG number as
              * y, that AG number must be below sb_agcount, and the AG-relative
              * block of x must be below sb_agblocks.  The macro uses the local
              * variables rt and mp, and is #undef'd at the end of the function.
              */
3137 #define ISVALID(x,y)    \
3138         (rt ? \
3139                 (x) < mp->m_sb.sb_rblocks : \
3140                 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3141                 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3142                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3143
3144         mp = ap->ip->i_mount;
3145         nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3146         rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3147                 (ap->datatype & XFS_ALLOC_USERDATA);
3148         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3149                                                         ap->tp->t_firstblock);
3150         /*
3151          * If allocating at eof, and there's a previous real block,
3152          * try to use its last block as our starting point.
3153          */
3154         if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3155             !isnullstartblock(ap->prev.br_startblock) &&
3156             ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3157                     ap->prev.br_startblock)) {
3158                 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3159                 /*
3160                  * Adjust for the gap between prevp and us.
                  * The gap is measured in file offsets and is added to the
                  * block number only if the result still passes ISVALID.
3161                  */
3162                 adjust = ap->offset -
3163                         (ap->prev.br_startoff + ap->prev.br_blockcount);
3164                 if (adjust &&
3165                     ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3166                         ap->blkno += adjust;
3167         }
3168         /*
3169          * If not at eof, then compare the two neighbor blocks.
3170          * Figure out whether either one gives us a good starting point,
3171          * and pick the better one.
3172          */
3173         else if (!ap->eof) {
3174                 xfs_fsblock_t   gotbno;         /* right side block number */
3175                 xfs_fsblock_t   gotdiff=0;      /* right side difference */
3176                 xfs_fsblock_t   prevbno;        /* left side block number */
3177                 xfs_fsblock_t   prevdiff=0;     /* left side difference */
3178
3179                 /*
3180                  * If there's a previous (left) block, select a requested
3181                  * start block based on it.
                  * The third clause assigns prevbno inside the condition,
                  * so a computed value of zero also sends us to the
                  * "just default" branch below.
3182                  */
3183                 if (ap->prev.br_startoff != NULLFILEOFF &&
3184                     !isnullstartblock(ap->prev.br_startblock) &&
3185                     (prevbno = ap->prev.br_startblock +
3186                                ap->prev.br_blockcount) &&
3187                     ISVALID(prevbno, ap->prev.br_startblock)) {
3188                         /*
3189                          * Calculate gap to end of previous block.
3190                          */
3191                         adjust = prevdiff = ap->offset -
3192                                 (ap->prev.br_startoff +
3193                                  ap->prev.br_blockcount);
3194                         /*
3195                          * Figure the startblock based on the previous block's
3196                          * end and the gap size.
3197                          * Heuristic!
3198                          * If the gap is large relative to the piece we're
3199                          * allocating, or using it gives us an invalid block
3200                          * number, then just use the end of the previous block.
                          * In that fallback case prevdiff is doubled
                          * (adjust == prevdiff here), which makes this
                          * candidate compare worse in the final selection.
3201                          */
3202                         if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3203                             ISVALID(prevbno + prevdiff,
3204                                     ap->prev.br_startblock))
3205                                 prevbno += adjust;
3206                         else
3207                                 prevdiff += adjust;
3208                         /*
3209                          * If the firstblock forbids it, can't use it,
3210                          * must use default.
3211                          */
3212                         if (!rt && !nullfb &&
3213                             XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3214                                 prevbno = NULLFSBLOCK;
3215                 }
3216                 /*
3217                  * No previous block or can't follow it, just default.
3218                  */
3219                 else
3220                         prevbno = NULLFSBLOCK;
3221                 /*
3222                  * If there's a following (right) block, select a requested
3223                  * start block based on it.
3224                  */
3225                 if (!isnullstartblock(ap->got.br_startblock)) {
3226                         /*
3227                          * Calculate gap to start of next block.
3228                          */
3229                         adjust = gotdiff = ap->got.br_startoff - ap->offset;
3230                         /*
3231                          * Figure the startblock based on the next block's
3232                          * start and the gap size.
3233                          */
3234                         gotbno = ap->got.br_startblock;
3235                         /*
3236                          * Heuristic!
3237                          * If the gap is large relative to the piece we're
3238                          * allocating, or using it gives us an invalid block
3239                          * number, then just use the start of the next block
3240                          * offset by our length.
                          * When neither adjusted position is valid, gotbno
                          * stays at the next extent's start block and
                          * gotdiff is doubled (adjust == gotdiff here).
3241                          */
3242                         if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3243                             ISVALID(gotbno - gotdiff, gotbno))
3244                                 gotbno -= adjust;
3245                         else if (ISVALID(gotbno - ap->length, gotbno)) {
3246                                 gotbno -= ap->length;
3247                                 gotdiff += adjust - ap->length;
3248                         } else
3249                                 gotdiff += adjust;
3250                         /*
3251                          * If the firstblock forbids it, can't use it,
3252                          * must use default.
3253                          */
3254                         if (!rt && !nullfb &&
3255                             XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3256                                 gotbno = NULLFSBLOCK;
3257                 }
3258                 /*
3259                  * No next block, just default.
3260                  */
3261                 else
3262                         gotbno = NULLFSBLOCK;
3263                 /*
3264                  * If both valid, pick the better one, else the only good
3265                  * one, else ap->blkno is already set (to 0 or the inode block).
                  * "Better" means the smaller diff; a tie goes to the
                  * previous-extent candidate.
3266                  */
3267                 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3268                         ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3269                 else if (prevbno != NULLFSBLOCK)
3270                         ap->blkno = prevbno;
3271                 else if (gotbno != NULLFSBLOCK)
3272                         ap->blkno = gotbno;
3273         }
3274 #undef ISVALID
3275 }
3276
3277 static int
3278 xfs_bmap_longest_free_extent(
3279         struct xfs_trans        *tp,
3280         xfs_agnumber_t          ag,
3281         xfs_extlen_t            *blen,
3282         int                     *notinit)
3283 {
3284         struct xfs_mount        *mp = tp->t_mountp;
3285         struct xfs_perag        *pag;
3286         xfs_extlen_t            longest;
3287         int                     error = 0;
3288
3289         pag = xfs_perag_get(mp, ag);
3290         if (!pag->pagf_init) {
3291                 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3292                 if (error) {
3293                         /* Couldn't lock the AGF, so skip this AG. */
3294                         if (error == -EAGAIN) {
3295                                 *notinit = 1;
3296                                 error = 0;
3297                         }
3298                         goto out;
3299                 }
3300         }
3301
3302         longest = xfs_alloc_longest_free_extent(pag,
3303                                 xfs_alloc_min_freelist(mp, pag),
3304                                 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3305         if (*blen < longest)
3306                 *blen = longest;
3307
3308 out:
3309         xfs_perag_put(pag);
3310         return error;
3311 }
3312
3313 static void
3314 xfs_bmap_select_minlen(
3315         struct xfs_bmalloca     *ap,
3316         struct xfs_alloc_arg    *args,
3317         xfs_extlen_t            *blen,
3318         int                     notinit)
3319 {
3320         if (notinit || *blen < ap->minlen) {
3321                 /*
3322                  * Since we did a BUF_TRYLOCK above, it is possible that
3323                  * there is space for this request.
3324                  */
3325                 args->minlen = ap->minlen;
3326         } else if (*blen < args->maxlen) {
3327                 /*
3328                  * If the best seen length is less than the request length,
3329                  * use the best as the minimum.
3330                  */
3331                 args->minlen = *blen;
3332         } else {
3333                 /*
3334                  * Otherwise we've seen an extent as big as maxlen, use that
3335                  * as the minimum.
3336                  */
3337                 args->minlen = args->maxlen;
3338         }
3339 }
3340
3341 STATIC int
3342 xfs_bmap_btalloc_nullfb(
3343         struct xfs_bmalloca     *ap,
3344         struct xfs_alloc_arg    *args,
3345         xfs_extlen_t            *blen)
3346 {
3347         struct xfs_mount        *mp = ap->ip->i_mount;
3348         xfs_agnumber_t          ag, startag;
3349         int                     notinit = 0;
3350         int                     error;
3351
3352         args->type = XFS_ALLOCTYPE_START_BNO;
3353         args->total = ap->total;
3354
3355         startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3356         if (startag == NULLAGNUMBER)
3357                 startag = ag = 0;
3358
3359         while (*blen < args->maxlen) {
3360                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3361                                                      &notinit);
3362                 if (error)
3363                         return error;
3364
3365                 if (++ag == mp->m_sb.sb_agcount)
3366                         ag = 0;
3367                 if (ag == startag)
3368                         break;
3369         }
3370
3371         xfs_bmap_select_minlen(ap, args, blen, notinit);
3372         return 0;
3373 }
3374
3375 STATIC int
3376 xfs_bmap_btalloc_filestreams(
3377         struct xfs_bmalloca     *ap,
3378         struct xfs_alloc_arg    *args,
3379         xfs_extlen_t            *blen)
3380 {
3381         struct xfs_mount        *mp = ap->ip->i_mount;
3382         xfs_agnumber_t          ag;
3383         int                     notinit = 0;
3384         int                     error;
3385
3386         args->type = XFS_ALLOCTYPE_NEAR_BNO;
3387         args->total = ap->total;
3388
3389         ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3390         if (ag == NULLAGNUMBER)
3391                 ag = 0;
3392
3393         error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3394         if (error)
3395                 return error;
3396
3397         if (*blen < args->maxlen) {
3398                 error = xfs_filestream_new_ag(ap, &ag);
3399                 if (error)
3400                         return error;
3401
3402                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3403                                                      &notinit);
3404                 if (error)
3405                         return error;
3406
3407         }
3408
3409         xfs_bmap_select_minlen(ap, args, blen, notinit);
3410
3411         /*
3412          * Set the failure fallback case to look in the selected AG as stream
3413          * may have moved.
3414          */
3415         ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3416         return 0;
3417 }
3418
3419 /* Update all inode and quota accounting for the allocation we just did. */
3420 static void
3421 xfs_bmap_btalloc_accounting(
3422         struct xfs_bmalloca     *ap,
3423         struct xfs_alloc_arg    *args)
3424 {
3425         if (ap->flags & XFS_BMAPI_COWFORK) {
3426                 /*
3427                  * COW fork blocks are in-core only and thus are treated as
3428                  * in-core quota reservation (like delalloc blocks) even when
3429                  * converted to real blocks. The quota reservation is not
3430                  * accounted to disk until blocks are remapped to the data
3431                  * fork. So if these blocks were previously delalloc, we
3432                  * already have quota reservation and there's nothing to do
3433                  * yet.
3434                  */
3435                 if (ap->wasdel) {
3436                         xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3437                         return;
3438                 }
3439
3440                 /*
3441                  * Otherwise, we've allocated blocks in a hole. The transaction
3442                  * has acquired in-core quota reservation for this extent.
3443                  * Rather than account these as real blocks, however, we reduce
3444                  * the transaction quota reservation based on the allocation.
3445                  * This essentially transfers the transaction quota reservation
3446                  * to that of a delalloc extent.
3447                  */
3448                 ap->ip->i_delayed_blks += args->len;
3449                 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3450                                 -(long)args->len);
3451                 return;
3452         }
3453
3454         /* data/attr fork only */
3455         ap->ip->i_d.di_nblocks += args->len;
3456         xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3457         if (ap->wasdel) {
3458                 ap->ip->i_delayed_blks -= args->len;
3459                 xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3460         }
3461         xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3462                 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3463                 args->len);
3464 }
3465
3466 STATIC int
3467 xfs_bmap_btalloc(
3468         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3469 {
3470         xfs_mount_t     *mp;            /* mount point structure */
3471         xfs_alloctype_t atype = 0;      /* type for allocation routines */
3472         xfs_extlen_t    align = 0;      /* minimum allocation alignment */
3473         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3474         xfs_agnumber_t  ag;
3475         xfs_alloc_arg_t args;
3476         xfs_fileoff_t   orig_offset;
3477         xfs_extlen_t    orig_length;
3478         xfs_extlen_t    blen;
3479         xfs_extlen_t    nextminlen = 0;
3480         int             nullfb;         /* true if ap->firstblock isn't set */
3481         int             isaligned;
3482         int             tryagain;
3483         int             error;
3484         int             stripe_align;
3485
3486         ASSERT(ap->length);
3487         orig_offset = ap->offset;
3488         orig_length = ap->length;
3489
3490         mp = ap->ip->i_mount;
3491
3492         /* stripe alignment for allocation is determined by mount parameters */
3493         stripe_align = 0;
3494         if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3495                 stripe_align = mp->m_swidth;
3496         else if (mp->m_dalign)
3497                 stripe_align = mp->m_dalign;
3498
3499         if (ap->flags & XFS_BMAPI_COWFORK)
3500                 align = xfs_get_cowextsz_hint(ap->ip);
3501         else if (ap->datatype & XFS_ALLOC_USERDATA)
3502                 align = xfs_get_extsz_hint(ap->ip);
3503         if (align) {
3504                 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3505                                                 align, 0, ap->eof, 0, ap->conv,
3506                                                 &ap->offset, &ap->length);
3507                 ASSERT(!error);
3508                 ASSERT(ap->length);
3509         }
3510
3511
3512         nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3513         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3514                                                         ap->tp->t_firstblock);
3515         if (nullfb) {
3516                 if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3517                     xfs_inode_is_filestream(ap->ip)) {
3518                         ag = xfs_filestream_lookup_ag(ap->ip);
3519                         ag = (ag != NULLAGNUMBER) ? ag : 0;
3520                         ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3521                 } else {
3522                         ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3523                 }
3524         } else
3525                 ap->blkno = ap->tp->t_firstblock;
3526
3527         xfs_bmap_adjacent(ap);
3528
3529         /*
3530          * If allowed, use ap->blkno; otherwise must use firstblock since
3531          * it's in the right allocation group.
3532          */
3533         if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3534                 ;
3535         else
3536                 ap->blkno = ap->tp->t_firstblock;
3537         /*
3538          * Normal allocation, done through xfs_alloc_vextent.
3539          */
3540         tryagain = isaligned = 0;
3541         memset(&args, 0, sizeof(args));
3542         args.tp = ap->tp;
3543         args.mp = mp;
3544         args.fsbno = ap->blkno;
3545         args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3546
3547         /* Trim the allocation back to the maximum an AG can fit. */
3548         args.maxlen = min(ap->length, mp->m_ag_max_usable);
3549         blen = 0;
3550         if (nullfb) {
3551                 /*
3552                  * Search for an allocation group with a single extent large
3553                  * enough for the request.  If one isn't found, then adjust
3554                  * the minimum allocation size to the largest space found.
3555                  */
3556                 if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3557                     xfs_inode_is_filestream(ap->ip))
3558                         error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3559                 else
3560                         error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3561                 if (error)
3562                         return error;
3563         } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3564                 if (xfs_inode_is_filestream(ap->ip))
3565                         args.type = XFS_ALLOCTYPE_FIRST_AG;
3566                 else
3567                         args.type = XFS_ALLOCTYPE_START_BNO;
3568                 args.total = args.minlen = ap->minlen;
3569         } else {
3570                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3571                 args.total = ap->total;
3572                 args.minlen = ap->minlen;
3573         }
3574         /* apply extent size hints if obtained earlier */
3575         if (align) {
3576                 args.prod = align;
3577                 div_u64_rem(ap->offset, args.prod, &args.mod);
3578                 if (args.mod)
3579                         args.mod = args.prod - args.mod;
3580         } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3581                 args.prod = 1;
3582                 args.mod = 0;
3583         } else {
3584                 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3585                 div_u64_rem(ap->offset, args.prod, &args.mod);
3586                 if (args.mod)
3587                         args.mod = args.prod - args.mod;
3588         }
3589         /*
3590          * If we are not low on available data blocks, and the underlying
3591          * logical volume manager is a stripe, and the file offset is zero then
3592          * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3593          * is only set if the allocation length is >= the stripe unit and the
3594          * allocation offset is at the end of file.
3595          */
3596         if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3597                 if (!ap->offset) {
3598                         args.alignment = stripe_align;
3599                         atype = args.type;
3600                         isaligned = 1;
3601                         /*
3602                          * Adjust minlen to try and preserve alignment if we
3603                          * can't guarantee an aligned maxlen extent.
3604                          */
3605                         if (blen > args.alignment &&
3606                             blen <= args.maxlen + args.alignment)
3607                                 args.minlen = blen - args.alignment;
3608                         args.minalignslop = 0;
3609                 } else {
3610                         /*
3611                          * First try an exact bno allocation.
3612                          * If it fails then do a near or start bno
3613                          * allocation with alignment turned on.
3614                          */
3615                         atype = args.type;
3616                         tryagain = 1;
3617                         args.type = XFS_ALLOCTYPE_THIS_BNO;
3618                         args.alignment = 1;
3619                         /*
3620                          * Compute the minlen+alignment for the
3621                          * next case.  Set slop so that the value
3622                          * of minlen+alignment+slop doesn't go up
3623                          * between the calls.
3624                          */
3625                         if (blen > stripe_align && blen <= args.maxlen)
3626                                 nextminlen = blen - stripe_align;
3627                         else
3628                                 nextminlen = args.minlen;
3629                         if (nextminlen + stripe_align > args.minlen + 1)
3630                                 args.minalignslop =
3631                                         nextminlen + stripe_align -
3632                                         args.minlen - 1;
3633                         else
3634                                 args.minalignslop = 0;
3635                 }
3636         } else {
3637                 args.alignment = 1;
3638                 args.minalignslop = 0;
3639         }
3640         args.minleft = ap->minleft;
3641         args.wasdel = ap->wasdel;
3642         args.resv = XFS_AG_RESV_NONE;
3643         args.datatype = ap->datatype;
3644
3645         error = xfs_alloc_vextent(&args);
3646         if (error)
3647                 return error;
3648
3649         if (tryagain && args.fsbno == NULLFSBLOCK) {
3650                 /*
3651                  * Exact allocation failed. Now try with alignment
3652                  * turned on.
3653                  */
3654                 args.type = atype;
3655                 args.fsbno = ap->blkno;
3656                 args.alignment = stripe_align;
3657                 args.minlen = nextminlen;
3658                 args.minalignslop = 0;
3659                 isaligned = 1;
3660                 if ((error = xfs_alloc_vextent(&args)))
3661                         return error;
3662         }
3663         if (isaligned && args.fsbno == NULLFSBLOCK) {
3664                 /*
3665                  * allocation failed, so turn off alignment and
3666                  * try again.
3667                  */
3668                 args.type = atype;
3669                 args.fsbno = ap->blkno;
3670                 args.alignment = 0;
3671                 if ((error = xfs_alloc_vextent(&args)))
3672                         return error;
3673         }
3674         if (args.fsbno == NULLFSBLOCK && nullfb &&
3675             args.minlen > ap->minlen) {
3676                 args.minlen = ap->minlen;
3677                 args.type = XFS_ALLOCTYPE_START_BNO;
3678                 args.fsbno = ap->blkno;
3679                 if ((error = xfs_alloc_vextent(&args)))
3680                         return error;
3681         }
3682         if (args.fsbno == NULLFSBLOCK && nullfb) {
3683                 args.fsbno = 0;
3684                 args.type = XFS_ALLOCTYPE_FIRST_AG;
3685                 args.total = ap->minlen;
3686                 if ((error = xfs_alloc_vextent(&args)))
3687                         return error;
3688                 ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3689         }
3690         if (args.fsbno != NULLFSBLOCK) {
3691                 /*
3692                  * check the allocation happened at the same or higher AG than
3693                  * the first block that was allocated.
3694                  */
3695                 ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
3696                        XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
3697                        XFS_FSB_TO_AGNO(mp, args.fsbno));
3698
3699                 ap->blkno = args.fsbno;
3700                 if (ap->tp->t_firstblock == NULLFSBLOCK)
3701                         ap->tp->t_firstblock = args.fsbno;
3702                 ASSERT(nullfb || fb_agno <= args.agno);
3703                 ap->length = args.len;
3704                 /*
3705                  * If the extent size hint is active, we tried to round the
3706                  * caller's allocation request offset down to extsz and the
3707                  * length up to another extsz boundary.  If we found a free
3708                  * extent we mapped it in starting at this new offset.  If the
3709                  * newly mapped space isn't long enough to cover any of the
3710                  * range of offsets that was originally requested, move the
3711                  * mapping up so that we can fill as much of the caller's
3712                  * original request as possible.  Free space is apparently
3713                  * very fragmented so we're unlikely to be able to satisfy the
3714                  * hints anyway.
3715                  */
3716                 if (ap->length <= orig_length)
3717                         ap->offset = orig_offset;
3718                 else if (ap->offset + ap->length < orig_offset + orig_length)
3719                         ap->offset = orig_offset + orig_length - ap->length;
3720                 xfs_bmap_btalloc_accounting(ap, &args);
3721         } else {
3722                 ap->blkno = NULLFSBLOCK;
3723                 ap->length = 0;
3724         }
3725         return 0;
3726 }
3727
3728 /* Trim extent to fit a logical block range. */
3729 void
3730 xfs_trim_extent(
3731         struct xfs_bmbt_irec    *irec,
3732         xfs_fileoff_t           bno,
3733         xfs_filblks_t           len)
3734 {
3735         xfs_fileoff_t           distance;
3736         xfs_fileoff_t           end = bno + len;
3737
3738         if (irec->br_startoff + irec->br_blockcount <= bno ||
3739             irec->br_startoff >= end) {
3740                 irec->br_blockcount = 0;
3741                 return;
3742         }
3743
3744         if (irec->br_startoff < bno) {
3745                 distance = bno - irec->br_startoff;
3746                 if (isnullstartblock(irec->br_startblock))
3747                         irec->br_startblock = DELAYSTARTBLOCK;
3748                 if (irec->br_startblock != DELAYSTARTBLOCK &&
3749                     irec->br_startblock != HOLESTARTBLOCK)
3750                         irec->br_startblock += distance;
3751                 irec->br_startoff += distance;
3752                 irec->br_blockcount -= distance;
3753         }
3754
3755         if (end < irec->br_startoff + irec->br_blockcount) {
3756                 distance = irec->br_startoff + irec->br_blockcount - end;
3757                 irec->br_blockcount -= distance;
3758         }
3759 }
3760
3761 /*
3762  * Trim the returned map to the required bounds
3763  */
3764 STATIC void
3765 xfs_bmapi_trim_map(
3766         struct xfs_bmbt_irec    *mval,
3767         struct xfs_bmbt_irec    *got,
3768         xfs_fileoff_t           *bno,
3769         xfs_filblks_t           len,
3770         xfs_fileoff_t           obno,
3771         xfs_fileoff_t           end,
3772         int                     n,
3773         int                     flags)
3774 {
3775         if ((flags & XFS_BMAPI_ENTIRE) ||
3776             got->br_startoff + got->br_blockcount <= obno) {
3777                 *mval = *got;
3778                 if (isnullstartblock(got->br_startblock))
3779                         mval->br_startblock = DELAYSTARTBLOCK;
3780                 return;
3781         }
3782
3783         if (obno > *bno)
3784                 *bno = obno;
3785         ASSERT((*bno >= obno) || (n == 0));
3786         ASSERT(*bno < end);
3787         mval->br_startoff = *bno;
3788         if (isnullstartblock(got->br_startblock))
3789                 mval->br_startblock = DELAYSTARTBLOCK;
3790         else
3791                 mval->br_startblock = got->br_startblock +
3792                                         (*bno - got->br_startoff);
3793         /*
3794          * Return the minimum of what we got and what we asked for for
3795          * the length.  We can use the len variable here because it is
3796          * modified below and we could have been there before coming
3797          * here if the first part of the allocation didn't overlap what
3798          * was asked for.
3799          */
3800         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3801                         got->br_blockcount - (*bno - got->br_startoff));
3802         mval->br_state = got->br_state;
3803         ASSERT(mval->br_blockcount <= len);
3804         return;
3805 }
3806
3807 /*
3808  * Update and validate the extent map to return
3809  */
3810 STATIC void
3811 xfs_bmapi_update_map(
3812         struct xfs_bmbt_irec    **map,
3813         xfs_fileoff_t           *bno,
3814         xfs_filblks_t           *len,
3815         xfs_fileoff_t           obno,
3816         xfs_fileoff_t           end,
3817         int                     *n,
3818         int                     flags)
3819 {
3820         xfs_bmbt_irec_t *mval = *map;
3821
3822         ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3823                ((mval->br_startoff + mval->br_blockcount) <= end));
3824         ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3825                (mval->br_startoff < obno));
3826
3827         *bno = mval->br_startoff + mval->br_blockcount;
3828         *len = end - *bno;
3829         if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3830                 /* update previous map with new information */
3831                 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3832                 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3833                 ASSERT(mval->br_state == mval[-1].br_state);
3834                 mval[-1].br_blockcount = mval->br_blockcount;
3835                 mval[-1].br_state = mval->br_state;
3836         } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3837                    mval[-1].br_startblock != DELAYSTARTBLOCK &&
3838                    mval[-1].br_startblock != HOLESTARTBLOCK &&
3839                    mval->br_startblock == mval[-1].br_startblock +
3840                                           mval[-1].br_blockcount &&
3841                    mval[-1].br_state == mval->br_state) {
3842                 ASSERT(mval->br_startoff ==
3843                        mval[-1].br_startoff + mval[-1].br_blockcount);
3844                 mval[-1].br_blockcount += mval->br_blockcount;
3845         } else if (*n > 0 &&
3846                    mval->br_startblock == DELAYSTARTBLOCK &&
3847                    mval[-1].br_startblock == DELAYSTARTBLOCK &&
3848                    mval->br_startoff ==
3849                    mval[-1].br_startoff + mval[-1].br_blockcount) {
3850                 mval[-1].br_blockcount += mval->br_blockcount;
3851                 mval[-1].br_state = mval->br_state;
3852         } else if (!((*n == 0) &&
3853                      ((mval->br_startoff + mval->br_blockcount) <=
3854                       obno))) {
3855                 mval++;
3856                 (*n)++;
3857         }
3858         *map = mval;
3859 }
3860
3861 /*
3862  * Map file blocks to filesystem blocks without allocation.
3863  */
3864 int
3865 xfs_bmapi_read(
3866         struct xfs_inode        *ip,
3867         xfs_fileoff_t           bno,
3868         xfs_filblks_t           len,
3869         struct xfs_bmbt_irec    *mval,
3870         int                     *nmap,
3871         int                     flags)
3872 {
3873         struct xfs_mount        *mp = ip->i_mount;
3874         int                     whichfork = xfs_bmapi_whichfork(flags);
3875         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3876         struct xfs_bmbt_irec    got;
3877         xfs_fileoff_t           obno;
3878         xfs_fileoff_t           end;
3879         struct xfs_iext_cursor  icur;
3880         int                     error;
3881         bool                    eof = false;
3882         int                     n = 0;
3883
3884         ASSERT(*nmap >= 1);
3885         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3886         ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3887
3888         if (WARN_ON_ONCE(!ifp))
3889                 return -EFSCORRUPTED;
3890
3891         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3892             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3893                 return -EFSCORRUPTED;
3894
3895         if (XFS_FORCED_SHUTDOWN(mp))
3896                 return -EIO;
3897
3898         XFS_STATS_INC(mp, xs_blk_mapr);
3899
3900         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3901                 error = xfs_iread_extents(NULL, ip, whichfork);
3902                 if (error)
3903                         return error;
3904         }
3905
3906         if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3907                 eof = true;
3908         end = bno + len;
3909         obno = bno;
3910
3911         while (bno < end && n < *nmap) {
3912                 /* Reading past eof, act as though there's a hole up to end. */
3913                 if (eof)
3914                         got.br_startoff = end;
3915                 if (got.br_startoff > bno) {
3916                         /* Reading in a hole.  */
3917                         mval->br_startoff = bno;
3918                         mval->br_startblock = HOLESTARTBLOCK;
3919                         mval->br_blockcount =
3920                                 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3921                         mval->br_state = XFS_EXT_NORM;
3922                         bno += mval->br_blockcount;
3923                         len -= mval->br_blockcount;
3924                         mval++;
3925                         n++;
3926                         continue;
3927                 }
3928
3929                 /* set up the extent map to return. */
3930                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3931                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3932
3933                 /* If we're done, stop now. */
3934                 if (bno >= end || n >= *nmap)
3935                         break;
3936
3937                 /* Else go on to the next record. */
3938                 if (!xfs_iext_next_extent(ifp, &icur, &got))
3939                         eof = true;
3940         }
3941         *nmap = n;
3942         return 0;
3943 }
3944
3945 /*
3946  * Add a delayed allocation extent to an inode. Blocks are reserved from the
3947  * global pool and the extent inserted into the inode in-core extent tree.
3948  *
3949  * On entry, got refers to the first extent beyond the offset of the extent to
3950  * allocate or eof is specified if no such extent exists. On return, got refers
3951  * to the extent record that was inserted to the inode fork.
3952  *
3953  * Note that the allocated extent may have been merged with contiguous extents
3954  * during insertion into the inode fork. Thus, got does not reflect the current
3955  * state of the inode fork on return. If necessary, the caller can use lastx to
3956  * look up the updated record in the inode fork.
3957  */
3958 int
3959 xfs_bmapi_reserve_delalloc(
3960         struct xfs_inode        *ip,
3961         int                     whichfork,
3962         xfs_fileoff_t           off,
3963         xfs_filblks_t           len,
3964         xfs_filblks_t           prealloc,
3965         struct xfs_bmbt_irec    *got,
3966         struct xfs_iext_cursor  *icur,
3967         int                     eof)
3968 {
3969         struct xfs_mount        *mp = ip->i_mount;
3970         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3971         xfs_extlen_t            alen;
3972         xfs_extlen_t            indlen;
3973         int                     error;
3974         xfs_fileoff_t           aoff = off;
3975
3976         /*
3977          * Cap the alloc length. Keep track of prealloc so we know whether to
3978          * tag the inode before we return.
3979          */
3980         alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3981         if (!eof)
3982                 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3983         if (prealloc && alen >= len)
3984                 prealloc = alen - len;
3985
3986         /* Figure out the extent size, adjust alen */
3987         if (whichfork == XFS_COW_FORK) {
3988                 struct xfs_bmbt_irec    prev;
3989                 xfs_extlen_t            extsz = xfs_get_cowextsz_hint(ip);
3990
3991                 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3992                         prev.br_startoff = NULLFILEOFF;
3993
3994                 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3995                                                1, 0, &aoff, &alen);
3996                 ASSERT(!error);
3997         }
3998
3999         /*
4000          * Make a transaction-less quota reservation for delayed allocation
4001          * blocks.  This number gets adjusted later.  We return if we haven't
4002          * allocated blocks already inside this loop.
4003          */
4004         error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4005                                                 XFS_QMOPT_RES_REGBLKS);
4006         if (error)
4007                 return error;
4008
4009         /*
4010          * Split changing sb for alen and indlen since they could be coming
4011          * from different places.
4012          */
4013         indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4014         ASSERT(indlen > 0);
4015
4016         error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4017         if (error)
4018                 goto out_unreserve_quota;
4019
4020         error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4021         if (error)
4022                 goto out_unreserve_blocks;
4023
4024
4025         ip->i_delayed_blks += alen;
4026         xfs_mod_delalloc(ip->i_mount, alen + indlen);
4027
4028         got->br_startoff = aoff;
4029         got->br_startblock = nullstartblock(indlen);
4030         got->br_blockcount = alen;
4031         got->br_state = XFS_EXT_NORM;
4032
4033         xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4034
4035         /*
4036          * Tag the inode if blocks were preallocated. Note that COW fork
4037          * preallocation can occur at the start or end of the extent, even when
4038          * prealloc == 0, so we must also check the aligned offset and length.
4039          */
4040         if (whichfork == XFS_DATA_FORK && prealloc)
4041                 xfs_inode_set_eofblocks_tag(ip);
4042         if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4043                 xfs_inode_set_cowblocks_tag(ip);
4044
4045         return 0;
4046
4047 out_unreserve_blocks:
4048         xfs_mod_fdblocks(mp, alen, false);
4049 out_unreserve_quota:
4050         if (XFS_IS_QUOTA_ON(mp))
4051                 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
4052                                                 XFS_QMOPT_RES_REGBLKS);
4053         return error;
4054 }
4055
4056 static int
4057 xfs_bmap_alloc_userdata(
4058         struct xfs_bmalloca     *bma)
4059 {
4060         struct xfs_mount        *mp = bma->ip->i_mount;
4061         int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4062         int                     error;
4063
4064         /*
4065          * Set the data type being allocated. For the data fork, the first data
4066          * in the file is treated differently to all other allocations. For the
4067          * attribute fork, we only need to ensure the allocated range is not on
4068          * the busy list.
4069          */
4070         bma->datatype = XFS_ALLOC_NOBUSY;
4071         if (whichfork == XFS_DATA_FORK) {
4072                 bma->datatype |= XFS_ALLOC_USERDATA;
4073                 if (bma->offset == 0)
4074                         bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4075
4076                 if (mp->m_dalign && bma->length >= mp->m_dalign) {
4077                         error = xfs_bmap_isaeof(bma, whichfork);
4078                         if (error)
4079                                 return error;
4080                 }
4081
4082                 if (XFS_IS_REALTIME_INODE(bma->ip))
4083                         return xfs_bmap_rtalloc(bma);
4084         }
4085
4086         return xfs_bmap_btalloc(bma);
4087 }
4088
4089 static int
4090 xfs_bmapi_allocate(
4091         struct xfs_bmalloca     *bma)
4092 {
4093         struct xfs_mount        *mp = bma->ip->i_mount;
4094         int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4095         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4096         int                     tmp_logflags = 0;
4097         int                     error;
4098
4099         ASSERT(bma->length > 0);
4100
4101         /*
4102          * For the wasdelay case, we could also just allocate the stuff asked
4103          * for in this bmap call but that wouldn't be as good.
4104          */
4105         if (bma->wasdel) {
4106                 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4107                 bma->offset = bma->got.br_startoff;
4108                 if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4109                         bma->prev.br_startoff = NULLFILEOFF;
4110         } else {
4111                 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4112                 if (!bma->eof)
4113                         bma->length = XFS_FILBLKS_MIN(bma->length,
4114                                         bma->got.br_startoff - bma->offset);
4115         }
4116
4117         if (bma->flags & XFS_BMAPI_CONTIG)
4118                 bma->minlen = bma->length;
4119         else
4120                 bma->minlen = 1;
4121
4122         if (bma->flags & XFS_BMAPI_METADATA)
4123                 error = xfs_bmap_btalloc(bma);
4124         else
4125                 error = xfs_bmap_alloc_userdata(bma);
4126         if (error || bma->blkno == NULLFSBLOCK)
4127                 return error;
4128
4129         if (bma->flags & XFS_BMAPI_ZERO) {
4130                 error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4131                 if (error)
4132                         return error;
4133         }
4134
4135         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4136                 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4137         /*
4138          * Bump the number of extents we've allocated
4139          * in this call.
4140          */
4141         bma->nallocs++;
4142
4143         if (bma->cur)
4144                 bma->cur->bc_ino.flags =
4145                         bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4146
4147         bma->got.br_startoff = bma->offset;
4148         bma->got.br_startblock = bma->blkno;
4149         bma->got.br_blockcount = bma->length;
4150         bma->got.br_state = XFS_EXT_NORM;
4151
4152         if (bma->flags & XFS_BMAPI_PREALLOC)
4153                 bma->got.br_state = XFS_EXT_UNWRITTEN;
4154
4155         if (bma->wasdel)
4156                 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4157         else
4158                 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4159                                 whichfork, &bma->icur, &bma->cur, &bma->got,
4160                                 &bma->logflags, bma->flags);
4161
4162         bma->logflags |= tmp_logflags;
4163         if (error)
4164                 return error;
4165
4166         /*
4167          * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4168          * or xfs_bmap_add_extent_hole_real might have merged it into one of
4169          * the neighbouring ones.
4170          */
4171         xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4172
4173         ASSERT(bma->got.br_startoff <= bma->offset);
4174         ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4175                bma->offset + bma->length);
4176         ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4177                bma->got.br_state == XFS_EXT_UNWRITTEN);
4178         return 0;
4179 }
4180
4181 STATIC int
4182 xfs_bmapi_convert_unwritten(
4183         struct xfs_bmalloca     *bma,
4184         struct xfs_bmbt_irec    *mval,
4185         xfs_filblks_t           len,
4186         int                     flags)
4187 {
4188         int                     whichfork = xfs_bmapi_whichfork(flags);
4189         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4190         int                     tmp_logflags = 0;
4191         int                     error;
4192
4193         /* check if we need to do unwritten->real conversion */
4194         if (mval->br_state == XFS_EXT_UNWRITTEN &&
4195             (flags & XFS_BMAPI_PREALLOC))
4196                 return 0;
4197
4198         /* check if we need to do real->unwritten conversion */
4199         if (mval->br_state == XFS_EXT_NORM &&
4200             (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4201                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4202                 return 0;
4203
4204         /*
4205          * Modify (by adding) the state flag, if writing.
4206          */
4207         ASSERT(mval->br_blockcount <= len);
4208         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4209                 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4210                                         bma->ip, whichfork);
4211         }
4212         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4213                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4214
4215         /*
4216          * Before insertion into the bmbt, zero the range being converted
4217          * if required.
4218          */
4219         if (flags & XFS_BMAPI_ZERO) {
4220                 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4221                                         mval->br_blockcount);
4222                 if (error)
4223                         return error;
4224         }
4225
4226         error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4227                         &bma->icur, &bma->cur, mval, &tmp_logflags);
4228         /*
4229          * Log the inode core unconditionally in the unwritten extent conversion
4230          * path because the conversion might not have done so (e.g., if the
4231          * extent count hasn't changed). We need to make sure the inode is dirty
4232          * in the transaction for the sake of fsync(), even if nothing has
4233          * changed, because fsync() will not force the log for this transaction
4234          * unless it sees the inode pinned.
4235          *
4236          * Note: If we're only converting cow fork extents, there aren't
4237          * any on-disk updates to make, so we don't need to log anything.
4238          */
4239         if (whichfork != XFS_COW_FORK)
4240                 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4241         if (error)
4242                 return error;
4243
4244         /*
4245          * Update our extent pointer, given that
4246          * xfs_bmap_add_extent_unwritten_real might have merged it into one
4247          * of the neighbouring ones.
4248          */
4249         xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4250
4251         /*
4252          * We may have combined previously unwritten space with written space,
4253          * so generate another request.
4254          */
4255         if (mval->br_blockcount < len)
4256                 return -EAGAIN;
4257         return 0;
4258 }
4259
4260 static inline xfs_extlen_t
4261 xfs_bmapi_minleft(
4262         struct xfs_trans        *tp,
4263         struct xfs_inode        *ip,
4264         int                     fork)
4265 {
4266         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, fork);
4267
4268         if (tp && tp->t_firstblock != NULLFSBLOCK)
4269                 return 0;
4270         if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4271                 return 1;
4272         return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4273 }
4274
4275 /*
4276  * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4277  * a case where the data is changed, there's an error, and it's not logged so we
4278  * don't shutdown when we should.  Don't bother logging extents/btree changes if
4279  * we converted to the other format.
4280  */
4281 static void
4282 xfs_bmapi_finish(
4283         struct xfs_bmalloca     *bma,
4284         int                     whichfork,
4285         int                     error)
4286 {
4287         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4288
4289         if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4290             ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4291                 bma->logflags &= ~xfs_ilog_fext(whichfork);
4292         else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4293                  ifp->if_format != XFS_DINODE_FMT_BTREE)
4294                 bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4295
4296         if (bma->logflags)
4297                 xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4298         if (bma->cur)
4299                 xfs_btree_del_cursor(bma->cur, error);
4300 }
4301
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 */
4308 int
4309 xfs_bmapi_write(
4310         struct xfs_trans        *tp,            /* transaction pointer */
4311         struct xfs_inode        *ip,            /* incore inode */
4312         xfs_fileoff_t           bno,            /* starting file offs. mapped */
4313         xfs_filblks_t           len,            /* length to map in file */
4314         int                     flags,          /* XFS_BMAPI_... */
4315         xfs_extlen_t            total,          /* total blocks needed */
4316         struct xfs_bmbt_irec    *mval,          /* output: map values */
4317         int                     *nmap)          /* i/o: mval size/count */
4318 {
4319         struct xfs_bmalloca     bma = {
4320                 .tp             = tp,
4321                 .ip             = ip,
4322                 .total          = total,
4323         };
4324         struct xfs_mount        *mp = ip->i_mount;
4325         int                     whichfork = xfs_bmapi_whichfork(flags);
4326         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4327         xfs_fileoff_t           end;            /* end of mapped file region */
4328         bool                    eof = false;    /* after the end of extents */
4329         int                     error;          /* error return */
4330         int                     n;              /* current extent index */
4331         xfs_fileoff_t           obno;           /* old block number (offset) */
4332
4333 #ifdef DEBUG
4334         xfs_fileoff_t           orig_bno;       /* original block number value */
4335         int                     orig_flags;     /* original flags arg value */
4336         xfs_filblks_t           orig_len;       /* original value of len arg */
4337         struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4338         int                     orig_nmap;      /* original value of *nmap */
4339
4340         orig_bno = bno;
4341         orig_len = len;
4342         orig_flags = flags;
4343         orig_mval = mval;
4344         orig_nmap = *nmap;
4345 #endif
4346
4347         ASSERT(*nmap >= 1);
4348         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4349         ASSERT(tp != NULL);
4350         ASSERT(len > 0);
4351         ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4352         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4353         ASSERT(!(flags & XFS_BMAPI_REMAP));
4354
4355         /* zeroing is for currently only for data extents, not metadata */
4356         ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4357                         (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4358         /*
4359          * we can allocate unwritten extents or pre-zero allocated blocks,
4360          * but it makes no sense to do both at once. This would result in
4361          * zeroing the unwritten extent twice, but it still being an
4362          * unwritten extent....
4363          */
4364         ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4365                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4366
4367         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4368             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4369                 return -EFSCORRUPTED;
4370         }
4371
4372         if (XFS_FORCED_SHUTDOWN(mp))
4373                 return -EIO;
4374
4375         XFS_STATS_INC(mp, xs_blk_mapw);
4376
4377         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4378                 error = xfs_iread_extents(tp, ip, whichfork);
4379                 if (error)
4380                         goto error0;
4381         }
4382
4383         if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4384                 eof = true;
4385         if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4386                 bma.prev.br_startoff = NULLFILEOFF;
4387         bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4388
4389         n = 0;
4390         end = bno + len;
4391         obno = bno;
4392         while (bno < end && n < *nmap) {
4393                 bool                    need_alloc = false, wasdelay = false;
4394
4395                 /* in hole or beyond EOF? */
4396                 if (eof || bma.got.br_startoff > bno) {
4397                         /*
4398                          * CoW fork conversions should /never/ hit EOF or
4399                          * holes.  There should always be something for us
4400                          * to work on.
4401                          */
4402                         ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4403                                  (flags & XFS_BMAPI_COWFORK)));
4404
4405                         need_alloc = true;
4406                 } else if (isnullstartblock(bma.got.br_startblock)) {
4407                         wasdelay = true;
4408                 }
4409
4410                 /*
4411                  * First, deal with the hole before the allocated space
4412                  * that we found, if any.
4413                  */
4414                 if (need_alloc || wasdelay) {
4415                         bma.eof = eof;
4416                         bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4417                         bma.wasdel = wasdelay;
4418                         bma.offset = bno;
4419                         bma.flags = flags;
4420
4421                         /*
4422                          * There's a 32/64 bit type mismatch between the
4423                          * allocation length request (which can be 64 bits in
4424                          * length) and the bma length request, which is
4425                          * xfs_extlen_t and therefore 32 bits. Hence we have to
4426                          * check for 32-bit overflows and handle them here.
4427                          */
4428                         if (len > (xfs_filblks_t)MAXEXTLEN)
4429                                 bma.length = MAXEXTLEN;
4430                         else
4431                                 bma.length = len;
4432
4433                         ASSERT(len > 0);
4434                         ASSERT(bma.length > 0);
4435                         error = xfs_bmapi_allocate(&bma);
4436                         if (error)
4437                                 goto error0;
4438                         if (bma.blkno == NULLFSBLOCK)
4439                                 break;
4440
4441                         /*
4442                          * If this is a CoW allocation, record the data in
4443                          * the refcount btree for orphan recovery.
4444                          */
4445                         if (whichfork == XFS_COW_FORK)
4446                                 xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4447                                                 bma.length);
4448                 }
4449
4450                 /* Deal with the allocated space we found.  */
4451                 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4452                                                         end, n, flags);
4453
4454                 /* Execute unwritten extent conversion if necessary */
4455                 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4456                 if (error == -EAGAIN)
4457                         continue;
4458                 if (error)
4459                         goto error0;
4460
4461                 /* update the extent map to return */
4462                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4463
4464                 /*
4465                  * If we're done, stop now.  Stop when we've allocated
4466                  * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4467                  * the transaction may get too big.
4468                  */
4469                 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4470                         break;
4471
4472                 /* Else go on to the next record. */
4473                 bma.prev = bma.got;
4474                 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4475                         eof = true;
4476         }
4477         *nmap = n;
4478
4479         error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4480                         whichfork);
4481         if (error)
4482                 goto error0;
4483
4484         ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4485                ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4486         xfs_bmapi_finish(&bma, whichfork, 0);
4487         xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4488                 orig_nmap, *nmap);
4489         return 0;
4490 error0:
4491         xfs_bmapi_finish(&bma, whichfork, error);
4492         return error;
4493 }
4494
4495 /*
4496  * Convert an existing delalloc extent to real blocks based on file offset. This
4497  * attempts to allocate the entire delalloc extent and may require multiple
4498  * invocations to allocate the target offset if a large enough physical extent
4499  * is not available.
4500  */
4501 int
4502 xfs_bmapi_convert_delalloc(
4503         struct xfs_inode        *ip,
4504         int                     whichfork,
4505         xfs_off_t               offset,
4506         struct iomap            *iomap,
4507         unsigned int            *seq)
4508 {
4509         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4510         struct xfs_mount        *mp = ip->i_mount;
4511         xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
4512         struct xfs_bmalloca     bma = { NULL };
4513         uint16_t                flags = 0;
4514         struct xfs_trans        *tp;
4515         int                     error;
4516
4517         if (whichfork == XFS_COW_FORK)
4518                 flags |= IOMAP_F_SHARED;
4519
4520         /*
4521          * Space for the extent and indirect blocks was reserved when the
4522          * delalloc extent was created so there's no need to do so here.
4523          */
4524         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4525                                 XFS_TRANS_RESERVE, &tp);
4526         if (error)
4527                 return error;
4528
4529         xfs_ilock(ip, XFS_ILOCK_EXCL);
4530         xfs_trans_ijoin(tp, ip, 0);
4531
4532         if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4533             bma.got.br_startoff > offset_fsb) {
4534                 /*
4535                  * No extent found in the range we are trying to convert.  This
4536                  * should only happen for the COW fork, where another thread
4537                  * might have moved the extent to the data fork in the meantime.
4538                  */
4539                 WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4540                 error = -EAGAIN;
4541                 goto out_trans_cancel;
4542         }
4543
4544         /*
4545          * If we find a real extent here we raced with another thread converting
4546          * the extent.  Just return the real extent at this offset.
4547          */
4548         if (!isnullstartblock(bma.got.br_startblock)) {
4549                 xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4550                 *seq = READ_ONCE(ifp->if_seq);
4551                 goto out_trans_cancel;
4552         }
4553
4554         bma.tp = tp;
4555         bma.ip = ip;
4556         bma.wasdel = true;
4557         bma.offset = bma.got.br_startoff;
4558         bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4559         bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4560
4561         /*
4562          * When we're converting the delalloc reservations backing dirty pages
4563          * in the page cache, we must be careful about how we create the new
4564          * extents:
4565          *
4566          * New CoW fork extents are created unwritten, turned into real extents
4567          * when we're about to write the data to disk, and mapped into the data
4568          * fork after the write finishes.  End of story.
4569          *
4570          * New data fork extents must be mapped in as unwritten and converted
4571          * to real extents after the write succeeds to avoid exposing stale
4572          * disk contents if we crash.
4573          */
4574         bma.flags = XFS_BMAPI_PREALLOC;
4575         if (whichfork == XFS_COW_FORK)
4576                 bma.flags |= XFS_BMAPI_COWFORK;
4577
4578         if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4579                 bma.prev.br_startoff = NULLFILEOFF;
4580
4581         error = xfs_bmapi_allocate(&bma);
4582         if (error)
4583                 goto out_finish;
4584
4585         error = -ENOSPC;
4586         if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4587                 goto out_finish;
4588         error = -EFSCORRUPTED;
4589         if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4590                 goto out_finish;
4591
4592         XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4593         XFS_STATS_INC(mp, xs_xstrat_quick);
4594
4595         ASSERT(!isnullstartblock(bma.got.br_startblock));
4596         xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4597         *seq = READ_ONCE(ifp->if_seq);
4598
4599         if (whichfork == XFS_COW_FORK)
4600                 xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4601
4602         error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603                         whichfork);
4604         if (error)
4605                 goto out_finish;
4606
4607         xfs_bmapi_finish(&bma, whichfork, 0);
4608         error = xfs_trans_commit(tp);
4609         xfs_iunlock(ip, XFS_ILOCK_EXCL);
4610         return error;
4611
4612 out_finish:
4613         xfs_bmapi_finish(&bma, whichfork, error);
4614 out_trans_cancel:
4615         xfs_trans_cancel(tp);
4616         xfs_iunlock(ip, XFS_ILOCK_EXCL);
4617         return error;
4618 }
4619
4620 int
4621 xfs_bmapi_remap(
4622         struct xfs_trans        *tp,
4623         struct xfs_inode        *ip,
4624         xfs_fileoff_t           bno,
4625         xfs_filblks_t           len,
4626         xfs_fsblock_t           startblock,
4627         int                     flags)
4628 {
4629         struct xfs_mount        *mp = ip->i_mount;
4630         struct xfs_ifork        *ifp;
4631         struct xfs_btree_cur    *cur = NULL;
4632         struct xfs_bmbt_irec    got;
4633         struct xfs_iext_cursor  icur;
4634         int                     whichfork = xfs_bmapi_whichfork(flags);
4635         int                     logflags = 0, error;
4636
4637         ifp = XFS_IFORK_PTR(ip, whichfork);
4638         ASSERT(len > 0);
4639         ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4640         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4641         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4642                            XFS_BMAPI_NORMAP)));
4643         ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4644                         (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4645
4646         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4647             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4648                 return -EFSCORRUPTED;
4649         }
4650
4651         if (XFS_FORCED_SHUTDOWN(mp))
4652                 return -EIO;
4653
4654         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4655                 error = xfs_iread_extents(tp, ip, whichfork);
4656                 if (error)
4657                         return error;
4658         }
4659
4660         if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4661                 /* make sure we only reflink into a hole. */
4662                 ASSERT(got.br_startoff > bno);
4663                 ASSERT(got.br_startoff - bno >= len);
4664         }
4665
4666         ip->i_d.di_nblocks += len;
4667         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4668
4669         if (ifp->if_flags & XFS_IFBROOT) {
4670                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4671                 cur->bc_ino.flags = 0;
4672         }
4673
4674         got.br_startoff = bno;
4675         got.br_startblock = startblock;
4676         got.br_blockcount = len;
4677         if (flags & XFS_BMAPI_PREALLOC)
4678                 got.br_state = XFS_EXT_UNWRITTEN;
4679         else
4680                 got.br_state = XFS_EXT_NORM;
4681
4682         error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4683                         &cur, &got, &logflags, flags);
4684         if (error)
4685                 goto error0;
4686
4687         error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4688
4689 error0:
4690         if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4691                 logflags &= ~XFS_ILOG_DEXT;
4692         else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4693                 logflags &= ~XFS_ILOG_DBROOT;
4694
4695         if (logflags)
4696                 xfs_trans_log_inode(tp, ip, logflags);
4697         if (cur)
4698                 xfs_btree_del_cursor(cur, error);
4699         return error;
4700 }
4701
4702 /*
4703  * When a delalloc extent is split (e.g., due to a hole punch), the original
4704  * indlen reservation must be shared across the two new extents that are left
4705  * behind.
4706  *
4707  * Given the original reservation and the worst case indlen for the two new
4708  * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4709  * reservation fairly across the two new extents. If necessary, steal available
4710  * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4711  * ores == 1). The number of stolen blocks is returned. The availability and
4712  * subsequent accounting of stolen blocks is the responsibility of the caller.
4713  */
4714 static xfs_filblks_t
4715 xfs_bmap_split_indlen(
4716         xfs_filblks_t                   ores,           /* original res. */
4717         xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4718         xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
4719         xfs_filblks_t                   avail)          /* stealable blocks */
4720 {
4721         xfs_filblks_t                   len1 = *indlen1;
4722         xfs_filblks_t                   len2 = *indlen2;
4723         xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4724         xfs_filblks_t                   stolen = 0;
4725         xfs_filblks_t                   resfactor;
4726
4727         /*
4728          * Steal as many blocks as we can to try and satisfy the worst case
4729          * indlen for both new extents.
4730          */
4731         if (ores < nres && avail)
4732                 stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4733         ores += stolen;
4734
4735          /* nothing else to do if we've satisfied the new reservation */
4736         if (ores >= nres)
4737                 return stolen;
4738
4739         /*
4740          * We can't meet the total required reservation for the two extents.
4741          * Calculate the percent of the overall shortage between both extents
4742          * and apply this percentage to each of the requested indlen values.
4743          * This distributes the shortage fairly and reduces the chances that one
4744          * of the two extents is left with nothing when extents are repeatedly
4745          * split.
4746          */
4747         resfactor = (ores * 100);
4748         do_div(resfactor, nres);
4749         len1 *= resfactor;
4750         do_div(len1, 100);
4751         len2 *= resfactor;
4752         do_div(len2, 100);
4753         ASSERT(len1 + len2 <= ores);
4754         ASSERT(len1 < *indlen1 && len2 < *indlen2);
4755
4756         /*
4757          * Hand out the remainder to each extent. If one of the two reservations
4758          * is zero, we want to make sure that one gets a block first. The loop
4759          * below starts with len1, so hand len2 a block right off the bat if it
4760          * is zero.
4761          */
4762         ores -= (len1 + len2);
4763         ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4764         if (ores && !len2 && *indlen2) {
4765                 len2++;
4766                 ores--;
4767         }
4768         while (ores) {
4769                 if (len1 < *indlen1) {
4770                         len1++;
4771                         ores--;
4772                 }
4773                 if (!ores)
4774                         break;
4775                 if (len2 < *indlen2) {
4776                         len2++;
4777                         ores--;
4778                 }
4779         }
4780
4781         *indlen1 = len1;
4782         *indlen2 = len2;
4783
4784         return stolen;
4785 }
4786
4787 int
4788 xfs_bmap_del_extent_delay(
4789         struct xfs_inode        *ip,
4790         int                     whichfork,
4791         struct xfs_iext_cursor  *icur,
4792         struct xfs_bmbt_irec    *got,
4793         struct xfs_bmbt_irec    *del)
4794 {
4795         struct xfs_mount        *mp = ip->i_mount;
4796         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4797         struct xfs_bmbt_irec    new;
4798         int64_t                 da_old, da_new, da_diff = 0;
4799         xfs_fileoff_t           del_endoff, got_endoff;
4800         xfs_filblks_t           got_indlen, new_indlen, stolen;
4801         int                     state = xfs_bmap_fork_to_state(whichfork);
4802         int                     error = 0;
4803         bool                    isrt;
4804
4805         XFS_STATS_INC(mp, xs_del_exlist);
4806
4807         isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4808         del_endoff = del->br_startoff + del->br_blockcount;
4809         got_endoff = got->br_startoff + got->br_blockcount;
4810         da_old = startblockval(got->br_startblock);
4811         da_new = 0;
4812
4813         ASSERT(del->br_blockcount > 0);
4814         ASSERT(got->br_startoff <= del->br_startoff);
4815         ASSERT(got_endoff >= del_endoff);
4816
4817         if (isrt) {
4818                 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4819
4820                 do_div(rtexts, mp->m_sb.sb_rextsize);
4821                 xfs_mod_frextents(mp, rtexts);
4822         }
4823
4824         /*
4825          * Update the inode delalloc counter now and wait to update the
4826          * sb counters as we might have to borrow some blocks for the
4827          * indirect block accounting.
4828          */
4829         error = xfs_trans_reserve_quota_nblks(NULL, ip,
4830                         -((long)del->br_blockcount), 0,
4831                         isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4832         if (error)
4833                 return error;
4834         ip->i_delayed_blks -= del->br_blockcount;
4835
4836         if (got->br_startoff == del->br_startoff)
4837                 state |= BMAP_LEFT_FILLING;
4838         if (got_endoff == del_endoff)
4839                 state |= BMAP_RIGHT_FILLING;
4840
4841         switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4842         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4843                 /*
4844                  * Matches the whole extent.  Delete the entry.
4845                  */
4846                 xfs_iext_remove(ip, icur, state);
4847                 xfs_iext_prev(ifp, icur);
4848                 break;
4849         case BMAP_LEFT_FILLING:
4850                 /*
4851                  * Deleting the first part of the extent.
4852                  */
4853                 got->br_startoff = del_endoff;
4854                 got->br_blockcount -= del->br_blockcount;
4855                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4856                                 got->br_blockcount), da_old);
4857                 got->br_startblock = nullstartblock((int)da_new);
4858                 xfs_iext_update_extent(ip, state, icur, got);
4859                 break;
4860         case BMAP_RIGHT_FILLING:
4861                 /*
4862                  * Deleting the last part of the extent.
4863                  */
4864                 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4865                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4866                                 got->br_blockcount), da_old);
4867                 got->br_startblock = nullstartblock((int)da_new);
4868                 xfs_iext_update_extent(ip, state, icur, got);
4869                 break;
4870         case 0:
4871                 /*
4872                  * Deleting the middle of the extent.
4873                  *
4874                  * Distribute the original indlen reservation across the two new
4875                  * extents.  Steal blocks from the deleted extent if necessary.
4876                  * Stealing blocks simply fudges the fdblocks accounting below.
4877                  * Warn if either of the new indlen reservations is zero as this
4878                  * can lead to delalloc problems.
4879                  */
4880                 got->br_blockcount = del->br_startoff - got->br_startoff;
4881                 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4882
4883                 new.br_blockcount = got_endoff - del_endoff;
4884                 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4885
4886                 WARN_ON_ONCE(!got_indlen || !new_indlen);
4887                 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4888                                                        del->br_blockcount);
4889
4890                 got->br_startblock = nullstartblock((int)got_indlen);
4891
4892                 new.br_startoff = del_endoff;
4893                 new.br_state = got->br_state;
4894                 new.br_startblock = nullstartblock((int)new_indlen);
4895
4896                 xfs_iext_update_extent(ip, state, icur, got);
4897                 xfs_iext_next(ifp, icur);
4898                 xfs_iext_insert(ip, icur, &new, state);
4899
4900                 da_new = got_indlen + new_indlen - stolen;
4901                 del->br_blockcount -= stolen;
4902                 break;
4903         }
4904
4905         ASSERT(da_old >= da_new);
4906         da_diff = da_old - da_new;
4907         if (!isrt)
4908                 da_diff += del->br_blockcount;
4909         if (da_diff) {
4910                 xfs_mod_fdblocks(mp, da_diff, false);
4911                 xfs_mod_delalloc(mp, -da_diff);
4912         }
4913         return error;
4914 }
4915
4916 void
4917 xfs_bmap_del_extent_cow(
4918         struct xfs_inode        *ip,
4919         struct xfs_iext_cursor  *icur,
4920         struct xfs_bmbt_irec    *got,
4921         struct xfs_bmbt_irec    *del)
4922 {
4923         struct xfs_mount        *mp = ip->i_mount;
4924         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4925         struct xfs_bmbt_irec    new;
4926         xfs_fileoff_t           del_endoff, got_endoff;
4927         int                     state = BMAP_COWFORK;
4928
4929         XFS_STATS_INC(mp, xs_del_exlist);
4930
4931         del_endoff = del->br_startoff + del->br_blockcount;
4932         got_endoff = got->br_startoff + got->br_blockcount;
4933
4934         ASSERT(del->br_blockcount > 0);
4935         ASSERT(got->br_startoff <= del->br_startoff);
4936         ASSERT(got_endoff >= del_endoff);
4937         ASSERT(!isnullstartblock(got->br_startblock));
4938
4939         if (got->br_startoff == del->br_startoff)
4940                 state |= BMAP_LEFT_FILLING;
4941         if (got_endoff == del_endoff)
4942                 state |= BMAP_RIGHT_FILLING;
4943
4944         switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4945         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4946                 /*
4947                  * Matches the whole extent.  Delete the entry.
4948                  */
4949                 xfs_iext_remove(ip, icur, state);
4950                 xfs_iext_prev(ifp, icur);
4951                 break;
4952         case BMAP_LEFT_FILLING:
4953                 /*
4954                  * Deleting the first part of the extent.
4955                  */
4956                 got->br_startoff = del_endoff;
4957                 got->br_blockcount -= del->br_blockcount;
4958                 got->br_startblock = del->br_startblock + del->br_blockcount;
4959                 xfs_iext_update_extent(ip, state, icur, got);
4960                 break;
4961         case BMAP_RIGHT_FILLING:
4962                 /*
4963                  * Deleting the last part of the extent.
4964                  */
4965                 got->br_blockcount -= del->br_blockcount;
4966                 xfs_iext_update_extent(ip, state, icur, got);
4967                 break;
4968         case 0:
4969                 /*
4970                  * Deleting the middle of the extent.
4971                  */
4972                 got->br_blockcount = del->br_startoff - got->br_startoff;
4973
4974                 new.br_startoff = del_endoff;
4975                 new.br_blockcount = got_endoff - del_endoff;
4976                 new.br_state = got->br_state;
4977                 new.br_startblock = del->br_startblock + del->br_blockcount;
4978
4979                 xfs_iext_update_extent(ip, state, icur, got);
4980                 xfs_iext_next(ifp, icur);
4981                 xfs_iext_insert(ip, icur, &new, state);
4982                 break;
4983         }
4984         ip->i_delayed_blks -= del->br_blockcount;
4985 }
4986
4987 /*
4988  * Called by xfs_bmapi to update file extent records and the btree
4989  * after removing space.
4990  */
4991 STATIC int                              /* error */
4992 xfs_bmap_del_extent_real(
4993         xfs_inode_t             *ip,    /* incore inode pointer */
4994         xfs_trans_t             *tp,    /* current transaction pointer */
4995         struct xfs_iext_cursor  *icur,
4996         xfs_btree_cur_t         *cur,   /* if null, not a btree */
4997         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4998         int                     *logflagsp, /* inode logging flags */
4999         int                     whichfork, /* data or attr fork */
5000         int                     bflags) /* bmapi flags */
5001 {
5002         xfs_fsblock_t           del_endblock=0; /* first block past del */
5003         xfs_fileoff_t           del_endoff;     /* first offset past del */
5004         int                     do_fx;  /* free extent at end of routine */
5005         int                     error;  /* error return value */
5006         int                     flags = 0;/* inode logging flags */
5007         struct xfs_bmbt_irec    got;    /* current extent entry */
5008         xfs_fileoff_t           got_endoff;     /* first offset past got */
5009         int                     i;      /* temp state */
5010         struct xfs_ifork        *ifp;   /* inode fork pointer */
5011         xfs_mount_t             *mp;    /* mount structure */
5012         xfs_filblks_t           nblks;  /* quota/sb block count */
5013         xfs_bmbt_irec_t         new;    /* new record to be inserted */
5014         /* REFERENCED */
5015         uint                    qfield; /* quota field to update */
5016         int                     state = xfs_bmap_fork_to_state(whichfork);
5017         struct xfs_bmbt_irec    old;
5018
5019         mp = ip->i_mount;
5020         XFS_STATS_INC(mp, xs_del_exlist);
5021
5022         ifp = XFS_IFORK_PTR(ip, whichfork);
5023         ASSERT(del->br_blockcount > 0);
5024         xfs_iext_get_extent(ifp, icur, &got);
5025         ASSERT(got.br_startoff <= del->br_startoff);
5026         del_endoff = del->br_startoff + del->br_blockcount;
5027         got_endoff = got.br_startoff + got.br_blockcount;
5028         ASSERT(got_endoff >= del_endoff);
5029         ASSERT(!isnullstartblock(got.br_startblock));
5030         qfield = 0;
5031         error = 0;
5032
5033         /*
5034          * If it's the case where the directory code is running with no block
5035          * reservation, and the deleted block is in the middle of its extent,
5036          * and the resulting insert of an extent would cause transformation to
5037          * btree format, then reject it.  The calling code will then swap blocks
5038          * around instead.  We have to do this now, rather than waiting for the
5039          * conversion to btree format, since the transaction will be dirty then.
5040          */
5041         if (tp->t_blk_res == 0 &&
5042             ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5043             ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5044             del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5045                 return -ENOSPC;
5046
5047         flags = XFS_ILOG_CORE;
5048         if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5049                 xfs_fsblock_t   bno;
5050                 xfs_filblks_t   len;
5051                 xfs_extlen_t    mod;
5052
5053                 bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
5054                                   &mod);
5055                 ASSERT(mod == 0);
5056                 len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5057                                   &mod);
5058                 ASSERT(mod == 0);
5059
5060                 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5061                 if (error)
5062                         goto done;
5063                 do_fx = 0;
5064                 nblks = len * mp->m_sb.sb_rextsize;
5065                 qfield = XFS_TRANS_DQ_RTBCOUNT;
5066         } else {
5067                 do_fx = 1;
5068                 nblks = del->br_blockcount;
5069                 qfield = XFS_TRANS_DQ_BCOUNT;
5070         }
5071
5072         del_endblock = del->br_startblock + del->br_blockcount;
5073         if (cur) {
5074                 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5075                 if (error)
5076                         goto done;
5077                 if (XFS_IS_CORRUPT(mp, i != 1)) {
5078                         error = -EFSCORRUPTED;
5079                         goto done;
5080                 }
5081         }
5082
5083         if (got.br_startoff == del->br_startoff)
5084                 state |= BMAP_LEFT_FILLING;
5085         if (got_endoff == del_endoff)
5086                 state |= BMAP_RIGHT_FILLING;
5087
5088         switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5089         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5090                 /*
5091                  * Matches the whole extent.  Delete the entry.
5092                  */
5093                 xfs_iext_remove(ip, icur, state);
5094                 xfs_iext_prev(ifp, icur);
5095                 ifp->if_nextents--;
5096
5097                 flags |= XFS_ILOG_CORE;
5098                 if (!cur) {
5099                         flags |= xfs_ilog_fext(whichfork);
5100                         break;
5101                 }
5102                 if ((error = xfs_btree_delete(cur, &i)))
5103                         goto done;
5104                 if (XFS_IS_CORRUPT(mp, i != 1)) {
5105                         error = -EFSCORRUPTED;
5106                         goto done;
5107                 }
5108                 break;
5109         case BMAP_LEFT_FILLING:
5110                 /*
5111                  * Deleting the first part of the extent.
5112                  */
5113                 got.br_startoff = del_endoff;
5114                 got.br_startblock = del_endblock;
5115                 got.br_blockcount -= del->br_blockcount;
5116                 xfs_iext_update_extent(ip, state, icur, &got);
5117                 if (!cur) {
5118                         flags |= xfs_ilog_fext(whichfork);
5119                         break;
5120                 }
5121                 error = xfs_bmbt_update(cur, &got);
5122                 if (error)
5123                         goto done;
5124                 break;
5125         case BMAP_RIGHT_FILLING:
5126                 /*
5127                  * Deleting the last part of the extent.
5128                  */
5129                 got.br_blockcount -= del->br_blockcount;
5130                 xfs_iext_update_extent(ip, state, icur, &got);
5131                 if (!cur) {
5132                         flags |= xfs_ilog_fext(whichfork);
5133                         break;
5134                 }
5135                 error = xfs_bmbt_update(cur, &got);
5136                 if (error)
5137                         goto done;
5138                 break;
5139         case 0:
5140                 /*
5141                  * Deleting the middle of the extent.
5142                  */
5143                 old = got;
5144
5145                 got.br_blockcount = del->br_startoff - got.br_startoff;
5146                 xfs_iext_update_extent(ip, state, icur, &got);
5147
5148                 new.br_startoff = del_endoff;
5149                 new.br_blockcount = got_endoff - del_endoff;
5150                 new.br_state = got.br_state;
5151                 new.br_startblock = del_endblock;
5152
5153                 flags |= XFS_ILOG_CORE;
5154                 if (cur) {
5155                         error = xfs_bmbt_update(cur, &got);
5156                         if (error)
5157                                 goto done;
5158                         error = xfs_btree_increment(cur, 0, &i);
5159                         if (error)
5160                                 goto done;
5161                         cur->bc_rec.b = new;
5162                         error = xfs_btree_insert(cur, &i);
5163                         if (error && error != -ENOSPC)
5164                                 goto done;
5165                         /*
5166                          * If get no-space back from btree insert, it tried a
5167                          * split, and we have a zero block reservation.  Fix up
5168                          * our state and return the error.
5169                          */
5170                         if (error == -ENOSPC) {
5171                                 /*
5172                                  * Reset the cursor, don't trust it after any
5173                                  * insert operation.
5174                                  */
5175                                 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5176                                 if (error)
5177                                         goto done;
5178                                 if (XFS_IS_CORRUPT(mp, i != 1)) {
5179                                         error = -EFSCORRUPTED;
5180                                         goto done;
5181                                 }
5182                                 /*
5183                                  * Update the btree record back
5184                                  * to the original value.
5185                                  */
5186                                 error = xfs_bmbt_update(cur, &old);
5187                                 if (error)
5188                                         goto done;
5189                                 /*
5190                                  * Reset the extent record back
5191                                  * to the original value.
5192                                  */
5193                                 xfs_iext_update_extent(ip, state, icur, &old);
5194                                 flags = 0;
5195                                 error = -ENOSPC;
5196                                 goto done;
5197                         }
5198                         if (XFS_IS_CORRUPT(mp, i != 1)) {
5199                                 error = -EFSCORRUPTED;
5200                                 goto done;
5201                         }
5202                 } else
5203                         flags |= xfs_ilog_fext(whichfork);
5204
5205                 ifp->if_nextents++;
5206                 xfs_iext_next(ifp, icur);
5207                 xfs_iext_insert(ip, icur, &new, state);
5208                 break;
5209         }
5210
5211         /* remove reverse mapping */
5212         xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5213
5214         /*
5215          * If we need to, add to list of extents to delete.
5216          */
5217         if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5218                 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5219                         xfs_refcount_decrease_extent(tp, del);
5220                 } else {
5221                         __xfs_bmap_add_free(tp, del->br_startblock,
5222                                         del->br_blockcount, NULL,
5223                                         (bflags & XFS_BMAPI_NODISCARD) ||
5224                                         del->br_state == XFS_EXT_UNWRITTEN);
5225                 }
5226         }
5227
5228         /*
5229          * Adjust inode # blocks in the file.
5230          */
5231         if (nblks)
5232                 ip->i_d.di_nblocks -= nblks;
5233         /*
5234          * Adjust quota data.
5235          */
5236         if (qfield && !(bflags & XFS_BMAPI_REMAP))
5237                 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5238
5239 done:
5240         *logflagsp = flags;
5241         return error;
5242 }
5243
5244 /*
5245  * Unmap (remove) blocks from a file.
5246  * If nexts is nonzero then the number of extents to remove is limited to
5247  * that value.  If not all extents in the block range can be removed then
5248  * *done is set.
5249  */
5250 int                                             /* error */
5251 __xfs_bunmapi(
5252         struct xfs_trans        *tp,            /* transaction pointer */
5253         struct xfs_inode        *ip,            /* incore inode */
5254         xfs_fileoff_t           start,          /* first file offset deleted */
5255         xfs_filblks_t           *rlen,          /* i/o: amount remaining */
5256         int                     flags,          /* misc flags */
5257         xfs_extnum_t            nexts)          /* number of extents max */
5258 {
5259         struct xfs_btree_cur    *cur;           /* bmap btree cursor */
5260         struct xfs_bmbt_irec    del;            /* extent being deleted */
5261         int                     error;          /* error return value */
5262         xfs_extnum_t            extno;          /* extent number in list */
5263         struct xfs_bmbt_irec    got;            /* current extent record */
5264         struct xfs_ifork        *ifp;           /* inode fork pointer */
5265         int                     isrt;           /* freeing in rt area */
5266         int                     logflags;       /* transaction logging flags */
5267         xfs_extlen_t            mod;            /* rt extent offset */
5268         struct xfs_mount        *mp = ip->i_mount;
5269         int                     tmp_logflags;   /* partial logging flags */
5270         int                     wasdel;         /* was a delayed alloc extent */
5271         int                     whichfork;      /* data or attribute fork */
5272         xfs_fsblock_t           sum;
5273         xfs_filblks_t           len = *rlen;    /* length to unmap in file */
5274         xfs_fileoff_t           max_len;
5275         xfs_agnumber_t          prev_agno = NULLAGNUMBER, agno;
5276         xfs_fileoff_t           end;
5277         struct xfs_iext_cursor  icur;
5278         bool                    done = false;
5279
5280         trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5281
5282         whichfork = xfs_bmapi_whichfork(flags);
5283         ASSERT(whichfork != XFS_COW_FORK);
5284         ifp = XFS_IFORK_PTR(ip, whichfork);
5285         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5286                 return -EFSCORRUPTED;
5287         if (XFS_FORCED_SHUTDOWN(mp))
5288                 return -EIO;
5289
5290         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5291         ASSERT(len > 0);
5292         ASSERT(nexts >= 0);
5293
5294         /*
5295          * Guesstimate how many blocks we can unmap without running the risk of
5296          * blowing out the transaction with a mix of EFIs and reflink
5297          * adjustments.
5298          */
5299         if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5300                 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5301         else
5302                 max_len = len;
5303
5304         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5305             (error = xfs_iread_extents(tp, ip, whichfork)))
5306                 return error;
5307         if (xfs_iext_count(ifp) == 0) {
5308                 *rlen = 0;
5309                 return 0;
5310         }
5311         XFS_STATS_INC(mp, xs_blk_unmap);
5312         isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5313         end = start + len;
5314
5315         if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5316                 *rlen = 0;
5317                 return 0;
5318         }
5319         end--;
5320
5321         logflags = 0;
5322         if (ifp->if_flags & XFS_IFBROOT) {
5323                 ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5324                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5325                 cur->bc_ino.flags = 0;
5326         } else
5327                 cur = NULL;
5328
5329         if (isrt) {
5330                 /*
5331                  * Synchronize by locking the bitmap inode.
5332                  */
5333                 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5334                 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5335                 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5336                 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5337         }
5338
5339         extno = 0;
5340         while (end != (xfs_fileoff_t)-1 && end >= start &&
5341                (nexts == 0 || extno < nexts) && max_len > 0) {
5342                 /*
5343                  * Is the found extent after a hole in which end lives?
5344                  * Just back up to the previous extent, if so.
5345                  */
5346                 if (got.br_startoff > end &&
5347                     !xfs_iext_prev_extent(ifp, &icur, &got)) {
5348                         done = true;
5349                         break;
5350                 }
5351                 /*
5352                  * Is the last block of this extent before the range
5353                  * we're supposed to delete?  If so, we're done.
5354                  */
5355                 end = XFS_FILEOFF_MIN(end,
5356                         got.br_startoff + got.br_blockcount - 1);
5357                 if (end < start)
5358                         break;
5359                 /*
5360                  * Then deal with the (possibly delayed) allocated space
5361                  * we found.
5362                  */
5363                 del = got;
5364                 wasdel = isnullstartblock(del.br_startblock);
5365
5366                 /*
5367                  * Make sure we don't touch multiple AGF headers out of order
5368                  * in a single transaction, as that could cause AB-BA deadlocks.
5369                  */
5370                 if (!wasdel && !isrt) {
5371                         agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5372                         if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5373                                 break;
5374                         prev_agno = agno;
5375                 }
5376                 if (got.br_startoff < start) {
5377                         del.br_startoff = start;
5378                         del.br_blockcount -= start - got.br_startoff;
5379                         if (!wasdel)
5380                                 del.br_startblock += start - got.br_startoff;
5381                 }
5382                 if (del.br_startoff + del.br_blockcount > end + 1)
5383                         del.br_blockcount = end + 1 - del.br_startoff;
5384
5385                 /* How much can we safely unmap? */
5386                 if (max_len < del.br_blockcount) {
5387                         del.br_startoff += del.br_blockcount - max_len;
5388                         if (!wasdel)
5389                                 del.br_startblock += del.br_blockcount - max_len;
5390                         del.br_blockcount = max_len;
5391                 }
5392
5393                 if (!isrt)
5394                         goto delete;
5395
5396                 sum = del.br_startblock + del.br_blockcount;
5397                 div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5398                 if (mod) {
5399                         /*
5400                          * Realtime extent not lined up at the end.
5401                          * The extent could have been split into written
5402                          * and unwritten pieces, or we could just be
5403                          * unmapping part of it.  But we can't really
5404                          * get rid of part of a realtime extent.
5405                          */
5406                         if (del.br_state == XFS_EXT_UNWRITTEN) {
5407                                 /*
5408                                  * This piece is unwritten, or we're not
5409                                  * using unwritten extents.  Skip over it.
5410                                  */
5411                                 ASSERT(end >= mod);
5412                                 end -= mod > del.br_blockcount ?
5413                                         del.br_blockcount : mod;
5414                                 if (end < got.br_startoff &&
5415                                     !xfs_iext_prev_extent(ifp, &icur, &got)) {
5416                                         done = true;
5417                                         break;
5418                                 }
5419                                 continue;
5420                         }
5421                         /*
5422                          * It's written, turn it unwritten.
5423                          * This is better than zeroing it.
5424                          */
5425                         ASSERT(del.br_state == XFS_EXT_NORM);
5426                         ASSERT(tp->t_blk_res > 0);
5427                         /*
5428                          * If this spans a realtime extent boundary,
5429                          * chop it back to the start of the one we end at.
5430                          */
5431                         if (del.br_blockcount > mod) {
5432                                 del.br_startoff += del.br_blockcount - mod;
5433                                 del.br_startblock += del.br_blockcount - mod;
5434                                 del.br_blockcount = mod;
5435                         }
5436                         del.br_state = XFS_EXT_UNWRITTEN;
5437                         error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5438                                         whichfork, &icur, &cur, &del,
5439                                         &logflags);
5440                         if (error)
5441                                 goto error0;
5442                         goto nodelete;
5443                 }
5444                 div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5445                 if (mod) {
5446                         xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5447
5448                         /*
5449                          * Realtime extent is lined up at the end but not
5450                          * at the front.  We'll get rid of full extents if
5451                          * we can.
5452                          */
5453                         if (del.br_blockcount > off) {
5454                                 del.br_blockcount -= off;
5455                                 del.br_startoff += off;
5456                                 del.br_startblock += off;
5457                         } else if (del.br_startoff == start &&
5458                                    (del.br_state == XFS_EXT_UNWRITTEN ||
5459                                     tp->t_blk_res == 0)) {
5460                                 /*
5461                                  * Can't make it unwritten.  There isn't
5462                                  * a full extent here so just skip it.
5463                                  */
5464                                 ASSERT(end >= del.br_blockcount);
5465                                 end -= del.br_blockcount;
5466                                 if (got.br_startoff > end &&
5467                                     !xfs_iext_prev_extent(ifp, &icur, &got)) {
5468                                         done = true;
5469                                         break;
5470                                 }
5471                                 continue;
5472                         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5473                                 struct xfs_bmbt_irec    prev;
5474                                 xfs_fileoff_t           unwrite_start;
5475
5476                                 /*
5477                                  * This one is already unwritten.
5478                                  * It must have a written left neighbor.
5479                                  * Unwrite the killed part of that one and
5480                                  * try again.
5481                                  */
5482                                 if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5483                                         ASSERT(0);
5484                                 ASSERT(prev.br_state == XFS_EXT_NORM);
5485                                 ASSERT(!isnullstartblock(prev.br_startblock));
5486                                 ASSERT(del.br_startblock ==
5487                                        prev.br_startblock + prev.br_blockcount);
5488                                 unwrite_start = max3(start,
5489                                                      del.br_startoff - mod,
5490                                                      prev.br_startoff);
5491                                 mod = unwrite_start - prev.br_startoff;
5492                                 prev.br_startoff = unwrite_start;
5493                                 prev.br_startblock += mod;
5494                                 prev.br_blockcount -= mod;
5495                                 prev.br_state = XFS_EXT_UNWRITTEN;
5496                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5497                                                 ip, whichfork, &icur, &cur,
5498                                                 &prev, &logflags);
5499                                 if (error)
5500                                         goto error0;
5501                                 goto nodelete;
5502                         } else {
5503                                 ASSERT(del.br_state == XFS_EXT_NORM);
5504                                 del.br_state = XFS_EXT_UNWRITTEN;
5505                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5506                                                 ip, whichfork, &icur, &cur,
5507                                                 &del, &logflags);
5508                                 if (error)
5509                                         goto error0;
5510                                 goto nodelete;
5511                         }
5512                 }
5513
5514 delete:
5515                 if (wasdel) {
5516                         error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5517                                         &got, &del);
5518                 } else {
5519                         error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5520                                         &del, &tmp_logflags, whichfork,
5521                                         flags);
5522                         logflags |= tmp_logflags;
5523                 }
5524
5525                 if (error)
5526                         goto error0;
5527
5528                 max_len -= del.br_blockcount;
5529                 end = del.br_startoff - 1;
5530 nodelete:
5531                 /*
5532                  * If not done go on to the next (previous) record.
5533                  */
5534                 if (end != (xfs_fileoff_t)-1 && end >= start) {
5535                         if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5536                             (got.br_startoff > end &&
5537                              !xfs_iext_prev_extent(ifp, &icur, &got))) {
5538                                 done = true;
5539                                 break;
5540                         }
5541                         extno++;
5542                 }
5543         }
5544         if (done || end == (xfs_fileoff_t)-1 || end < start)
5545                 *rlen = 0;
5546         else
5547                 *rlen = end - start + 1;
5548
5549         /*
5550          * Convert to a btree if necessary.
5551          */
5552         if (xfs_bmap_needs_btree(ip, whichfork)) {
5553                 ASSERT(cur == NULL);
5554                 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5555                                 &tmp_logflags, whichfork);
5556                 logflags |= tmp_logflags;
5557         } else {
5558                 error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5559                         whichfork);
5560         }
5561
5562 error0:
5563         /*
5564          * Log everything.  Do this after conversion, there's no point in
5565          * logging the extent records if we've converted to btree format.
5566          */
5567         if ((logflags & xfs_ilog_fext(whichfork)) &&
5568             ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5569                 logflags &= ~xfs_ilog_fext(whichfork);
5570         else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5571                  ifp->if_format != XFS_DINODE_FMT_BTREE)
5572                 logflags &= ~xfs_ilog_fbroot(whichfork);
5573         /*
5574          * Log inode even in the error case, if the transaction
5575          * is dirty we'll need to shut down the filesystem.
5576          */
5577         if (logflags)
5578                 xfs_trans_log_inode(tp, ip, logflags);
5579         if (cur) {
5580                 if (!error)
5581                         cur->bc_ino.allocated = 0;
5582                 xfs_btree_del_cursor(cur, error);
5583         }
5584         return error;
5585 }
5586
5587 /* Unmap a range of a file. */
5588 int
5589 xfs_bunmapi(
5590         xfs_trans_t             *tp,
5591         struct xfs_inode        *ip,
5592         xfs_fileoff_t           bno,
5593         xfs_filblks_t           len,
5594         int                     flags,
5595         xfs_extnum_t            nexts,
5596         int                     *done)
5597 {
5598         int                     error;
5599
5600         error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5601         *done = (len == 0);
5602         return error;
5603 }
5604
5605 /*
5606  * Determine whether an extent shift can be accomplished by a merge with the
5607  * extent that precedes the target hole of the shift.
5608  */
5609 STATIC bool
5610 xfs_bmse_can_merge(
5611         struct xfs_bmbt_irec    *left,  /* preceding extent */
5612         struct xfs_bmbt_irec    *got,   /* current extent to shift */
5613         xfs_fileoff_t           shift)  /* shift fsb */
5614 {
5615         xfs_fileoff_t           startoff;
5616
5617         startoff = got->br_startoff - shift;
5618
5619         /*
5620          * The extent, once shifted, must be adjacent in-file and on-disk with
5621          * the preceding extent.
5622          */
5623         if ((left->br_startoff + left->br_blockcount != startoff) ||
5624             (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5625             (left->br_state != got->br_state) ||
5626             (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5627                 return false;
5628
5629         return true;
5630 }
5631
5632 /*
5633  * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5634  * hole in the file. If an extent shift would result in the extent being fully
5635  * adjacent to the extent that currently precedes the hole, we can merge with
5636  * the preceding extent rather than do the shift.
5637  *
5638  * This function assumes the caller has verified a shift-by-merge is possible
5639  * with the provided extents via xfs_bmse_can_merge().
5640  */
5641 STATIC int
5642 xfs_bmse_merge(
5643         struct xfs_trans                *tp,
5644         struct xfs_inode                *ip,
5645         int                             whichfork,
5646         xfs_fileoff_t                   shift,          /* shift fsb */
5647         struct xfs_iext_cursor          *icur,
5648         struct xfs_bmbt_irec            *got,           /* extent to shift */
5649         struct xfs_bmbt_irec            *left,          /* preceding extent */
5650         struct xfs_btree_cur            *cur,
5651         int                             *logflags)      /* output */
5652 {
5653         struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
5654         struct xfs_bmbt_irec            new;
5655         xfs_filblks_t                   blockcount;
5656         int                             error, i;
5657         struct xfs_mount                *mp = ip->i_mount;
5658
5659         blockcount = left->br_blockcount + got->br_blockcount;
5660
5661         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5662         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5663         ASSERT(xfs_bmse_can_merge(left, got, shift));
5664
5665         new = *left;
5666         new.br_blockcount = blockcount;
5667
5668         /*
5669          * Update the on-disk extent count, the btree if necessary and log the
5670          * inode.
5671          */
5672         ifp->if_nextents--;
5673         *logflags |= XFS_ILOG_CORE;
5674         if (!cur) {
5675                 *logflags |= XFS_ILOG_DEXT;
5676                 goto done;
5677         }
5678
5679         /* lookup and remove the extent to merge */
5680         error = xfs_bmbt_lookup_eq(cur, got, &i);
5681         if (error)
5682                 return error;
5683         if (XFS_IS_CORRUPT(mp, i != 1))
5684                 return -EFSCORRUPTED;
5685
5686         error = xfs_btree_delete(cur, &i);
5687         if (error)
5688                 return error;
5689         if (XFS_IS_CORRUPT(mp, i != 1))
5690                 return -EFSCORRUPTED;
5691
5692         /* lookup and update size of the previous extent */
5693         error = xfs_bmbt_lookup_eq(cur, left, &i);
5694         if (error)
5695                 return error;
5696         if (XFS_IS_CORRUPT(mp, i != 1))
5697                 return -EFSCORRUPTED;
5698
5699         error = xfs_bmbt_update(cur, &new);
5700         if (error)
5701                 return error;
5702
5703         /* change to extent format if required after extent removal */
5704         error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5705         if (error)
5706                 return error;
5707
5708 done:
5709         xfs_iext_remove(ip, icur, 0);
5710         xfs_iext_prev(ifp, icur);
5711         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5712                         &new);
5713
5714         /* update reverse mapping. rmap functions merge the rmaps for us */
5715         xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5716         memcpy(&new, got, sizeof(new));
5717         new.br_startoff = left->br_startoff + left->br_blockcount;
5718         xfs_rmap_map_extent(tp, ip, whichfork, &new);
5719         return 0;
5720 }
5721
5722 static int
5723 xfs_bmap_shift_update_extent(
5724         struct xfs_trans        *tp,
5725         struct xfs_inode        *ip,
5726         int                     whichfork,
5727         struct xfs_iext_cursor  *icur,
5728         struct xfs_bmbt_irec    *got,
5729         struct xfs_btree_cur    *cur,
5730         int                     *logflags,
5731         xfs_fileoff_t           startoff)
5732 {
5733         struct xfs_mount        *mp = ip->i_mount;
5734         struct xfs_bmbt_irec    prev = *got;
5735         int                     error, i;
5736
5737         *logflags |= XFS_ILOG_CORE;
5738
5739         got->br_startoff = startoff;
5740
5741         if (cur) {
5742                 error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5743                 if (error)
5744                         return error;
5745                 if (XFS_IS_CORRUPT(mp, i != 1))
5746                         return -EFSCORRUPTED;
5747
5748                 error = xfs_bmbt_update(cur, got);
5749                 if (error)
5750                         return error;
5751         } else {
5752                 *logflags |= XFS_ILOG_DEXT;
5753         }
5754
5755         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5756                         got);
5757
5758         /* update reverse mapping */
5759         xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5760         xfs_rmap_map_extent(tp, ip, whichfork, got);
5761         return 0;
5762 }
5763
5764 int
5765 xfs_bmap_collapse_extents(
5766         struct xfs_trans        *tp,
5767         struct xfs_inode        *ip,
5768         xfs_fileoff_t           *next_fsb,
5769         xfs_fileoff_t           offset_shift_fsb,
5770         bool                    *done)
5771 {
5772         int                     whichfork = XFS_DATA_FORK;
5773         struct xfs_mount        *mp = ip->i_mount;
5774         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5775         struct xfs_btree_cur    *cur = NULL;
5776         struct xfs_bmbt_irec    got, prev;
5777         struct xfs_iext_cursor  icur;
5778         xfs_fileoff_t           new_startoff;
5779         int                     error = 0;
5780         int                     logflags = 0;
5781
5782         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5783             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5784                 return -EFSCORRUPTED;
5785         }
5786
5787         if (XFS_FORCED_SHUTDOWN(mp))
5788                 return -EIO;
5789
5790         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5791
5792         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5793                 error = xfs_iread_extents(tp, ip, whichfork);
5794                 if (error)
5795                         return error;
5796         }
5797
5798         if (ifp->if_flags & XFS_IFBROOT) {
5799                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5800                 cur->bc_ino.flags = 0;
5801         }
5802
5803         if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5804                 *done = true;
5805                 goto del_cursor;
5806         }
5807         if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5808                 error = -EFSCORRUPTED;
5809                 goto del_cursor;
5810         }
5811
5812         new_startoff = got.br_startoff - offset_shift_fsb;
5813         if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5814                 if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5815                         error = -EINVAL;
5816                         goto del_cursor;
5817                 }
5818
5819                 if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5820                         error = xfs_bmse_merge(tp, ip, whichfork,
5821                                         offset_shift_fsb, &icur, &got, &prev,
5822                                         cur, &logflags);
5823                         if (error)
5824                                 goto del_cursor;
5825                         goto done;
5826                 }
5827         } else {
5828                 if (got.br_startoff < offset_shift_fsb) {
5829                         error = -EINVAL;
5830                         goto del_cursor;
5831                 }
5832         }
5833
5834         error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5835                         cur, &logflags, new_startoff);
5836         if (error)
5837                 goto del_cursor;
5838
5839 done:
5840         if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5841                 *done = true;
5842                 goto del_cursor;
5843         }
5844
5845         *next_fsb = got.br_startoff;
5846 del_cursor:
5847         if (cur)
5848                 xfs_btree_del_cursor(cur, error);
5849         if (logflags)
5850                 xfs_trans_log_inode(tp, ip, logflags);
5851         return error;
5852 }
5853
5854 /* Make sure we won't be right-shifting an extent past the maximum bound. */
5855 int
5856 xfs_bmap_can_insert_extents(
5857         struct xfs_inode        *ip,
5858         xfs_fileoff_t           off,
5859         xfs_fileoff_t           shift)
5860 {
5861         struct xfs_bmbt_irec    got;
5862         int                     is_empty;
5863         int                     error = 0;
5864
5865         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5866
5867         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5868                 return -EIO;
5869
5870         xfs_ilock(ip, XFS_ILOCK_EXCL);
5871         error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5872         if (!error && !is_empty && got.br_startoff >= off &&
5873             ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5874                 error = -EINVAL;
5875         xfs_iunlock(ip, XFS_ILOCK_EXCL);
5876
5877         return error;
5878 }
5879
5880 int
5881 xfs_bmap_insert_extents(
5882         struct xfs_trans        *tp,
5883         struct xfs_inode        *ip,
5884         xfs_fileoff_t           *next_fsb,
5885         xfs_fileoff_t           offset_shift_fsb,
5886         bool                    *done,
5887         xfs_fileoff_t           stop_fsb)
5888 {
5889         int                     whichfork = XFS_DATA_FORK;
5890         struct xfs_mount        *mp = ip->i_mount;
5891         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5892         struct xfs_btree_cur    *cur = NULL;
5893         struct xfs_bmbt_irec    got, next;
5894         struct xfs_iext_cursor  icur;
5895         xfs_fileoff_t           new_startoff;
5896         int                     error = 0;
5897         int                     logflags = 0;
5898
5899         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5900             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5901                 return -EFSCORRUPTED;
5902         }
5903
5904         if (XFS_FORCED_SHUTDOWN(mp))
5905                 return -EIO;
5906
5907         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5908
5909         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5910                 error = xfs_iread_extents(tp, ip, whichfork);
5911                 if (error)
5912                         return error;
5913         }
5914
5915         if (ifp->if_flags & XFS_IFBROOT) {
5916                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5917                 cur->bc_ino.flags = 0;
5918         }
5919
5920         if (*next_fsb == NULLFSBLOCK) {
5921                 xfs_iext_last(ifp, &icur);
5922                 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5923                     stop_fsb > got.br_startoff) {
5924                         *done = true;
5925                         goto del_cursor;
5926                 }
5927         } else {
5928                 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5929                         *done = true;
5930                         goto del_cursor;
5931                 }
5932         }
5933         if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5934                 error = -EFSCORRUPTED;
5935                 goto del_cursor;
5936         }
5937
5938         if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5939                 error = -EFSCORRUPTED;
5940                 goto del_cursor;
5941         }
5942
5943         new_startoff = got.br_startoff + offset_shift_fsb;
5944         if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5945                 if (new_startoff + got.br_blockcount > next.br_startoff) {
5946                         error = -EINVAL;
5947                         goto del_cursor;
5948                 }
5949
5950                 /*
5951                  * Unlike a left shift (which involves a hole punch), a right
5952                  * shift does not modify extent neighbors in any way.  We should
5953                  * never find mergeable extents in this scenario.  Check anyways
5954                  * and warn if we encounter two extents that could be one.
5955                  */
5956                 if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5957                         WARN_ON_ONCE(1);
5958         }
5959
5960         error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5961                         cur, &logflags, new_startoff);
5962         if (error)
5963                 goto del_cursor;
5964
5965         if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5966             stop_fsb >= got.br_startoff + got.br_blockcount) {
5967                 *done = true;
5968                 goto del_cursor;
5969         }
5970
5971         *next_fsb = got.br_startoff;
5972 del_cursor:
5973         if (cur)
5974                 xfs_btree_del_cursor(cur, error);
5975         if (logflags)
5976                 xfs_trans_log_inode(tp, ip, logflags);
5977         return error;
5978 }
5979
5980 /*
5981  * Splits an extent into two extents at split_fsb block such that it is the
5982  * first block of the current_ext. @ext is a target extent to be split.
5983  * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5984  * hole or the first block of extents, just return 0.
5985  */
5986 int
5987 xfs_bmap_split_extent(
5988         struct xfs_trans        *tp,
5989         struct xfs_inode        *ip,
5990         xfs_fileoff_t           split_fsb)
5991 {
5992         int                             whichfork = XFS_DATA_FORK;
5993         struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
5994         struct xfs_btree_cur            *cur = NULL;
5995         struct xfs_bmbt_irec            got;
5996         struct xfs_bmbt_irec            new; /* split extent */
5997         struct xfs_mount                *mp = ip->i_mount;
5998         xfs_fsblock_t                   gotblkcnt; /* new block count for got */
5999         struct xfs_iext_cursor          icur;
6000         int                             error = 0;
6001         int                             logflags = 0;
6002         int                             i = 0;
6003
6004         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6005             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6006                 return -EFSCORRUPTED;
6007         }
6008
6009         if (XFS_FORCED_SHUTDOWN(mp))
6010                 return -EIO;
6011
6012         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6013                 /* Read in all the extents */
6014                 error = xfs_iread_extents(tp, ip, whichfork);
6015                 if (error)
6016                         return error;
6017         }
6018
6019         /*
6020          * If there are not extents, or split_fsb lies in a hole we are done.
6021          */
6022         if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6023             got.br_startoff >= split_fsb)
6024                 return 0;
6025
6026         gotblkcnt = split_fsb - got.br_startoff;
6027         new.br_startoff = split_fsb;
6028         new.br_startblock = got.br_startblock + gotblkcnt;
6029         new.br_blockcount = got.br_blockcount - gotblkcnt;
6030         new.br_state = got.br_state;
6031
6032         if (ifp->if_flags & XFS_IFBROOT) {
6033                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6034                 cur->bc_ino.flags = 0;
6035                 error = xfs_bmbt_lookup_eq(cur, &got, &i);
6036                 if (error)
6037                         goto del_cursor;
6038                 if (XFS_IS_CORRUPT(mp, i != 1)) {
6039                         error = -EFSCORRUPTED;
6040                         goto del_cursor;
6041                 }
6042         }
6043
6044         got.br_blockcount = gotblkcnt;
6045         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6046                         &got);
6047
6048         logflags = XFS_ILOG_CORE;
6049         if (cur) {
6050                 error = xfs_bmbt_update(cur, &got);
6051                 if (error)
6052                         goto del_cursor;
6053         } else
6054                 logflags |= XFS_ILOG_DEXT;
6055
6056         /* Add new extent */
6057         xfs_iext_next(ifp, &icur);
6058         xfs_iext_insert(ip, &icur, &new, 0);
6059         ifp->if_nextents++;
6060
6061         if (cur) {
6062                 error = xfs_bmbt_lookup_eq(cur, &new, &i);
6063                 if (error)
6064                         goto del_cursor;
6065                 if (XFS_IS_CORRUPT(mp, i != 0)) {
6066                         error = -EFSCORRUPTED;
6067                         goto del_cursor;
6068                 }
6069                 error = xfs_btree_insert(cur, &i);
6070                 if (error)
6071                         goto del_cursor;
6072                 if (XFS_IS_CORRUPT(mp, i != 1)) {
6073                         error = -EFSCORRUPTED;
6074                         goto del_cursor;
6075                 }
6076         }
6077
6078         /*
6079          * Convert to a btree if necessary.
6080          */
6081         if (xfs_bmap_needs_btree(ip, whichfork)) {
6082                 int tmp_logflags; /* partial log flag return val */
6083
6084                 ASSERT(cur == NULL);
6085                 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6086                                 &tmp_logflags, whichfork);
6087                 logflags |= tmp_logflags;
6088         }
6089
6090 del_cursor:
6091         if (cur) {
6092                 cur->bc_ino.allocated = 0;
6093                 xfs_btree_del_cursor(cur, error);
6094         }
6095
6096         if (logflags)
6097                 xfs_trans_log_inode(tp, ip, logflags);
6098         return error;
6099 }
6100
6101 /* Deferred mapping is only for real extents in the data fork. */
6102 static bool
6103 xfs_bmap_is_update_needed(
6104         struct xfs_bmbt_irec    *bmap)
6105 {
6106         return  bmap->br_startblock != HOLESTARTBLOCK &&
6107                 bmap->br_startblock != DELAYSTARTBLOCK;
6108 }
6109
6110 /* Record a bmap intent. */
6111 static int
6112 __xfs_bmap_add(
6113         struct xfs_trans                *tp,
6114         enum xfs_bmap_intent_type       type,
6115         struct xfs_inode                *ip,
6116         int                             whichfork,
6117         struct xfs_bmbt_irec            *bmap)
6118 {
6119         struct xfs_bmap_intent          *bi;
6120
6121         trace_xfs_bmap_defer(tp->t_mountp,
6122                         XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6123                         type,
6124                         XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6125                         ip->i_ino, whichfork,
6126                         bmap->br_startoff,
6127                         bmap->br_blockcount,
6128                         bmap->br_state);
6129
6130         bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6131         INIT_LIST_HEAD(&bi->bi_list);
6132         bi->bi_type = type;
6133         bi->bi_owner = ip;
6134         bi->bi_whichfork = whichfork;
6135         bi->bi_bmap = *bmap;
6136
6137         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6138         return 0;
6139 }
6140
6141 /* Map an extent into a file. */
6142 void
6143 xfs_bmap_map_extent(
6144         struct xfs_trans        *tp,
6145         struct xfs_inode        *ip,
6146         struct xfs_bmbt_irec    *PREV)
6147 {
6148         if (!xfs_bmap_is_update_needed(PREV))
6149                 return;
6150
6151         __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6152 }
6153
6154 /* Unmap an extent out of a file. */
6155 void
6156 xfs_bmap_unmap_extent(
6157         struct xfs_trans        *tp,
6158         struct xfs_inode        *ip,
6159         struct xfs_bmbt_irec    *PREV)
6160 {
6161         if (!xfs_bmap_is_update_needed(PREV))
6162                 return;
6163
6164         __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6165 }
6166
6167 /*
6168  * Process one of the deferred bmap operations.  We pass back the
6169  * btree cursor to maintain our lock on the bmapbt between calls.
6170  */
6171 int
6172 xfs_bmap_finish_one(
6173         struct xfs_trans                *tp,
6174         struct xfs_inode                *ip,
6175         enum xfs_bmap_intent_type       type,
6176         int                             whichfork,
6177         xfs_fileoff_t                   startoff,
6178         xfs_fsblock_t                   startblock,
6179         xfs_filblks_t                   *blockcount,
6180         xfs_exntst_t                    state)
6181 {
6182         int                             error = 0;
6183
6184         ASSERT(tp->t_firstblock == NULLFSBLOCK);
6185
6186         trace_xfs_bmap_deferred(tp->t_mountp,
6187                         XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6188                         XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6189                         ip->i_ino, whichfork, startoff, *blockcount, state);
6190
6191         if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6192                 return -EFSCORRUPTED;
6193
6194         if (XFS_TEST_ERROR(false, tp->t_mountp,
6195                         XFS_ERRTAG_BMAP_FINISH_ONE))
6196                 return -EIO;
6197
6198         switch (type) {
6199         case XFS_BMAP_MAP:
6200                 error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6201                                 startblock, 0);
6202                 *blockcount = 0;
6203                 break;
6204         case XFS_BMAP_UNMAP:
6205                 error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6206                                 XFS_BMAPI_REMAP, 1);
6207                 break;
6208         default:
6209                 ASSERT(0);
6210                 error = -EFSCORRUPTED;
6211         }
6212
6213         return error;
6214 }
6215
6216 /* Check that an inode's extent does not have invalid flags or bad ranges. */
6217 xfs_failaddr_t
6218 xfs_bmap_validate_extent(
6219         struct xfs_inode        *ip,
6220         int                     whichfork,
6221         struct xfs_bmbt_irec    *irec)
6222 {
6223         struct xfs_mount        *mp = ip->i_mount;
6224         xfs_fsblock_t           endfsb;
6225         bool                    isrt;
6226
6227         isrt = XFS_IS_REALTIME_INODE(ip);
6228         endfsb = irec->br_startblock + irec->br_blockcount - 1;
6229         if (isrt && whichfork == XFS_DATA_FORK) {
6230                 if (!xfs_verify_rtbno(mp, irec->br_startblock))
6231                         return __this_address;
6232                 if (!xfs_verify_rtbno(mp, endfsb))
6233                         return __this_address;
6234         } else {
6235                 if (!xfs_verify_fsbno(mp, irec->br_startblock))
6236                         return __this_address;
6237                 if (!xfs_verify_fsbno(mp, endfsb))
6238                         return __this_address;
6239                 if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6240                     XFS_FSB_TO_AGNO(mp, endfsb))
6241                         return __this_address;
6242         }
6243         if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6244                 return __this_address;
6245         return NULL;
6246 }