Merge branch 'KASAN-read_word_at_a_time'
[linux-2.6-microblaze.git] / fs / xfs / libxfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_inode.h"
28 #include "xfs_trans.h"
29 #include "xfs_inode_item.h"
30 #include "xfs_buf_item.h"
31 #include "xfs_btree.h"
32 #include "xfs_errortag.h"
33 #include "xfs_error.h"
34 #include "xfs_trace.h"
35 #include "xfs_cksum.h"
36 #include "xfs_alloc.h"
37 #include "xfs_log.h"
38
39 /*
40  * Cursor allocation zone.
41  */
42 kmem_zone_t     *xfs_btree_cur_zone;
43
44 /*
45  * Btree magic numbers.
46  */
47 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
48         { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
49           XFS_FIBT_MAGIC, 0 },
50         { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
51           XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
52           XFS_REFC_CRC_MAGIC }
53 };
54
55 uint32_t
56 xfs_btree_magic(
57         int                     crc,
58         xfs_btnum_t             btnum)
59 {
60         uint32_t                magic = xfs_magics[crc][btnum];
61
62         /* Ensure we asked for crc for crc-only magics. */
63         ASSERT(magic != 0);
64         return magic;
65 }
66
67 /*
68  * Check a long btree block header.  Return the address of the failing check,
69  * or NULL if everything is ok.
70  */
71 xfs_failaddr_t
72 __xfs_btree_check_lblock(
73         struct xfs_btree_cur    *cur,
74         struct xfs_btree_block  *block,
75         int                     level,
76         struct xfs_buf          *bp)
77 {
78         struct xfs_mount        *mp = cur->bc_mp;
79         xfs_btnum_t             btnum = cur->bc_btnum;
80         int                     crc = xfs_sb_version_hascrc(&mp->m_sb);
81
82         if (crc) {
83                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
84                         return __this_address;
85                 if (block->bb_u.l.bb_blkno !=
86                     cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
87                         return __this_address;
88                 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
89                         return __this_address;
90         }
91
92         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
93                 return __this_address;
94         if (be16_to_cpu(block->bb_level) != level)
95                 return __this_address;
96         if (be16_to_cpu(block->bb_numrecs) >
97             cur->bc_ops->get_maxrecs(cur, level))
98                 return __this_address;
99         if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
100             !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
101                         level + 1))
102                 return __this_address;
103         if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
104             !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
105                         level + 1))
106                 return __this_address;
107
108         return NULL;
109 }
110
111 /* Check a long btree block header. */
112 static int
113 xfs_btree_check_lblock(
114         struct xfs_btree_cur    *cur,
115         struct xfs_btree_block  *block,
116         int                     level,
117         struct xfs_buf          *bp)
118 {
119         struct xfs_mount        *mp = cur->bc_mp;
120         xfs_failaddr_t          fa;
121
122         fa = __xfs_btree_check_lblock(cur, block, level, bp);
123         if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
124                         XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
125                 if (bp)
126                         trace_xfs_btree_corrupt(bp, _RET_IP_);
127                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
128                 return -EFSCORRUPTED;
129         }
130         return 0;
131 }
132
133 /*
134  * Check a short btree block header.  Return the address of the failing check,
135  * or NULL if everything is ok.
136  */
137 xfs_failaddr_t
138 __xfs_btree_check_sblock(
139         struct xfs_btree_cur    *cur,
140         struct xfs_btree_block  *block,
141         int                     level,
142         struct xfs_buf          *bp)
143 {
144         struct xfs_mount        *mp = cur->bc_mp;
145         xfs_btnum_t             btnum = cur->bc_btnum;
146         int                     crc = xfs_sb_version_hascrc(&mp->m_sb);
147
148         if (crc) {
149                 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
150                         return __this_address;
151                 if (block->bb_u.s.bb_blkno !=
152                     cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
153                         return __this_address;
154         }
155
156         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
157                 return __this_address;
158         if (be16_to_cpu(block->bb_level) != level)
159                 return __this_address;
160         if (be16_to_cpu(block->bb_numrecs) >
161             cur->bc_ops->get_maxrecs(cur, level))
162                 return __this_address;
163         if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
164             !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
165                         level + 1))
166                 return __this_address;
167         if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
168             !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
169                         level + 1))
170                 return __this_address;
171
172         return NULL;
173 }
174
175 /* Check a short btree block header. */
176 STATIC int
177 xfs_btree_check_sblock(
178         struct xfs_btree_cur    *cur,
179         struct xfs_btree_block  *block,
180         int                     level,
181         struct xfs_buf          *bp)
182 {
183         struct xfs_mount        *mp = cur->bc_mp;
184         xfs_failaddr_t          fa;
185
186         fa = __xfs_btree_check_sblock(cur, block, level, bp);
187         if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
188                         XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
189                 if (bp)
190                         trace_xfs_btree_corrupt(bp, _RET_IP_);
191                 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
192                 return -EFSCORRUPTED;
193         }
194         return 0;
195 }
196
197 /*
198  * Debug routine: check that block header is ok.
199  */
200 int
201 xfs_btree_check_block(
202         struct xfs_btree_cur    *cur,   /* btree cursor */
203         struct xfs_btree_block  *block, /* generic btree block pointer */
204         int                     level,  /* level of the btree block */
205         struct xfs_buf          *bp)    /* buffer containing block, if any */
206 {
207         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
208                 return xfs_btree_check_lblock(cur, block, level, bp);
209         else
210                 return xfs_btree_check_sblock(cur, block, level, bp);
211 }
212
213 /* Check that this long pointer is valid and points within the fs. */
214 bool
215 xfs_btree_check_lptr(
216         struct xfs_btree_cur    *cur,
217         xfs_fsblock_t           fsbno,
218         int                     level)
219 {
220         if (level <= 0)
221                 return false;
222         return xfs_verify_fsbno(cur->bc_mp, fsbno);
223 }
224
225 /* Check that this short pointer is valid and points within the AG. */
226 bool
227 xfs_btree_check_sptr(
228         struct xfs_btree_cur    *cur,
229         xfs_agblock_t           agbno,
230         int                     level)
231 {
232         if (level <= 0)
233                 return false;
234         return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
235 }
236
237 #ifdef DEBUG
238 /*
239  * Check that a given (indexed) btree pointer at a certain level of a
240  * btree is valid and doesn't point past where it should.
241  */
242 static int
243 xfs_btree_check_ptr(
244         struct xfs_btree_cur    *cur,
245         union xfs_btree_ptr     *ptr,
246         int                     index,
247         int                     level)
248 {
249         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
250                 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
251                                 xfs_btree_check_lptr(cur,
252                                         be64_to_cpu((&ptr->l)[index]), level));
253         } else {
254                 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
255                                 xfs_btree_check_sptr(cur,
256                                         be32_to_cpu((&ptr->s)[index]), level));
257         }
258
259         return 0;
260 }
261 #endif
262
263 /*
264  * Calculate CRC on the whole btree block and stuff it into the
265  * long-form btree header.
266  *
267  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
268  * it into the buffer so recovery knows what the last modification was that made
269  * it to disk.
270  */
271 void
272 xfs_btree_lblock_calc_crc(
273         struct xfs_buf          *bp)
274 {
275         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
276         struct xfs_buf_log_item *bip = bp->b_log_item;
277
278         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
279                 return;
280         if (bip)
281                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
282         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
283 }
284
285 bool
286 xfs_btree_lblock_verify_crc(
287         struct xfs_buf          *bp)
288 {
289         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
290         struct xfs_mount        *mp = bp->b_target->bt_mount;
291
292         if (xfs_sb_version_hascrc(&mp->m_sb)) {
293                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
294                         return false;
295                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
296         }
297
298         return true;
299 }
300
301 /*
302  * Calculate CRC on the whole btree block and stuff it into the
303  * short-form btree header.
304  *
305  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
306  * it into the buffer so recovery knows what the last modification was that made
307  * it to disk.
308  */
309 void
310 xfs_btree_sblock_calc_crc(
311         struct xfs_buf          *bp)
312 {
313         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
314         struct xfs_buf_log_item *bip = bp->b_log_item;
315
316         if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
317                 return;
318         if (bip)
319                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
320         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
321 }
322
323 bool
324 xfs_btree_sblock_verify_crc(
325         struct xfs_buf          *bp)
326 {
327         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
328         struct xfs_mount        *mp = bp->b_target->bt_mount;
329
330         if (xfs_sb_version_hascrc(&mp->m_sb)) {
331                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
332                         return __this_address;
333                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
334         }
335
336         return true;
337 }
338
339 static int
340 xfs_btree_free_block(
341         struct xfs_btree_cur    *cur,
342         struct xfs_buf          *bp)
343 {
344         int                     error;
345
346         error = cur->bc_ops->free_block(cur, bp);
347         if (!error) {
348                 xfs_trans_binval(cur->bc_tp, bp);
349                 XFS_BTREE_STATS_INC(cur, free);
350         }
351         return error;
352 }
353
354 /*
355  * Delete the btree cursor.
356  */
357 void
358 xfs_btree_del_cursor(
359         xfs_btree_cur_t *cur,           /* btree cursor */
360         int             error)          /* del because of error */
361 {
362         int             i;              /* btree level */
363
364         /*
365          * Clear the buffer pointers, and release the buffers.
366          * If we're doing this in the face of an error, we
367          * need to make sure to inspect all of the entries
368          * in the bc_bufs array for buffers to be unlocked.
369          * This is because some of the btree code works from
370          * level n down to 0, and if we get an error along
371          * the way we won't have initialized all the entries
372          * down to 0.
373          */
374         for (i = 0; i < cur->bc_nlevels; i++) {
375                 if (cur->bc_bufs[i])
376                         xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
377                 else if (!error)
378                         break;
379         }
380         /*
381          * Can't free a bmap cursor without having dealt with the
382          * allocated indirect blocks' accounting.
383          */
384         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
385                cur->bc_private.b.allocated == 0);
386         /*
387          * Free the cursor.
388          */
389         kmem_zone_free(xfs_btree_cur_zone, cur);
390 }
391
392 /*
393  * Duplicate the btree cursor.
394  * Allocate a new one, copy the record, re-get the buffers.
395  */
396 int                                     /* error */
397 xfs_btree_dup_cursor(
398         xfs_btree_cur_t *cur,           /* input cursor */
399         xfs_btree_cur_t **ncur)         /* output cursor */
400 {
401         xfs_buf_t       *bp;            /* btree block's buffer pointer */
402         int             error;          /* error return value */
403         int             i;              /* level number of btree block */
404         xfs_mount_t     *mp;            /* mount structure for filesystem */
405         xfs_btree_cur_t *new;           /* new cursor value */
406         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
407
408         tp = cur->bc_tp;
409         mp = cur->bc_mp;
410
411         /*
412          * Allocate a new cursor like the old one.
413          */
414         new = cur->bc_ops->dup_cursor(cur);
415
416         /*
417          * Copy the record currently in the cursor.
418          */
419         new->bc_rec = cur->bc_rec;
420
421         /*
422          * For each level current, re-get the buffer and copy the ptr value.
423          */
424         for (i = 0; i < new->bc_nlevels; i++) {
425                 new->bc_ptrs[i] = cur->bc_ptrs[i];
426                 new->bc_ra[i] = cur->bc_ra[i];
427                 bp = cur->bc_bufs[i];
428                 if (bp) {
429                         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
430                                                    XFS_BUF_ADDR(bp), mp->m_bsize,
431                                                    0, &bp,
432                                                    cur->bc_ops->buf_ops);
433                         if (error) {
434                                 xfs_btree_del_cursor(new, error);
435                                 *ncur = NULL;
436                                 return error;
437                         }
438                 }
439                 new->bc_bufs[i] = bp;
440         }
441         *ncur = new;
442         return 0;
443 }
444
445 /*
446  * XFS btree block layout and addressing:
447  *
448  * There are two types of blocks in the btree: leaf and non-leaf blocks.
449  *
450  * The leaf record start with a header then followed by records containing
451  * the values.  A non-leaf block also starts with the same header, and
452  * then first contains lookup keys followed by an equal number of pointers
453  * to the btree blocks at the previous level.
454  *
455  *              +--------+-------+-------+-------+-------+-------+-------+
456  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
457  *              +--------+-------+-------+-------+-------+-------+-------+
458  *
459  *              +--------+-------+-------+-------+-------+-------+-------+
460  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
461  *              +--------+-------+-------+-------+-------+-------+-------+
462  *
463  * The header is called struct xfs_btree_block for reasons better left unknown
464  * and comes in different versions for short (32bit) and long (64bit) block
465  * pointers.  The record and key structures are defined by the btree instances
466  * and opaque to the btree core.  The block pointers are simple disk endian
467  * integers, available in a short (32bit) and long (64bit) variant.
468  *
469  * The helpers below calculate the offset of a given record, key or pointer
470  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
471  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
472  * inside the btree block is done using indices starting at one, not zero!
473  *
474  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
475  * overlapping intervals.  In such a tree, records are still sorted lowest to
476  * highest and indexed by the smallest key value that refers to the record.
477  * However, nodes are different: each pointer has two associated keys -- one
478  * indexing the lowest key available in the block(s) below (the same behavior
479  * as the key in a regular btree) and another indexing the highest key
480  * available in the block(s) below.  Because records are /not/ sorted by the
481  * highest key, all leaf block updates require us to compute the highest key
482  * that matches any record in the leaf and to recursively update the high keys
483  * in the nodes going further up in the tree, if necessary.  Nodes look like
484  * this:
485  *
486  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
487  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
488  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
489  *
490  * To perform an interval query on an overlapped tree, perform the usual
491  * depth-first search and use the low and high keys to decide if we can skip
492  * that particular node.  If a leaf node is reached, return the records that
493  * intersect the interval.  Note that an interval query may return numerous
494  * entries.  For a non-overlapped tree, simply search for the record associated
495  * with the lowest key and iterate forward until a non-matching record is
496  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
497  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
498  * more detail.
499  *
500  * Why do we care about overlapping intervals?  Let's say you have a bunch of
501  * reverse mapping records on a reflink filesystem:
502  *
503  * 1: +- file A startblock B offset C length D -----------+
504  * 2:      +- file E startblock F offset G length H --------------+
505  * 3:      +- file I startblock F offset J length K --+
506  * 4:                                                        +- file L... --+
507  *
508  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
509  * we'd simply increment the length of record 1.  But how do we find the record
510  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
511  * record 3 because the keys are ordered first by startblock.  An interval
512  * query would return records 1 and 2 because they both overlap (B+D-1), and
513  * from that we can pick out record 1 as the appropriate left neighbor.
514  *
515  * In the non-overlapped case you can do a LE lookup and decrement the cursor
516  * because a record's interval must end before the next record.
517  */
518
519 /*
520  * Return size of the btree block header for this btree instance.
521  */
522 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
523 {
524         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
525                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
526                         return XFS_BTREE_LBLOCK_CRC_LEN;
527                 return XFS_BTREE_LBLOCK_LEN;
528         }
529         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
530                 return XFS_BTREE_SBLOCK_CRC_LEN;
531         return XFS_BTREE_SBLOCK_LEN;
532 }
533
534 /*
535  * Return size of btree block pointers for this btree instance.
536  */
537 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
538 {
539         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
540                 sizeof(__be64) : sizeof(__be32);
541 }
542
543 /*
544  * Calculate offset of the n-th record in a btree block.
545  */
546 STATIC size_t
547 xfs_btree_rec_offset(
548         struct xfs_btree_cur    *cur,
549         int                     n)
550 {
551         return xfs_btree_block_len(cur) +
552                 (n - 1) * cur->bc_ops->rec_len;
553 }
554
555 /*
556  * Calculate offset of the n-th key in a btree block.
557  */
558 STATIC size_t
559 xfs_btree_key_offset(
560         struct xfs_btree_cur    *cur,
561         int                     n)
562 {
563         return xfs_btree_block_len(cur) +
564                 (n - 1) * cur->bc_ops->key_len;
565 }
566
567 /*
568  * Calculate offset of the n-th high key in a btree block.
569  */
570 STATIC size_t
571 xfs_btree_high_key_offset(
572         struct xfs_btree_cur    *cur,
573         int                     n)
574 {
575         return xfs_btree_block_len(cur) +
576                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
577 }
578
579 /*
580  * Calculate offset of the n-th block pointer in a btree block.
581  */
582 STATIC size_t
583 xfs_btree_ptr_offset(
584         struct xfs_btree_cur    *cur,
585         int                     n,
586         int                     level)
587 {
588         return xfs_btree_block_len(cur) +
589                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
590                 (n - 1) * xfs_btree_ptr_len(cur);
591 }
592
593 /*
594  * Return a pointer to the n-th record in the btree block.
595  */
596 union xfs_btree_rec *
597 xfs_btree_rec_addr(
598         struct xfs_btree_cur    *cur,
599         int                     n,
600         struct xfs_btree_block  *block)
601 {
602         return (union xfs_btree_rec *)
603                 ((char *)block + xfs_btree_rec_offset(cur, n));
604 }
605
606 /*
607  * Return a pointer to the n-th key in the btree block.
608  */
609 union xfs_btree_key *
610 xfs_btree_key_addr(
611         struct xfs_btree_cur    *cur,
612         int                     n,
613         struct xfs_btree_block  *block)
614 {
615         return (union xfs_btree_key *)
616                 ((char *)block + xfs_btree_key_offset(cur, n));
617 }
618
619 /*
620  * Return a pointer to the n-th high key in the btree block.
621  */
622 union xfs_btree_key *
623 xfs_btree_high_key_addr(
624         struct xfs_btree_cur    *cur,
625         int                     n,
626         struct xfs_btree_block  *block)
627 {
628         return (union xfs_btree_key *)
629                 ((char *)block + xfs_btree_high_key_offset(cur, n));
630 }
631
632 /*
633  * Return a pointer to the n-th block pointer in the btree block.
634  */
635 union xfs_btree_ptr *
636 xfs_btree_ptr_addr(
637         struct xfs_btree_cur    *cur,
638         int                     n,
639         struct xfs_btree_block  *block)
640 {
641         int                     level = xfs_btree_get_level(block);
642
643         ASSERT(block->bb_level != 0);
644
645         return (union xfs_btree_ptr *)
646                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
647 }
648
649 /*
650  * Get the root block which is stored in the inode.
651  *
652  * For now this btree implementation assumes the btree root is always
653  * stored in the if_broot field of an inode fork.
654  */
655 STATIC struct xfs_btree_block *
656 xfs_btree_get_iroot(
657         struct xfs_btree_cur    *cur)
658 {
659         struct xfs_ifork        *ifp;
660
661         ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
662         return (struct xfs_btree_block *)ifp->if_broot;
663 }
664
665 /*
666  * Retrieve the block pointer from the cursor at the given level.
667  * This may be an inode btree root or from a buffer.
668  */
669 struct xfs_btree_block *                /* generic btree block pointer */
670 xfs_btree_get_block(
671         struct xfs_btree_cur    *cur,   /* btree cursor */
672         int                     level,  /* level in btree */
673         struct xfs_buf          **bpp)  /* buffer containing the block */
674 {
675         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
676             (level == cur->bc_nlevels - 1)) {
677                 *bpp = NULL;
678                 return xfs_btree_get_iroot(cur);
679         }
680
681         *bpp = cur->bc_bufs[level];
682         return XFS_BUF_TO_BLOCK(*bpp);
683 }
684
685 /*
686  * Get a buffer for the block, return it with no data read.
687  * Long-form addressing.
688  */
689 xfs_buf_t *                             /* buffer for fsbno */
690 xfs_btree_get_bufl(
691         xfs_mount_t     *mp,            /* file system mount point */
692         xfs_trans_t     *tp,            /* transaction pointer */
693         xfs_fsblock_t   fsbno,          /* file system block number */
694         uint            lock)           /* lock flags for get_buf */
695 {
696         xfs_daddr_t             d;              /* real disk block address */
697
698         ASSERT(fsbno != NULLFSBLOCK);
699         d = XFS_FSB_TO_DADDR(mp, fsbno);
700         return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
701 }
702
703 /*
704  * Get a buffer for the block, return it with no data read.
705  * Short-form addressing.
706  */
707 xfs_buf_t *                             /* buffer for agno/agbno */
708 xfs_btree_get_bufs(
709         xfs_mount_t     *mp,            /* file system mount point */
710         xfs_trans_t     *tp,            /* transaction pointer */
711         xfs_agnumber_t  agno,           /* allocation group number */
712         xfs_agblock_t   agbno,          /* allocation group block number */
713         uint            lock)           /* lock flags for get_buf */
714 {
715         xfs_daddr_t             d;              /* real disk block address */
716
717         ASSERT(agno != NULLAGNUMBER);
718         ASSERT(agbno != NULLAGBLOCK);
719         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
720         return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
721 }
722
723 /*
724  * Check for the cursor referring to the last block at the given level.
725  */
726 int                                     /* 1=is last block, 0=not last block */
727 xfs_btree_islastblock(
728         xfs_btree_cur_t         *cur,   /* btree cursor */
729         int                     level)  /* level to check */
730 {
731         struct xfs_btree_block  *block; /* generic btree block pointer */
732         xfs_buf_t               *bp;    /* buffer containing block */
733
734         block = xfs_btree_get_block(cur, level, &bp);
735         xfs_btree_check_block(cur, block, level, bp);
736         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
737                 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
738         else
739                 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
740 }
741
742 /*
743  * Change the cursor to point to the first record at the given level.
744  * Other levels are unaffected.
745  */
746 STATIC int                              /* success=1, failure=0 */
747 xfs_btree_firstrec(
748         xfs_btree_cur_t         *cur,   /* btree cursor */
749         int                     level)  /* level to change */
750 {
751         struct xfs_btree_block  *block; /* generic btree block pointer */
752         xfs_buf_t               *bp;    /* buffer containing block */
753
754         /*
755          * Get the block pointer for this level.
756          */
757         block = xfs_btree_get_block(cur, level, &bp);
758         if (xfs_btree_check_block(cur, block, level, bp))
759                 return 0;
760         /*
761          * It's empty, there is no such record.
762          */
763         if (!block->bb_numrecs)
764                 return 0;
765         /*
766          * Set the ptr value to 1, that's the first record/key.
767          */
768         cur->bc_ptrs[level] = 1;
769         return 1;
770 }
771
772 /*
773  * Change the cursor to point to the last record in the current block
774  * at the given level.  Other levels are unaffected.
775  */
776 STATIC int                              /* success=1, failure=0 */
777 xfs_btree_lastrec(
778         xfs_btree_cur_t         *cur,   /* btree cursor */
779         int                     level)  /* level to change */
780 {
781         struct xfs_btree_block  *block; /* generic btree block pointer */
782         xfs_buf_t               *bp;    /* buffer containing block */
783
784         /*
785          * Get the block pointer for this level.
786          */
787         block = xfs_btree_get_block(cur, level, &bp);
788         if (xfs_btree_check_block(cur, block, level, bp))
789                 return 0;
790         /*
791          * It's empty, there is no such record.
792          */
793         if (!block->bb_numrecs)
794                 return 0;
795         /*
796          * Set the ptr value to numrecs, that's the last record/key.
797          */
798         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
799         return 1;
800 }
801
802 /*
803  * Compute first and last byte offsets for the fields given.
804  * Interprets the offsets table, which contains struct field offsets.
805  */
806 void
807 xfs_btree_offsets(
808         int64_t         fields,         /* bitmask of fields */
809         const short     *offsets,       /* table of field offsets */
810         int             nbits,          /* number of bits to inspect */
811         int             *first,         /* output: first byte offset */
812         int             *last)          /* output: last byte offset */
813 {
814         int             i;              /* current bit number */
815         int64_t         imask;          /* mask for current bit number */
816
817         ASSERT(fields != 0);
818         /*
819          * Find the lowest bit, so the first byte offset.
820          */
821         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
822                 if (imask & fields) {
823                         *first = offsets[i];
824                         break;
825                 }
826         }
827         /*
828          * Find the highest bit, so the last byte offset.
829          */
830         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
831                 if (imask & fields) {
832                         *last = offsets[i + 1] - 1;
833                         break;
834                 }
835         }
836 }
837
838 /*
839  * Get a buffer for the block, return it read in.
840  * Long-form addressing.
841  */
842 int
843 xfs_btree_read_bufl(
844         struct xfs_mount        *mp,            /* file system mount point */
845         struct xfs_trans        *tp,            /* transaction pointer */
846         xfs_fsblock_t           fsbno,          /* file system block number */
847         uint                    lock,           /* lock flags for read_buf */
848         struct xfs_buf          **bpp,          /* buffer for fsbno */
849         int                     refval,         /* ref count value for buffer */
850         const struct xfs_buf_ops *ops)
851 {
852         struct xfs_buf          *bp;            /* return value */
853         xfs_daddr_t             d;              /* real disk block address */
854         int                     error;
855
856         if (!xfs_verify_fsbno(mp, fsbno))
857                 return -EFSCORRUPTED;
858         d = XFS_FSB_TO_DADDR(mp, fsbno);
859         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
860                                    mp->m_bsize, lock, &bp, ops);
861         if (error)
862                 return error;
863         if (bp)
864                 xfs_buf_set_ref(bp, refval);
865         *bpp = bp;
866         return 0;
867 }
868
869 /*
870  * Read-ahead the block, don't wait for it, don't return a buffer.
871  * Long-form addressing.
872  */
873 /* ARGSUSED */
874 void
875 xfs_btree_reada_bufl(
876         struct xfs_mount        *mp,            /* file system mount point */
877         xfs_fsblock_t           fsbno,          /* file system block number */
878         xfs_extlen_t            count,          /* count of filesystem blocks */
879         const struct xfs_buf_ops *ops)
880 {
881         xfs_daddr_t             d;
882
883         ASSERT(fsbno != NULLFSBLOCK);
884         d = XFS_FSB_TO_DADDR(mp, fsbno);
885         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
886 }
887
888 /*
889  * Read-ahead the block, don't wait for it, don't return a buffer.
890  * Short-form addressing.
891  */
892 /* ARGSUSED */
893 void
894 xfs_btree_reada_bufs(
895         struct xfs_mount        *mp,            /* file system mount point */
896         xfs_agnumber_t          agno,           /* allocation group number */
897         xfs_agblock_t           agbno,          /* allocation group block number */
898         xfs_extlen_t            count,          /* count of filesystem blocks */
899         const struct xfs_buf_ops *ops)
900 {
901         xfs_daddr_t             d;
902
903         ASSERT(agno != NULLAGNUMBER);
904         ASSERT(agbno != NULLAGBLOCK);
905         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
906         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
907 }
908
909 STATIC int
910 xfs_btree_readahead_lblock(
911         struct xfs_btree_cur    *cur,
912         int                     lr,
913         struct xfs_btree_block  *block)
914 {
915         int                     rval = 0;
916         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
917         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
918
919         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
920                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
921                                      cur->bc_ops->buf_ops);
922                 rval++;
923         }
924
925         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
926                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
927                                      cur->bc_ops->buf_ops);
928                 rval++;
929         }
930
931         return rval;
932 }
933
934 STATIC int
935 xfs_btree_readahead_sblock(
936         struct xfs_btree_cur    *cur,
937         int                     lr,
938         struct xfs_btree_block *block)
939 {
940         int                     rval = 0;
941         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
942         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
943
944
945         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
946                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
947                                      left, 1, cur->bc_ops->buf_ops);
948                 rval++;
949         }
950
951         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
952                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
953                                      right, 1, cur->bc_ops->buf_ops);
954                 rval++;
955         }
956
957         return rval;
958 }
959
960 /*
961  * Read-ahead btree blocks, at the given level.
962  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
963  */
964 STATIC int
965 xfs_btree_readahead(
966         struct xfs_btree_cur    *cur,           /* btree cursor */
967         int                     lev,            /* level in btree */
968         int                     lr)             /* left/right bits */
969 {
970         struct xfs_btree_block  *block;
971
972         /*
973          * No readahead needed if we are at the root level and the
974          * btree root is stored in the inode.
975          */
976         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
977             (lev == cur->bc_nlevels - 1))
978                 return 0;
979
980         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
981                 return 0;
982
983         cur->bc_ra[lev] |= lr;
984         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
985
986         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
987                 return xfs_btree_readahead_lblock(cur, lr, block);
988         return xfs_btree_readahead_sblock(cur, lr, block);
989 }
990
991 STATIC xfs_daddr_t
992 xfs_btree_ptr_to_daddr(
993         struct xfs_btree_cur    *cur,
994         union xfs_btree_ptr     *ptr)
995 {
996         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
997                 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
998
999                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1000         } else {
1001                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1002                 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
1003
1004                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1005                                         be32_to_cpu(ptr->s));
1006         }
1007 }
1008
1009 /*
1010  * Readahead @count btree blocks at the given @ptr location.
1011  *
1012  * We don't need to care about long or short form btrees here as we have a
1013  * method of converting the ptr directly to a daddr available to us.
1014  */
1015 STATIC void
1016 xfs_btree_readahead_ptr(
1017         struct xfs_btree_cur    *cur,
1018         union xfs_btree_ptr     *ptr,
1019         xfs_extlen_t            count)
1020 {
1021         xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
1022                           xfs_btree_ptr_to_daddr(cur, ptr),
1023                           cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
1024 }
1025
1026 /*
1027  * Set the buffer for level "lev" in the cursor to bp, releasing
1028  * any previous buffer.
1029  */
1030 STATIC void
1031 xfs_btree_setbuf(
1032         xfs_btree_cur_t         *cur,   /* btree cursor */
1033         int                     lev,    /* level in btree */
1034         xfs_buf_t               *bp)    /* new buffer to set */
1035 {
1036         struct xfs_btree_block  *b;     /* btree block */
1037
1038         if (cur->bc_bufs[lev])
1039                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
1040         cur->bc_bufs[lev] = bp;
1041         cur->bc_ra[lev] = 0;
1042
1043         b = XFS_BUF_TO_BLOCK(bp);
1044         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1045                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1046                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1047                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1048                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1049         } else {
1050                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1051                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1052                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1053                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1054         }
1055 }
1056
1057 bool
1058 xfs_btree_ptr_is_null(
1059         struct xfs_btree_cur    *cur,
1060         union xfs_btree_ptr     *ptr)
1061 {
1062         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1063                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
1064         else
1065                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
1066 }
1067
1068 STATIC void
1069 xfs_btree_set_ptr_null(
1070         struct xfs_btree_cur    *cur,
1071         union xfs_btree_ptr     *ptr)
1072 {
1073         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1074                 ptr->l = cpu_to_be64(NULLFSBLOCK);
1075         else
1076                 ptr->s = cpu_to_be32(NULLAGBLOCK);
1077 }
1078
1079 /*
1080  * Get/set/init sibling pointers
1081  */
1082 void
1083 xfs_btree_get_sibling(
1084         struct xfs_btree_cur    *cur,
1085         struct xfs_btree_block  *block,
1086         union xfs_btree_ptr     *ptr,
1087         int                     lr)
1088 {
1089         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1090
1091         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1092                 if (lr == XFS_BB_RIGHTSIB)
1093                         ptr->l = block->bb_u.l.bb_rightsib;
1094                 else
1095                         ptr->l = block->bb_u.l.bb_leftsib;
1096         } else {
1097                 if (lr == XFS_BB_RIGHTSIB)
1098                         ptr->s = block->bb_u.s.bb_rightsib;
1099                 else
1100                         ptr->s = block->bb_u.s.bb_leftsib;
1101         }
1102 }
1103
1104 STATIC void
1105 xfs_btree_set_sibling(
1106         struct xfs_btree_cur    *cur,
1107         struct xfs_btree_block  *block,
1108         union xfs_btree_ptr     *ptr,
1109         int                     lr)
1110 {
1111         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1112
1113         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1114                 if (lr == XFS_BB_RIGHTSIB)
1115                         block->bb_u.l.bb_rightsib = ptr->l;
1116                 else
1117                         block->bb_u.l.bb_leftsib = ptr->l;
1118         } else {
1119                 if (lr == XFS_BB_RIGHTSIB)
1120                         block->bb_u.s.bb_rightsib = ptr->s;
1121                 else
1122                         block->bb_u.s.bb_leftsib = ptr->s;
1123         }
1124 }
1125
1126 void
1127 xfs_btree_init_block_int(
1128         struct xfs_mount        *mp,
1129         struct xfs_btree_block  *buf,
1130         xfs_daddr_t             blkno,
1131         xfs_btnum_t             btnum,
1132         __u16                   level,
1133         __u16                   numrecs,
1134         __u64                   owner,
1135         unsigned int            flags)
1136 {
1137         int                     crc = xfs_sb_version_hascrc(&mp->m_sb);
1138         __u32                   magic = xfs_btree_magic(crc, btnum);
1139
1140         buf->bb_magic = cpu_to_be32(magic);
1141         buf->bb_level = cpu_to_be16(level);
1142         buf->bb_numrecs = cpu_to_be16(numrecs);
1143
1144         if (flags & XFS_BTREE_LONG_PTRS) {
1145                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1146                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1147                 if (crc) {
1148                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1149                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1150                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1151                         buf->bb_u.l.bb_pad = 0;
1152                         buf->bb_u.l.bb_lsn = 0;
1153                 }
1154         } else {
1155                 /* owner is a 32 bit value on short blocks */
1156                 __u32 __owner = (__u32)owner;
1157
1158                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1159                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1160                 if (crc) {
1161                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1162                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1163                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1164                         buf->bb_u.s.bb_lsn = 0;
1165                 }
1166         }
1167 }
1168
1169 void
1170 xfs_btree_init_block(
1171         struct xfs_mount *mp,
1172         struct xfs_buf  *bp,
1173         xfs_btnum_t     btnum,
1174         __u16           level,
1175         __u16           numrecs,
1176         __u64           owner,
1177         unsigned int    flags)
1178 {
1179         xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1180                                  btnum, level, numrecs, owner, flags);
1181 }
1182
1183 STATIC void
1184 xfs_btree_init_block_cur(
1185         struct xfs_btree_cur    *cur,
1186         struct xfs_buf          *bp,
1187         int                     level,
1188         int                     numrecs)
1189 {
1190         __u64                   owner;
1191
1192         /*
1193          * we can pull the owner from the cursor right now as the different
1194          * owners align directly with the pointer size of the btree. This may
1195          * change in future, but is safe for current users of the generic btree
1196          * code.
1197          */
1198         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1199                 owner = cur->bc_private.b.ip->i_ino;
1200         else
1201                 owner = cur->bc_private.a.agno;
1202
1203         xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1204                                  cur->bc_btnum, level, numrecs,
1205                                  owner, cur->bc_flags);
1206 }
1207
1208 /*
1209  * Return true if ptr is the last record in the btree and
1210  * we need to track updates to this record.  The decision
1211  * will be further refined in the update_lastrec method.
1212  */
1213 STATIC int
1214 xfs_btree_is_lastrec(
1215         struct xfs_btree_cur    *cur,
1216         struct xfs_btree_block  *block,
1217         int                     level)
1218 {
1219         union xfs_btree_ptr     ptr;
1220
1221         if (level > 0)
1222                 return 0;
1223         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1224                 return 0;
1225
1226         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1227         if (!xfs_btree_ptr_is_null(cur, &ptr))
1228                 return 0;
1229         return 1;
1230 }
1231
1232 STATIC void
1233 xfs_btree_buf_to_ptr(
1234         struct xfs_btree_cur    *cur,
1235         struct xfs_buf          *bp,
1236         union xfs_btree_ptr     *ptr)
1237 {
1238         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1239                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1240                                         XFS_BUF_ADDR(bp)));
1241         else {
1242                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1243                                         XFS_BUF_ADDR(bp)));
1244         }
1245 }
1246
1247 STATIC void
1248 xfs_btree_set_refs(
1249         struct xfs_btree_cur    *cur,
1250         struct xfs_buf          *bp)
1251 {
1252         switch (cur->bc_btnum) {
1253         case XFS_BTNUM_BNO:
1254         case XFS_BTNUM_CNT:
1255                 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1256                 break;
1257         case XFS_BTNUM_INO:
1258         case XFS_BTNUM_FINO:
1259                 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1260                 break;
1261         case XFS_BTNUM_BMAP:
1262                 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1263                 break;
1264         case XFS_BTNUM_RMAP:
1265                 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1266                 break;
1267         case XFS_BTNUM_REFC:
1268                 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
1269                 break;
1270         default:
1271                 ASSERT(0);
1272         }
1273 }
1274
1275 STATIC int
1276 xfs_btree_get_buf_block(
1277         struct xfs_btree_cur    *cur,
1278         union xfs_btree_ptr     *ptr,
1279         int                     flags,
1280         struct xfs_btree_block  **block,
1281         struct xfs_buf          **bpp)
1282 {
1283         struct xfs_mount        *mp = cur->bc_mp;
1284         xfs_daddr_t             d;
1285
1286         /* need to sort out how callers deal with failures first */
1287         ASSERT(!(flags & XBF_TRYLOCK));
1288
1289         d = xfs_btree_ptr_to_daddr(cur, ptr);
1290         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1291                                  mp->m_bsize, flags);
1292
1293         if (!*bpp)
1294                 return -ENOMEM;
1295
1296         (*bpp)->b_ops = cur->bc_ops->buf_ops;
1297         *block = XFS_BUF_TO_BLOCK(*bpp);
1298         return 0;
1299 }
1300
1301 /*
1302  * Read in the buffer at the given ptr and return the buffer and
1303  * the block pointer within the buffer.
1304  */
1305 STATIC int
1306 xfs_btree_read_buf_block(
1307         struct xfs_btree_cur    *cur,
1308         union xfs_btree_ptr     *ptr,
1309         int                     flags,
1310         struct xfs_btree_block  **block,
1311         struct xfs_buf          **bpp)
1312 {
1313         struct xfs_mount        *mp = cur->bc_mp;
1314         xfs_daddr_t             d;
1315         int                     error;
1316
1317         /* need to sort out how callers deal with failures first */
1318         ASSERT(!(flags & XBF_TRYLOCK));
1319
1320         d = xfs_btree_ptr_to_daddr(cur, ptr);
1321         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1322                                    mp->m_bsize, flags, bpp,
1323                                    cur->bc_ops->buf_ops);
1324         if (error)
1325                 return error;
1326
1327         xfs_btree_set_refs(cur, *bpp);
1328         *block = XFS_BUF_TO_BLOCK(*bpp);
1329         return 0;
1330 }
1331
1332 /*
1333  * Copy keys from one btree block to another.
1334  */
1335 STATIC void
1336 xfs_btree_copy_keys(
1337         struct xfs_btree_cur    *cur,
1338         union xfs_btree_key     *dst_key,
1339         union xfs_btree_key     *src_key,
1340         int                     numkeys)
1341 {
1342         ASSERT(numkeys >= 0);
1343         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1344 }
1345
1346 /*
1347  * Copy records from one btree block to another.
1348  */
1349 STATIC void
1350 xfs_btree_copy_recs(
1351         struct xfs_btree_cur    *cur,
1352         union xfs_btree_rec     *dst_rec,
1353         union xfs_btree_rec     *src_rec,
1354         int                     numrecs)
1355 {
1356         ASSERT(numrecs >= 0);
1357         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1358 }
1359
1360 /*
1361  * Copy block pointers from one btree block to another.
1362  */
1363 STATIC void
1364 xfs_btree_copy_ptrs(
1365         struct xfs_btree_cur    *cur,
1366         union xfs_btree_ptr     *dst_ptr,
1367         union xfs_btree_ptr     *src_ptr,
1368         int                     numptrs)
1369 {
1370         ASSERT(numptrs >= 0);
1371         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1372 }
1373
1374 /*
1375  * Shift keys one index left/right inside a single btree block.
1376  */
1377 STATIC void
1378 xfs_btree_shift_keys(
1379         struct xfs_btree_cur    *cur,
1380         union xfs_btree_key     *key,
1381         int                     dir,
1382         int                     numkeys)
1383 {
1384         char                    *dst_key;
1385
1386         ASSERT(numkeys >= 0);
1387         ASSERT(dir == 1 || dir == -1);
1388
1389         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1390         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1391 }
1392
1393 /*
1394  * Shift records one index left/right inside a single btree block.
1395  */
1396 STATIC void
1397 xfs_btree_shift_recs(
1398         struct xfs_btree_cur    *cur,
1399         union xfs_btree_rec     *rec,
1400         int                     dir,
1401         int                     numrecs)
1402 {
1403         char                    *dst_rec;
1404
1405         ASSERT(numrecs >= 0);
1406         ASSERT(dir == 1 || dir == -1);
1407
1408         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1409         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1410 }
1411
1412 /*
1413  * Shift block pointers one index left/right inside a single btree block.
1414  */
1415 STATIC void
1416 xfs_btree_shift_ptrs(
1417         struct xfs_btree_cur    *cur,
1418         union xfs_btree_ptr     *ptr,
1419         int                     dir,
1420         int                     numptrs)
1421 {
1422         char                    *dst_ptr;
1423
1424         ASSERT(numptrs >= 0);
1425         ASSERT(dir == 1 || dir == -1);
1426
1427         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1428         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1429 }
1430
1431 /*
1432  * Log key values from the btree block.
1433  */
1434 STATIC void
1435 xfs_btree_log_keys(
1436         struct xfs_btree_cur    *cur,
1437         struct xfs_buf          *bp,
1438         int                     first,
1439         int                     last)
1440 {
1441         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1442         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1443
1444         if (bp) {
1445                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1446                 xfs_trans_log_buf(cur->bc_tp, bp,
1447                                   xfs_btree_key_offset(cur, first),
1448                                   xfs_btree_key_offset(cur, last + 1) - 1);
1449         } else {
1450                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1451                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1452         }
1453
1454         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1455 }
1456
1457 /*
1458  * Log record values from the btree block.
1459  */
1460 void
1461 xfs_btree_log_recs(
1462         struct xfs_btree_cur    *cur,
1463         struct xfs_buf          *bp,
1464         int                     first,
1465         int                     last)
1466 {
1467         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1468         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1469
1470         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1471         xfs_trans_log_buf(cur->bc_tp, bp,
1472                           xfs_btree_rec_offset(cur, first),
1473                           xfs_btree_rec_offset(cur, last + 1) - 1);
1474
1475         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1476 }
1477
1478 /*
1479  * Log block pointer fields from a btree block (nonleaf).
1480  */
1481 STATIC void
1482 xfs_btree_log_ptrs(
1483         struct xfs_btree_cur    *cur,   /* btree cursor */
1484         struct xfs_buf          *bp,    /* buffer containing btree block */
1485         int                     first,  /* index of first pointer to log */
1486         int                     last)   /* index of last pointer to log */
1487 {
1488         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1489         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1490
1491         if (bp) {
1492                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1493                 int                     level = xfs_btree_get_level(block);
1494
1495                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1496                 xfs_trans_log_buf(cur->bc_tp, bp,
1497                                 xfs_btree_ptr_offset(cur, first, level),
1498                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1499         } else {
1500                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1501                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1502         }
1503
1504         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1505 }
1506
1507 /*
1508  * Log fields from a btree block header.
1509  */
1510 void
1511 xfs_btree_log_block(
1512         struct xfs_btree_cur    *cur,   /* btree cursor */
1513         struct xfs_buf          *bp,    /* buffer containing btree block */
1514         int                     fields) /* mask of fields: XFS_BB_... */
1515 {
1516         int                     first;  /* first byte offset logged */
1517         int                     last;   /* last byte offset logged */
1518         static const short      soffsets[] = {  /* table of offsets (short) */
1519                 offsetof(struct xfs_btree_block, bb_magic),
1520                 offsetof(struct xfs_btree_block, bb_level),
1521                 offsetof(struct xfs_btree_block, bb_numrecs),
1522                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1523                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1524                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1525                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1526                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1527                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1528                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1529                 XFS_BTREE_SBLOCK_CRC_LEN
1530         };
1531         static const short      loffsets[] = {  /* table of offsets (long) */
1532                 offsetof(struct xfs_btree_block, bb_magic),
1533                 offsetof(struct xfs_btree_block, bb_level),
1534                 offsetof(struct xfs_btree_block, bb_numrecs),
1535                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1536                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1537                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1538                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1539                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1540                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1541                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1542                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1543                 XFS_BTREE_LBLOCK_CRC_LEN
1544         };
1545
1546         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1547         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1548
1549         if (bp) {
1550                 int nbits;
1551
1552                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1553                         /*
1554                          * We don't log the CRC when updating a btree
1555                          * block but instead recreate it during log
1556                          * recovery.  As the log buffers have checksums
1557                          * of their own this is safe and avoids logging a crc
1558                          * update in a lot of places.
1559                          */
1560                         if (fields == XFS_BB_ALL_BITS)
1561                                 fields = XFS_BB_ALL_BITS_CRC;
1562                         nbits = XFS_BB_NUM_BITS_CRC;
1563                 } else {
1564                         nbits = XFS_BB_NUM_BITS;
1565                 }
1566                 xfs_btree_offsets(fields,
1567                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1568                                         loffsets : soffsets,
1569                                   nbits, &first, &last);
1570                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1571                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1572         } else {
1573                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1574                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1575         }
1576
1577         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1578 }
1579
1580 /*
1581  * Increment cursor by one record at the level.
1582  * For nonzero levels the leaf-ward information is untouched.
1583  */
1584 int                                             /* error */
1585 xfs_btree_increment(
1586         struct xfs_btree_cur    *cur,
1587         int                     level,
1588         int                     *stat)          /* success/failure */
1589 {
1590         struct xfs_btree_block  *block;
1591         union xfs_btree_ptr     ptr;
1592         struct xfs_buf          *bp;
1593         int                     error;          /* error return value */
1594         int                     lev;
1595
1596         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1597         XFS_BTREE_TRACE_ARGI(cur, level);
1598
1599         ASSERT(level < cur->bc_nlevels);
1600
1601         /* Read-ahead to the right at this level. */
1602         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1603
1604         /* Get a pointer to the btree block. */
1605         block = xfs_btree_get_block(cur, level, &bp);
1606
1607 #ifdef DEBUG
1608         error = xfs_btree_check_block(cur, block, level, bp);
1609         if (error)
1610                 goto error0;
1611 #endif
1612
1613         /* We're done if we remain in the block after the increment. */
1614         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1615                 goto out1;
1616
1617         /* Fail if we just went off the right edge of the tree. */
1618         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1619         if (xfs_btree_ptr_is_null(cur, &ptr))
1620                 goto out0;
1621
1622         XFS_BTREE_STATS_INC(cur, increment);
1623
1624         /*
1625          * March up the tree incrementing pointers.
1626          * Stop when we don't go off the right edge of a block.
1627          */
1628         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1629                 block = xfs_btree_get_block(cur, lev, &bp);
1630
1631 #ifdef DEBUG
1632                 error = xfs_btree_check_block(cur, block, lev, bp);
1633                 if (error)
1634                         goto error0;
1635 #endif
1636
1637                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1638                         break;
1639
1640                 /* Read-ahead the right block for the next loop. */
1641                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1642         }
1643
1644         /*
1645          * If we went off the root then we are either seriously
1646          * confused or have the tree root in an inode.
1647          */
1648         if (lev == cur->bc_nlevels) {
1649                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1650                         goto out0;
1651                 ASSERT(0);
1652                 error = -EFSCORRUPTED;
1653                 goto error0;
1654         }
1655         ASSERT(lev < cur->bc_nlevels);
1656
1657         /*
1658          * Now walk back down the tree, fixing up the cursor's buffer
1659          * pointers and key numbers.
1660          */
1661         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1662                 union xfs_btree_ptr     *ptrp;
1663
1664                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1665                 --lev;
1666                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1667                 if (error)
1668                         goto error0;
1669
1670                 xfs_btree_setbuf(cur, lev, bp);
1671                 cur->bc_ptrs[lev] = 1;
1672         }
1673 out1:
1674         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1675         *stat = 1;
1676         return 0;
1677
1678 out0:
1679         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1680         *stat = 0;
1681         return 0;
1682
1683 error0:
1684         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1685         return error;
1686 }
1687
1688 /*
1689  * Decrement cursor by one record at the level.
1690  * For nonzero levels the leaf-ward information is untouched.
1691  */
1692 int                                             /* error */
1693 xfs_btree_decrement(
1694         struct xfs_btree_cur    *cur,
1695         int                     level,
1696         int                     *stat)          /* success/failure */
1697 {
1698         struct xfs_btree_block  *block;
1699         xfs_buf_t               *bp;
1700         int                     error;          /* error return value */
1701         int                     lev;
1702         union xfs_btree_ptr     ptr;
1703
1704         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1705         XFS_BTREE_TRACE_ARGI(cur, level);
1706
1707         ASSERT(level < cur->bc_nlevels);
1708
1709         /* Read-ahead to the left at this level. */
1710         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1711
1712         /* We're done if we remain in the block after the decrement. */
1713         if (--cur->bc_ptrs[level] > 0)
1714                 goto out1;
1715
1716         /* Get a pointer to the btree block. */
1717         block = xfs_btree_get_block(cur, level, &bp);
1718
1719 #ifdef DEBUG
1720         error = xfs_btree_check_block(cur, block, level, bp);
1721         if (error)
1722                 goto error0;
1723 #endif
1724
1725         /* Fail if we just went off the left edge of the tree. */
1726         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1727         if (xfs_btree_ptr_is_null(cur, &ptr))
1728                 goto out0;
1729
1730         XFS_BTREE_STATS_INC(cur, decrement);
1731
1732         /*
1733          * March up the tree decrementing pointers.
1734          * Stop when we don't go off the left edge of a block.
1735          */
1736         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1737                 if (--cur->bc_ptrs[lev] > 0)
1738                         break;
1739                 /* Read-ahead the left block for the next loop. */
1740                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1741         }
1742
1743         /*
1744          * If we went off the root then we are seriously confused.
1745          * or the root of the tree is in an inode.
1746          */
1747         if (lev == cur->bc_nlevels) {
1748                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1749                         goto out0;
1750                 ASSERT(0);
1751                 error = -EFSCORRUPTED;
1752                 goto error0;
1753         }
1754         ASSERT(lev < cur->bc_nlevels);
1755
1756         /*
1757          * Now walk back down the tree, fixing up the cursor's buffer
1758          * pointers and key numbers.
1759          */
1760         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1761                 union xfs_btree_ptr     *ptrp;
1762
1763                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1764                 --lev;
1765                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1766                 if (error)
1767                         goto error0;
1768                 xfs_btree_setbuf(cur, lev, bp);
1769                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1770         }
1771 out1:
1772         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1773         *stat = 1;
1774         return 0;
1775
1776 out0:
1777         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1778         *stat = 0;
1779         return 0;
1780
1781 error0:
1782         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1783         return error;
1784 }
1785
1786 int
1787 xfs_btree_lookup_get_block(
1788         struct xfs_btree_cur    *cur,   /* btree cursor */
1789         int                     level,  /* level in the btree */
1790         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1791         struct xfs_btree_block  **blkp) /* return btree block */
1792 {
1793         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1794         int                     error = 0;
1795
1796         /* special case the root block if in an inode */
1797         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1798             (level == cur->bc_nlevels - 1)) {
1799                 *blkp = xfs_btree_get_iroot(cur);
1800                 return 0;
1801         }
1802
1803         /*
1804          * If the old buffer at this level for the disk address we are
1805          * looking for re-use it.
1806          *
1807          * Otherwise throw it away and get a new one.
1808          */
1809         bp = cur->bc_bufs[level];
1810         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1811                 *blkp = XFS_BUF_TO_BLOCK(bp);
1812                 return 0;
1813         }
1814
1815         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1816         if (error)
1817                 return error;
1818
1819         /* Check the inode owner since the verifiers don't. */
1820         if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
1821             !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
1822             (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
1823             be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
1824                         cur->bc_private.b.ip->i_ino)
1825                 goto out_bad;
1826
1827         /* Did we get the level we were looking for? */
1828         if (be16_to_cpu((*blkp)->bb_level) != level)
1829                 goto out_bad;
1830
1831         /* Check that internal nodes have at least one record. */
1832         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1833                 goto out_bad;
1834
1835         xfs_btree_setbuf(cur, level, bp);
1836         return 0;
1837
1838 out_bad:
1839         *blkp = NULL;
1840         xfs_trans_brelse(cur->bc_tp, bp);
1841         return -EFSCORRUPTED;
1842 }
1843
1844 /*
1845  * Get current search key.  For level 0 we don't actually have a key
1846  * structure so we make one up from the record.  For all other levels
1847  * we just return the right key.
1848  */
1849 STATIC union xfs_btree_key *
1850 xfs_lookup_get_search_key(
1851         struct xfs_btree_cur    *cur,
1852         int                     level,
1853         int                     keyno,
1854         struct xfs_btree_block  *block,
1855         union xfs_btree_key     *kp)
1856 {
1857         if (level == 0) {
1858                 cur->bc_ops->init_key_from_rec(kp,
1859                                 xfs_btree_rec_addr(cur, keyno, block));
1860                 return kp;
1861         }
1862
1863         return xfs_btree_key_addr(cur, keyno, block);
1864 }
1865
1866 /*
1867  * Lookup the record.  The cursor is made to point to it, based on dir.
1868  * stat is set to 0 if can't find any such record, 1 for success.
1869  */
1870 int                                     /* error */
1871 xfs_btree_lookup(
1872         struct xfs_btree_cur    *cur,   /* btree cursor */
1873         xfs_lookup_t            dir,    /* <=, ==, or >= */
1874         int                     *stat)  /* success/failure */
1875 {
1876         struct xfs_btree_block  *block; /* current btree block */
1877         int64_t                 diff;   /* difference for the current key */
1878         int                     error;  /* error return value */
1879         int                     keyno;  /* current key number */
1880         int                     level;  /* level in the btree */
1881         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1882         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1883
1884         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1885         XFS_BTREE_TRACE_ARGI(cur, dir);
1886
1887         XFS_BTREE_STATS_INC(cur, lookup);
1888
1889         /* No such thing as a zero-level tree. */
1890         if (cur->bc_nlevels == 0)
1891                 return -EFSCORRUPTED;
1892
1893         block = NULL;
1894         keyno = 0;
1895
1896         /* initialise start pointer from cursor */
1897         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1898         pp = &ptr;
1899
1900         /*
1901          * Iterate over each level in the btree, starting at the root.
1902          * For each level above the leaves, find the key we need, based
1903          * on the lookup record, then follow the corresponding block
1904          * pointer down to the next level.
1905          */
1906         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1907                 /* Get the block we need to do the lookup on. */
1908                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1909                 if (error)
1910                         goto error0;
1911
1912                 if (diff == 0) {
1913                         /*
1914                          * If we already had a key match at a higher level, we
1915                          * know we need to use the first entry in this block.
1916                          */
1917                         keyno = 1;
1918                 } else {
1919                         /* Otherwise search this block. Do a binary search. */
1920
1921                         int     high;   /* high entry number */
1922                         int     low;    /* low entry number */
1923
1924                         /* Set low and high entry numbers, 1-based. */
1925                         low = 1;
1926                         high = xfs_btree_get_numrecs(block);
1927                         if (!high) {
1928                                 /* Block is empty, must be an empty leaf. */
1929                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1930
1931                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1932                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1933                                 *stat = 0;
1934                                 return 0;
1935                         }
1936
1937                         /* Binary search the block. */
1938                         while (low <= high) {
1939                                 union xfs_btree_key     key;
1940                                 union xfs_btree_key     *kp;
1941
1942                                 XFS_BTREE_STATS_INC(cur, compare);
1943
1944                                 /* keyno is average of low and high. */
1945                                 keyno = (low + high) >> 1;
1946
1947                                 /* Get current search key */
1948                                 kp = xfs_lookup_get_search_key(cur, level,
1949                                                 keyno, block, &key);
1950
1951                                 /*
1952                                  * Compute difference to get next direction:
1953                                  *  - less than, move right
1954                                  *  - greater than, move left
1955                                  *  - equal, we're done
1956                                  */
1957                                 diff = cur->bc_ops->key_diff(cur, kp);
1958                                 if (diff < 0)
1959                                         low = keyno + 1;
1960                                 else if (diff > 0)
1961                                         high = keyno - 1;
1962                                 else
1963                                         break;
1964                         }
1965                 }
1966
1967                 /*
1968                  * If there are more levels, set up for the next level
1969                  * by getting the block number and filling in the cursor.
1970                  */
1971                 if (level > 0) {
1972                         /*
1973                          * If we moved left, need the previous key number,
1974                          * unless there isn't one.
1975                          */
1976                         if (diff > 0 && --keyno < 1)
1977                                 keyno = 1;
1978                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1979
1980 #ifdef DEBUG
1981                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1982                         if (error)
1983                                 goto error0;
1984 #endif
1985                         cur->bc_ptrs[level] = keyno;
1986                 }
1987         }
1988
1989         /* Done with the search. See if we need to adjust the results. */
1990         if (dir != XFS_LOOKUP_LE && diff < 0) {
1991                 keyno++;
1992                 /*
1993                  * If ge search and we went off the end of the block, but it's
1994                  * not the last block, we're in the wrong block.
1995                  */
1996                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1997                 if (dir == XFS_LOOKUP_GE &&
1998                     keyno > xfs_btree_get_numrecs(block) &&
1999                     !xfs_btree_ptr_is_null(cur, &ptr)) {
2000                         int     i;
2001
2002                         cur->bc_ptrs[0] = keyno;
2003                         error = xfs_btree_increment(cur, 0, &i);
2004                         if (error)
2005                                 goto error0;
2006                         XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
2007                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2008                         *stat = 1;
2009                         return 0;
2010                 }
2011         } else if (dir == XFS_LOOKUP_LE && diff > 0)
2012                 keyno--;
2013         cur->bc_ptrs[0] = keyno;
2014
2015         /* Return if we succeeded or not. */
2016         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
2017                 *stat = 0;
2018         else if (dir != XFS_LOOKUP_EQ || diff == 0)
2019                 *stat = 1;
2020         else
2021                 *stat = 0;
2022         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2023         return 0;
2024
2025 error0:
2026         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2027         return error;
2028 }
2029
2030 /* Find the high key storage area from a regular key. */
2031 union xfs_btree_key *
2032 xfs_btree_high_key_from_key(
2033         struct xfs_btree_cur    *cur,
2034         union xfs_btree_key     *key)
2035 {
2036         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2037         return (union xfs_btree_key *)((char *)key +
2038                         (cur->bc_ops->key_len / 2));
2039 }
2040
2041 /* Determine the low (and high if overlapped) keys of a leaf block */
2042 STATIC void
2043 xfs_btree_get_leaf_keys(
2044         struct xfs_btree_cur    *cur,
2045         struct xfs_btree_block  *block,
2046         union xfs_btree_key     *key)
2047 {
2048         union xfs_btree_key     max_hkey;
2049         union xfs_btree_key     hkey;
2050         union xfs_btree_rec     *rec;
2051         union xfs_btree_key     *high;
2052         int                     n;
2053
2054         rec = xfs_btree_rec_addr(cur, 1, block);
2055         cur->bc_ops->init_key_from_rec(key, rec);
2056
2057         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2058
2059                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2060                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2061                         rec = xfs_btree_rec_addr(cur, n, block);
2062                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2063                         if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
2064                                         > 0)
2065                                 max_hkey = hkey;
2066                 }
2067
2068                 high = xfs_btree_high_key_from_key(cur, key);
2069                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2070         }
2071 }
2072
2073 /* Determine the low (and high if overlapped) keys of a node block */
2074 STATIC void
2075 xfs_btree_get_node_keys(
2076         struct xfs_btree_cur    *cur,
2077         struct xfs_btree_block  *block,
2078         union xfs_btree_key     *key)
2079 {
2080         union xfs_btree_key     *hkey;
2081         union xfs_btree_key     *max_hkey;
2082         union xfs_btree_key     *high;
2083         int                     n;
2084
2085         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2086                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2087                                 cur->bc_ops->key_len / 2);
2088
2089                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2090                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2091                         hkey = xfs_btree_high_key_addr(cur, n, block);
2092                         if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2093                                 max_hkey = hkey;
2094                 }
2095
2096                 high = xfs_btree_high_key_from_key(cur, key);
2097                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2098         } else {
2099                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2100                                 cur->bc_ops->key_len);
2101         }
2102 }
2103
2104 /* Derive the keys for any btree block. */
2105 void
2106 xfs_btree_get_keys(
2107         struct xfs_btree_cur    *cur,
2108         struct xfs_btree_block  *block,
2109         union xfs_btree_key     *key)
2110 {
2111         if (be16_to_cpu(block->bb_level) == 0)
2112                 xfs_btree_get_leaf_keys(cur, block, key);
2113         else
2114                 xfs_btree_get_node_keys(cur, block, key);
2115 }
2116
2117 /*
2118  * Decide if we need to update the parent keys of a btree block.  For
2119  * a standard btree this is only necessary if we're updating the first
2120  * record/key.  For an overlapping btree, we must always update the
2121  * keys because the highest key can be in any of the records or keys
2122  * in the block.
2123  */
2124 static inline bool
2125 xfs_btree_needs_key_update(
2126         struct xfs_btree_cur    *cur,
2127         int                     ptr)
2128 {
2129         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2130 }
2131
2132 /*
2133  * Update the low and high parent keys of the given level, progressing
2134  * towards the root.  If force_all is false, stop if the keys for a given
2135  * level do not need updating.
2136  */
2137 STATIC int
2138 __xfs_btree_updkeys(
2139         struct xfs_btree_cur    *cur,
2140         int                     level,
2141         struct xfs_btree_block  *block,
2142         struct xfs_buf          *bp0,
2143         bool                    force_all)
2144 {
2145         union xfs_btree_key     key;    /* keys from current level */
2146         union xfs_btree_key     *lkey;  /* keys from the next level up */
2147         union xfs_btree_key     *hkey;
2148         union xfs_btree_key     *nlkey; /* keys from the next level up */
2149         union xfs_btree_key     *nhkey;
2150         struct xfs_buf          *bp;
2151         int                     ptr;
2152
2153         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2154
2155         /* Exit if there aren't any parent levels to update. */
2156         if (level + 1 >= cur->bc_nlevels)
2157                 return 0;
2158
2159         trace_xfs_btree_updkeys(cur, level, bp0);
2160
2161         lkey = &key;
2162         hkey = xfs_btree_high_key_from_key(cur, lkey);
2163         xfs_btree_get_keys(cur, block, lkey);
2164         for (level++; level < cur->bc_nlevels; level++) {
2165 #ifdef DEBUG
2166                 int             error;
2167 #endif
2168                 block = xfs_btree_get_block(cur, level, &bp);
2169                 trace_xfs_btree_updkeys(cur, level, bp);
2170 #ifdef DEBUG
2171                 error = xfs_btree_check_block(cur, block, level, bp);
2172                 if (error) {
2173                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2174                         return error;
2175                 }
2176 #endif
2177                 ptr = cur->bc_ptrs[level];
2178                 nlkey = xfs_btree_key_addr(cur, ptr, block);
2179                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2180                 if (!force_all &&
2181                     !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
2182                       cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
2183                         break;
2184                 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2185                 xfs_btree_log_keys(cur, bp, ptr, ptr);
2186                 if (level + 1 >= cur->bc_nlevels)
2187                         break;
2188                 xfs_btree_get_node_keys(cur, block, lkey);
2189         }
2190
2191         return 0;
2192 }
2193
2194 /* Update all the keys from some level in cursor back to the root. */
2195 STATIC int
2196 xfs_btree_updkeys_force(
2197         struct xfs_btree_cur    *cur,
2198         int                     level)
2199 {
2200         struct xfs_buf          *bp;
2201         struct xfs_btree_block  *block;
2202
2203         block = xfs_btree_get_block(cur, level, &bp);
2204         return __xfs_btree_updkeys(cur, level, block, bp, true);
2205 }
2206
2207 /*
2208  * Update the parent keys of the given level, progressing towards the root.
2209  */
2210 STATIC int
2211 xfs_btree_update_keys(
2212         struct xfs_btree_cur    *cur,
2213         int                     level)
2214 {
2215         struct xfs_btree_block  *block;
2216         struct xfs_buf          *bp;
2217         union xfs_btree_key     *kp;
2218         union xfs_btree_key     key;
2219         int                     ptr;
2220
2221         ASSERT(level >= 0);
2222
2223         block = xfs_btree_get_block(cur, level, &bp);
2224         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2225                 return __xfs_btree_updkeys(cur, level, block, bp, false);
2226
2227         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2228         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
2229
2230         /*
2231          * Go up the tree from this level toward the root.
2232          * At each level, update the key value to the value input.
2233          * Stop when we reach a level where the cursor isn't pointing
2234          * at the first entry in the block.
2235          */
2236         xfs_btree_get_keys(cur, block, &key);
2237         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2238 #ifdef DEBUG
2239                 int             error;
2240 #endif
2241                 block = xfs_btree_get_block(cur, level, &bp);
2242 #ifdef DEBUG
2243                 error = xfs_btree_check_block(cur, block, level, bp);
2244                 if (error) {
2245                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2246                         return error;
2247                 }
2248 #endif
2249                 ptr = cur->bc_ptrs[level];
2250                 kp = xfs_btree_key_addr(cur, ptr, block);
2251                 xfs_btree_copy_keys(cur, kp, &key, 1);
2252                 xfs_btree_log_keys(cur, bp, ptr, ptr);
2253         }
2254
2255         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2256         return 0;
2257 }
2258
2259 /*
2260  * Update the record referred to by cur to the value in the
2261  * given record. This either works (return 0) or gets an
2262  * EFSCORRUPTED error.
2263  */
2264 int
2265 xfs_btree_update(
2266         struct xfs_btree_cur    *cur,
2267         union xfs_btree_rec     *rec)
2268 {
2269         struct xfs_btree_block  *block;
2270         struct xfs_buf          *bp;
2271         int                     error;
2272         int                     ptr;
2273         union xfs_btree_rec     *rp;
2274
2275         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2276         XFS_BTREE_TRACE_ARGR(cur, rec);
2277
2278         /* Pick up the current block. */
2279         block = xfs_btree_get_block(cur, 0, &bp);
2280
2281 #ifdef DEBUG
2282         error = xfs_btree_check_block(cur, block, 0, bp);
2283         if (error)
2284                 goto error0;
2285 #endif
2286         /* Get the address of the rec to be updated. */
2287         ptr = cur->bc_ptrs[0];
2288         rp = xfs_btree_rec_addr(cur, ptr, block);
2289
2290         /* Fill in the new contents and log them. */
2291         xfs_btree_copy_recs(cur, rp, rec, 1);
2292         xfs_btree_log_recs(cur, bp, ptr, ptr);
2293
2294         /*
2295          * If we are tracking the last record in the tree and
2296          * we are at the far right edge of the tree, update it.
2297          */
2298         if (xfs_btree_is_lastrec(cur, block, 0)) {
2299                 cur->bc_ops->update_lastrec(cur, block, rec,
2300                                             ptr, LASTREC_UPDATE);
2301         }
2302
2303         /* Pass new key value up to our parent. */
2304         if (xfs_btree_needs_key_update(cur, ptr)) {
2305                 error = xfs_btree_update_keys(cur, 0);
2306                 if (error)
2307                         goto error0;
2308         }
2309
2310         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2311         return 0;
2312
2313 error0:
2314         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2315         return error;
2316 }
2317
2318 /*
2319  * Move 1 record left from cur/level if possible.
2320  * Update cur to reflect the new path.
2321  */
2322 STATIC int                                      /* error */
2323 xfs_btree_lshift(
2324         struct xfs_btree_cur    *cur,
2325         int                     level,
2326         int                     *stat)          /* success/failure */
2327 {
2328         struct xfs_buf          *lbp;           /* left buffer pointer */
2329         struct xfs_btree_block  *left;          /* left btree block */
2330         int                     lrecs;          /* left record count */
2331         struct xfs_buf          *rbp;           /* right buffer pointer */
2332         struct xfs_btree_block  *right;         /* right btree block */
2333         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2334         int                     rrecs;          /* right record count */
2335         union xfs_btree_ptr     lptr;           /* left btree pointer */
2336         union xfs_btree_key     *rkp = NULL;    /* right btree key */
2337         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
2338         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
2339         int                     error;          /* error return value */
2340         int                     i;
2341
2342         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2343         XFS_BTREE_TRACE_ARGI(cur, level);
2344
2345         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2346             level == cur->bc_nlevels - 1)
2347                 goto out0;
2348
2349         /* Set up variables for this block as "right". */
2350         right = xfs_btree_get_block(cur, level, &rbp);
2351
2352 #ifdef DEBUG
2353         error = xfs_btree_check_block(cur, right, level, rbp);
2354         if (error)
2355                 goto error0;
2356 #endif
2357
2358         /* If we've got no left sibling then we can't shift an entry left. */
2359         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2360         if (xfs_btree_ptr_is_null(cur, &lptr))
2361                 goto out0;
2362
2363         /*
2364          * If the cursor entry is the one that would be moved, don't
2365          * do it... it's too complicated.
2366          */
2367         if (cur->bc_ptrs[level] <= 1)
2368                 goto out0;
2369
2370         /* Set up the left neighbor as "left". */
2371         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2372         if (error)
2373                 goto error0;
2374
2375         /* If it's full, it can't take another entry. */
2376         lrecs = xfs_btree_get_numrecs(left);
2377         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2378                 goto out0;
2379
2380         rrecs = xfs_btree_get_numrecs(right);
2381
2382         /*
2383          * We add one entry to the left side and remove one for the right side.
2384          * Account for it here, the changes will be updated on disk and logged
2385          * later.
2386          */
2387         lrecs++;
2388         rrecs--;
2389
2390         XFS_BTREE_STATS_INC(cur, lshift);
2391         XFS_BTREE_STATS_ADD(cur, moves, 1);
2392
2393         /*
2394          * If non-leaf, copy a key and a ptr to the left block.
2395          * Log the changes to the left block.
2396          */
2397         if (level > 0) {
2398                 /* It's a non-leaf.  Move keys and pointers. */
2399                 union xfs_btree_key     *lkp;   /* left btree key */
2400                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2401
2402                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2403                 rkp = xfs_btree_key_addr(cur, 1, right);
2404
2405                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2406                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2407 #ifdef DEBUG
2408                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
2409                 if (error)
2410                         goto error0;
2411 #endif
2412                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2413                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2414
2415                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2416                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2417
2418                 ASSERT(cur->bc_ops->keys_inorder(cur,
2419                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2420         } else {
2421                 /* It's a leaf.  Move records.  */
2422                 union xfs_btree_rec     *lrp;   /* left record pointer */
2423
2424                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2425                 rrp = xfs_btree_rec_addr(cur, 1, right);
2426
2427                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2428                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2429
2430                 ASSERT(cur->bc_ops->recs_inorder(cur,
2431                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2432         }
2433
2434         xfs_btree_set_numrecs(left, lrecs);
2435         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2436
2437         xfs_btree_set_numrecs(right, rrecs);
2438         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2439
2440         /*
2441          * Slide the contents of right down one entry.
2442          */
2443         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2444         if (level > 0) {
2445                 /* It's a nonleaf. operate on keys and ptrs */
2446 #ifdef DEBUG
2447                 int                     i;              /* loop index */
2448
2449                 for (i = 0; i < rrecs; i++) {
2450                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2451                         if (error)
2452                                 goto error0;
2453                 }
2454 #endif
2455                 xfs_btree_shift_keys(cur,
2456                                 xfs_btree_key_addr(cur, 2, right),
2457                                 -1, rrecs);
2458                 xfs_btree_shift_ptrs(cur,
2459                                 xfs_btree_ptr_addr(cur, 2, right),
2460                                 -1, rrecs);
2461
2462                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2463                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2464         } else {
2465                 /* It's a leaf. operate on records */
2466                 xfs_btree_shift_recs(cur,
2467                         xfs_btree_rec_addr(cur, 2, right),
2468                         -1, rrecs);
2469                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2470         }
2471
2472         /*
2473          * Using a temporary cursor, update the parent key values of the
2474          * block on the left.
2475          */
2476         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2477                 error = xfs_btree_dup_cursor(cur, &tcur);
2478                 if (error)
2479                         goto error0;
2480                 i = xfs_btree_firstrec(tcur, level);
2481                 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2482
2483                 error = xfs_btree_decrement(tcur, level, &i);
2484                 if (error)
2485                         goto error1;
2486
2487                 /* Update the parent high keys of the left block, if needed. */
2488                 error = xfs_btree_update_keys(tcur, level);
2489                 if (error)
2490                         goto error1;
2491
2492                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2493         }
2494
2495         /* Update the parent keys of the right block. */
2496         error = xfs_btree_update_keys(cur, level);
2497         if (error)
2498                 goto error0;
2499
2500         /* Slide the cursor value left one. */
2501         cur->bc_ptrs[level]--;
2502
2503         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2504         *stat = 1;
2505         return 0;
2506
2507 out0:
2508         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2509         *stat = 0;
2510         return 0;
2511
2512 error0:
2513         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2514         return error;
2515
2516 error1:
2517         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2518         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2519         return error;
2520 }
2521
2522 /*
2523  * Move 1 record right from cur/level if possible.
2524  * Update cur to reflect the new path.
2525  */
2526 STATIC int                                      /* error */
2527 xfs_btree_rshift(
2528         struct xfs_btree_cur    *cur,
2529         int                     level,
2530         int                     *stat)          /* success/failure */
2531 {
2532         struct xfs_buf          *lbp;           /* left buffer pointer */
2533         struct xfs_btree_block  *left;          /* left btree block */
2534         struct xfs_buf          *rbp;           /* right buffer pointer */
2535         struct xfs_btree_block  *right;         /* right btree block */
2536         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2537         union xfs_btree_ptr     rptr;           /* right block pointer */
2538         union xfs_btree_key     *rkp;           /* right btree key */
2539         int                     rrecs;          /* right record count */
2540         int                     lrecs;          /* left record count */
2541         int                     error;          /* error return value */
2542         int                     i;              /* loop counter */
2543
2544         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2545         XFS_BTREE_TRACE_ARGI(cur, level);
2546
2547         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2548             (level == cur->bc_nlevels - 1))
2549                 goto out0;
2550
2551         /* Set up variables for this block as "left". */
2552         left = xfs_btree_get_block(cur, level, &lbp);
2553
2554 #ifdef DEBUG
2555         error = xfs_btree_check_block(cur, left, level, lbp);
2556         if (error)
2557                 goto error0;
2558 #endif
2559
2560         /* If we've got no right sibling then we can't shift an entry right. */
2561         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2562         if (xfs_btree_ptr_is_null(cur, &rptr))
2563                 goto out0;
2564
2565         /*
2566          * If the cursor entry is the one that would be moved, don't
2567          * do it... it's too complicated.
2568          */
2569         lrecs = xfs_btree_get_numrecs(left);
2570         if (cur->bc_ptrs[level] >= lrecs)
2571                 goto out0;
2572
2573         /* Set up the right neighbor as "right". */
2574         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2575         if (error)
2576                 goto error0;
2577
2578         /* If it's full, it can't take another entry. */
2579         rrecs = xfs_btree_get_numrecs(right);
2580         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2581                 goto out0;
2582
2583         XFS_BTREE_STATS_INC(cur, rshift);
2584         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2585
2586         /*
2587          * Make a hole at the start of the right neighbor block, then
2588          * copy the last left block entry to the hole.
2589          */
2590         if (level > 0) {
2591                 /* It's a nonleaf. make a hole in the keys and ptrs */
2592                 union xfs_btree_key     *lkp;
2593                 union xfs_btree_ptr     *lpp;
2594                 union xfs_btree_ptr     *rpp;
2595
2596                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2597                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2598                 rkp = xfs_btree_key_addr(cur, 1, right);
2599                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2600
2601 #ifdef DEBUG
2602                 for (i = rrecs - 1; i >= 0; i--) {
2603                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2604                         if (error)
2605                                 goto error0;
2606                 }
2607 #endif
2608
2609                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2610                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2611
2612 #ifdef DEBUG
2613                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2614                 if (error)
2615                         goto error0;
2616 #endif
2617
2618                 /* Now put the new data in, and log it. */
2619                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2620                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2621
2622                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2623                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2624
2625                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2626                         xfs_btree_key_addr(cur, 2, right)));
2627         } else {
2628                 /* It's a leaf. make a hole in the records */
2629                 union xfs_btree_rec     *lrp;
2630                 union xfs_btree_rec     *rrp;
2631
2632                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2633                 rrp = xfs_btree_rec_addr(cur, 1, right);
2634
2635                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2636
2637                 /* Now put the new data in, and log it. */
2638                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2639                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2640         }
2641
2642         /*
2643          * Decrement and log left's numrecs, bump and log right's numrecs.
2644          */
2645         xfs_btree_set_numrecs(left, --lrecs);
2646         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2647
2648         xfs_btree_set_numrecs(right, ++rrecs);
2649         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2650
2651         /*
2652          * Using a temporary cursor, update the parent key values of the
2653          * block on the right.
2654          */
2655         error = xfs_btree_dup_cursor(cur, &tcur);
2656         if (error)
2657                 goto error0;
2658         i = xfs_btree_lastrec(tcur, level);
2659         XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2660
2661         error = xfs_btree_increment(tcur, level, &i);
2662         if (error)
2663                 goto error1;
2664
2665         /* Update the parent high keys of the left block, if needed. */
2666         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2667                 error = xfs_btree_update_keys(cur, level);
2668                 if (error)
2669                         goto error1;
2670         }
2671
2672         /* Update the parent keys of the right block. */
2673         error = xfs_btree_update_keys(tcur, level);
2674         if (error)
2675                 goto error1;
2676
2677         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2678
2679         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2680         *stat = 1;
2681         return 0;
2682
2683 out0:
2684         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2685         *stat = 0;
2686         return 0;
2687
2688 error0:
2689         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2690         return error;
2691
2692 error1:
2693         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2694         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2695         return error;
2696 }
2697
2698 /*
2699  * Split cur/level block in half.
2700  * Return new block number and the key to its first
2701  * record (to be inserted into parent).
2702  */
2703 STATIC int                                      /* error */
2704 __xfs_btree_split(
2705         struct xfs_btree_cur    *cur,
2706         int                     level,
2707         union xfs_btree_ptr     *ptrp,
2708         union xfs_btree_key     *key,
2709         struct xfs_btree_cur    **curp,
2710         int                     *stat)          /* success/failure */
2711 {
2712         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2713         struct xfs_buf          *lbp;           /* left buffer pointer */
2714         struct xfs_btree_block  *left;          /* left btree block */
2715         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2716         struct xfs_buf          *rbp;           /* right buffer pointer */
2717         struct xfs_btree_block  *right;         /* right btree block */
2718         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2719         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2720         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2721         int                     lrecs;
2722         int                     rrecs;
2723         int                     src_index;
2724         int                     error;          /* error return value */
2725 #ifdef DEBUG
2726         int                     i;
2727 #endif
2728
2729         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2730         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2731
2732         XFS_BTREE_STATS_INC(cur, split);
2733
2734         /* Set up left block (current one). */
2735         left = xfs_btree_get_block(cur, level, &lbp);
2736
2737 #ifdef DEBUG
2738         error = xfs_btree_check_block(cur, left, level, lbp);
2739         if (error)
2740                 goto error0;
2741 #endif
2742
2743         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2744
2745         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2746         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
2747         if (error)
2748                 goto error0;
2749         if (*stat == 0)
2750                 goto out0;
2751         XFS_BTREE_STATS_INC(cur, alloc);
2752
2753         /* Set up the new block as "right". */
2754         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2755         if (error)
2756                 goto error0;
2757
2758         /* Fill in the btree header for the new right block. */
2759         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2760
2761         /*
2762          * Split the entries between the old and the new block evenly.
2763          * Make sure that if there's an odd number of entries now, that
2764          * each new block will have the same number of entries.
2765          */
2766         lrecs = xfs_btree_get_numrecs(left);
2767         rrecs = lrecs / 2;
2768         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2769                 rrecs++;
2770         src_index = (lrecs - rrecs + 1);
2771
2772         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2773
2774         /* Adjust numrecs for the later get_*_keys() calls. */
2775         lrecs -= rrecs;
2776         xfs_btree_set_numrecs(left, lrecs);
2777         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2778
2779         /*
2780          * Copy btree block entries from the left block over to the
2781          * new block, the right. Update the right block and log the
2782          * changes.
2783          */
2784         if (level > 0) {
2785                 /* It's a non-leaf.  Move keys and pointers. */
2786                 union xfs_btree_key     *lkp;   /* left btree key */
2787                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2788                 union xfs_btree_key     *rkp;   /* right btree key */
2789                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2790
2791                 lkp = xfs_btree_key_addr(cur, src_index, left);
2792                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2793                 rkp = xfs_btree_key_addr(cur, 1, right);
2794                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2795
2796 #ifdef DEBUG
2797                 for (i = src_index; i < rrecs; i++) {
2798                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2799                         if (error)
2800                                 goto error0;
2801                 }
2802 #endif
2803
2804                 /* Copy the keys & pointers to the new block. */
2805                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2806                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2807
2808                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2809                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2810
2811                 /* Stash the keys of the new block for later insertion. */
2812                 xfs_btree_get_node_keys(cur, right, key);
2813         } else {
2814                 /* It's a leaf.  Move records.  */
2815                 union xfs_btree_rec     *lrp;   /* left record pointer */
2816                 union xfs_btree_rec     *rrp;   /* right record pointer */
2817
2818                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2819                 rrp = xfs_btree_rec_addr(cur, 1, right);
2820
2821                 /* Copy records to the new block. */
2822                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2823                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2824
2825                 /* Stash the keys of the new block for later insertion. */
2826                 xfs_btree_get_leaf_keys(cur, right, key);
2827         }
2828
2829         /*
2830          * Find the left block number by looking in the buffer.
2831          * Adjust sibling pointers.
2832          */
2833         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2834         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2835         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2836         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2837
2838         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2839         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2840
2841         /*
2842          * If there's a block to the new block's right, make that block
2843          * point back to right instead of to left.
2844          */
2845         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2846                 error = xfs_btree_read_buf_block(cur, &rrptr,
2847                                                         0, &rrblock, &rrbp);
2848                 if (error)
2849                         goto error0;
2850                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2851                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2852         }
2853
2854         /* Update the parent high keys of the left block, if needed. */
2855         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2856                 error = xfs_btree_update_keys(cur, level);
2857                 if (error)
2858                         goto error0;
2859         }
2860
2861         /*
2862          * If the cursor is really in the right block, move it there.
2863          * If it's just pointing past the last entry in left, then we'll
2864          * insert there, so don't change anything in that case.
2865          */
2866         if (cur->bc_ptrs[level] > lrecs + 1) {
2867                 xfs_btree_setbuf(cur, level, rbp);
2868                 cur->bc_ptrs[level] -= lrecs;
2869         }
2870         /*
2871          * If there are more levels, we'll need another cursor which refers
2872          * the right block, no matter where this cursor was.
2873          */
2874         if (level + 1 < cur->bc_nlevels) {
2875                 error = xfs_btree_dup_cursor(cur, curp);
2876                 if (error)
2877                         goto error0;
2878                 (*curp)->bc_ptrs[level + 1]++;
2879         }
2880         *ptrp = rptr;
2881         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2882         *stat = 1;
2883         return 0;
2884 out0:
2885         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2886         *stat = 0;
2887         return 0;
2888
2889 error0:
2890         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2891         return error;
2892 }
2893
2894 struct xfs_btree_split_args {
2895         struct xfs_btree_cur    *cur;
2896         int                     level;
2897         union xfs_btree_ptr     *ptrp;
2898         union xfs_btree_key     *key;
2899         struct xfs_btree_cur    **curp;
2900         int                     *stat;          /* success/failure */
2901         int                     result;
2902         bool                    kswapd; /* allocation in kswapd context */
2903         struct completion       *done;
2904         struct work_struct      work;
2905 };
2906
2907 /*
2908  * Stack switching interfaces for allocation
2909  */
2910 static void
2911 xfs_btree_split_worker(
2912         struct work_struct      *work)
2913 {
2914         struct xfs_btree_split_args     *args = container_of(work,
2915                                                 struct xfs_btree_split_args, work);
2916         unsigned long           pflags;
2917         unsigned long           new_pflags = PF_MEMALLOC_NOFS;
2918
2919         /*
2920          * we are in a transaction context here, but may also be doing work
2921          * in kswapd context, and hence we may need to inherit that state
2922          * temporarily to ensure that we don't block waiting for memory reclaim
2923          * in any way.
2924          */
2925         if (args->kswapd)
2926                 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2927
2928         current_set_flags_nested(&pflags, new_pflags);
2929
2930         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2931                                          args->key, args->curp, args->stat);
2932         complete(args->done);
2933
2934         current_restore_flags_nested(&pflags, new_pflags);
2935 }
2936
2937 /*
2938  * BMBT split requests often come in with little stack to work on. Push
2939  * them off to a worker thread so there is lots of stack to use. For the other
2940  * btree types, just call directly to avoid the context switch overhead here.
2941  */
2942 STATIC int                                      /* error */
2943 xfs_btree_split(
2944         struct xfs_btree_cur    *cur,
2945         int                     level,
2946         union xfs_btree_ptr     *ptrp,
2947         union xfs_btree_key     *key,
2948         struct xfs_btree_cur    **curp,
2949         int                     *stat)          /* success/failure */
2950 {
2951         struct xfs_btree_split_args     args;
2952         DECLARE_COMPLETION_ONSTACK(done);
2953
2954         if (cur->bc_btnum != XFS_BTNUM_BMAP)
2955                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2956
2957         args.cur = cur;
2958         args.level = level;
2959         args.ptrp = ptrp;
2960         args.key = key;
2961         args.curp = curp;
2962         args.stat = stat;
2963         args.done = &done;
2964         args.kswapd = current_is_kswapd();
2965         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2966         queue_work(xfs_alloc_wq, &args.work);
2967         wait_for_completion(&done);
2968         destroy_work_on_stack(&args.work);
2969         return args.result;
2970 }
2971
2972
2973 /*
2974  * Copy the old inode root contents into a real block and make the
2975  * broot point to it.
2976  */
2977 int                                             /* error */
2978 xfs_btree_new_iroot(
2979         struct xfs_btree_cur    *cur,           /* btree cursor */
2980         int                     *logflags,      /* logging flags for inode */
2981         int                     *stat)          /* return status - 0 fail */
2982 {
2983         struct xfs_buf          *cbp;           /* buffer for cblock */
2984         struct xfs_btree_block  *block;         /* btree block */
2985         struct xfs_btree_block  *cblock;        /* child btree block */
2986         union xfs_btree_key     *ckp;           /* child key pointer */
2987         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2988         union xfs_btree_key     *kp;            /* pointer to btree key */
2989         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2990         union xfs_btree_ptr     nptr;           /* new block addr */
2991         int                     level;          /* btree level */
2992         int                     error;          /* error return code */
2993 #ifdef DEBUG
2994         int                     i;              /* loop counter */
2995 #endif
2996
2997         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2998         XFS_BTREE_STATS_INC(cur, newroot);
2999
3000         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3001
3002         level = cur->bc_nlevels - 1;
3003
3004         block = xfs_btree_get_iroot(cur);
3005         pp = xfs_btree_ptr_addr(cur, 1, block);
3006
3007         /* Allocate the new block. If we can't do it, we're toast. Give up. */
3008         error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
3009         if (error)
3010                 goto error0;
3011         if (*stat == 0) {
3012                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3013                 return 0;
3014         }
3015         XFS_BTREE_STATS_INC(cur, alloc);
3016
3017         /* Copy the root into a real block. */
3018         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
3019         if (error)
3020                 goto error0;
3021
3022         /*
3023          * we can't just memcpy() the root in for CRC enabled btree blocks.
3024          * In that case have to also ensure the blkno remains correct
3025          */
3026         memcpy(cblock, block, xfs_btree_block_len(cur));
3027         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3028                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3029                         cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
3030                 else
3031                         cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
3032         }
3033
3034         be16_add_cpu(&block->bb_level, 1);
3035         xfs_btree_set_numrecs(block, 1);
3036         cur->bc_nlevels++;
3037         cur->bc_ptrs[level + 1] = 1;
3038
3039         kp = xfs_btree_key_addr(cur, 1, block);
3040         ckp = xfs_btree_key_addr(cur, 1, cblock);
3041         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
3042
3043         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3044 #ifdef DEBUG
3045         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
3046                 error = xfs_btree_check_ptr(cur, pp, i, level);
3047                 if (error)
3048                         goto error0;
3049         }
3050 #endif
3051         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
3052
3053 #ifdef DEBUG
3054         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
3055         if (error)
3056                 goto error0;
3057 #endif
3058         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
3059
3060         xfs_iroot_realloc(cur->bc_private.b.ip,
3061                           1 - xfs_btree_get_numrecs(cblock),
3062                           cur->bc_private.b.whichfork);
3063
3064         xfs_btree_setbuf(cur, level, cbp);
3065
3066         /*
3067          * Do all this logging at the end so that
3068          * the root is at the right level.
3069          */
3070         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3071         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3072         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3073
3074         *logflags |=
3075                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
3076         *stat = 1;
3077         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3078         return 0;
3079 error0:
3080         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3081         return error;
3082 }
3083
3084 /*
3085  * Allocate a new root block, fill it in.
3086  */
3087 STATIC int                              /* error */
3088 xfs_btree_new_root(
3089         struct xfs_btree_cur    *cur,   /* btree cursor */
3090         int                     *stat)  /* success/failure */
3091 {
3092         struct xfs_btree_block  *block; /* one half of the old root block */
3093         struct xfs_buf          *bp;    /* buffer containing block */
3094         int                     error;  /* error return value */
3095         struct xfs_buf          *lbp;   /* left buffer pointer */
3096         struct xfs_btree_block  *left;  /* left btree block */
3097         struct xfs_buf          *nbp;   /* new (root) buffer */
3098         struct xfs_btree_block  *new;   /* new (root) btree block */
3099         int                     nptr;   /* new value for key index, 1 or 2 */
3100         struct xfs_buf          *rbp;   /* right buffer pointer */
3101         struct xfs_btree_block  *right; /* right btree block */
3102         union xfs_btree_ptr     rptr;
3103         union xfs_btree_ptr     lptr;
3104
3105         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3106         XFS_BTREE_STATS_INC(cur, newroot);
3107
3108         /* initialise our start point from the cursor */
3109         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3110
3111         /* Allocate the new block. If we can't do it, we're toast. Give up. */
3112         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
3113         if (error)
3114                 goto error0;
3115         if (*stat == 0)
3116                 goto out0;
3117         XFS_BTREE_STATS_INC(cur, alloc);
3118
3119         /* Set up the new block. */
3120         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
3121         if (error)
3122                 goto error0;
3123
3124         /* Set the root in the holding structure  increasing the level by 1. */
3125         cur->bc_ops->set_root(cur, &lptr, 1);
3126
3127         /*
3128          * At the previous root level there are now two blocks: the old root,
3129          * and the new block generated when it was split.  We don't know which
3130          * one the cursor is pointing at, so we set up variables "left" and
3131          * "right" for each case.
3132          */
3133         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3134
3135 #ifdef DEBUG
3136         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3137         if (error)
3138                 goto error0;
3139 #endif
3140
3141         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3142         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3143                 /* Our block is left, pick up the right block. */
3144                 lbp = bp;
3145                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3146                 left = block;
3147                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3148                 if (error)
3149                         goto error0;
3150                 bp = rbp;
3151                 nptr = 1;
3152         } else {
3153                 /* Our block is right, pick up the left block. */
3154                 rbp = bp;
3155                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3156                 right = block;
3157                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3158                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3159                 if (error)
3160                         goto error0;
3161                 bp = lbp;
3162                 nptr = 2;
3163         }
3164
3165         /* Fill in the new block's btree header and log it. */
3166         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3167         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3168         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3169                         !xfs_btree_ptr_is_null(cur, &rptr));
3170
3171         /* Fill in the key data in the new root. */
3172         if (xfs_btree_get_level(left) > 0) {
3173                 /*
3174                  * Get the keys for the left block's keys and put them directly
3175                  * in the parent block.  Do the same for the right block.
3176                  */
3177                 xfs_btree_get_node_keys(cur, left,
3178                                 xfs_btree_key_addr(cur, 1, new));
3179                 xfs_btree_get_node_keys(cur, right,
3180                                 xfs_btree_key_addr(cur, 2, new));
3181         } else {
3182                 /*
3183                  * Get the keys for the left block's records and put them
3184                  * directly in the parent block.  Do the same for the right
3185                  * block.
3186                  */
3187                 xfs_btree_get_leaf_keys(cur, left,
3188                         xfs_btree_key_addr(cur, 1, new));
3189                 xfs_btree_get_leaf_keys(cur, right,
3190                         xfs_btree_key_addr(cur, 2, new));
3191         }
3192         xfs_btree_log_keys(cur, nbp, 1, 2);
3193
3194         /* Fill in the pointer data in the new root. */
3195         xfs_btree_copy_ptrs(cur,
3196                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3197         xfs_btree_copy_ptrs(cur,
3198                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3199         xfs_btree_log_ptrs(cur, nbp, 1, 2);
3200
3201         /* Fix up the cursor. */
3202         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3203         cur->bc_ptrs[cur->bc_nlevels] = nptr;
3204         cur->bc_nlevels++;
3205         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3206         *stat = 1;
3207         return 0;
3208 error0:
3209         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3210         return error;
3211 out0:
3212         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3213         *stat = 0;
3214         return 0;
3215 }
3216
3217 STATIC int
3218 xfs_btree_make_block_unfull(
3219         struct xfs_btree_cur    *cur,   /* btree cursor */
3220         int                     level,  /* btree level */
3221         int                     numrecs,/* # of recs in block */
3222         int                     *oindex,/* old tree index */
3223         int                     *index, /* new tree index */
3224         union xfs_btree_ptr     *nptr,  /* new btree ptr */
3225         struct xfs_btree_cur    **ncur, /* new btree cursor */
3226         union xfs_btree_key     *key,   /* key of new block */
3227         int                     *stat)
3228 {
3229         int                     error = 0;
3230
3231         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3232             level == cur->bc_nlevels - 1) {
3233                 struct xfs_inode *ip = cur->bc_private.b.ip;
3234
3235                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3236                         /* A root block that can be made bigger. */
3237                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
3238                         *stat = 1;
3239                 } else {
3240                         /* A root block that needs replacing */
3241                         int     logflags = 0;
3242
3243                         error = xfs_btree_new_iroot(cur, &logflags, stat);
3244                         if (error || *stat == 0)
3245                                 return error;
3246
3247                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3248                 }
3249
3250                 return 0;
3251         }
3252
3253         /* First, try shifting an entry to the right neighbor. */
3254         error = xfs_btree_rshift(cur, level, stat);
3255         if (error || *stat)
3256                 return error;
3257
3258         /* Next, try shifting an entry to the left neighbor. */
3259         error = xfs_btree_lshift(cur, level, stat);
3260         if (error)
3261                 return error;
3262
3263         if (*stat) {
3264                 *oindex = *index = cur->bc_ptrs[level];
3265                 return 0;
3266         }
3267
3268         /*
3269          * Next, try splitting the current block in half.
3270          *
3271          * If this works we have to re-set our variables because we
3272          * could be in a different block now.
3273          */
3274         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3275         if (error || *stat == 0)
3276                 return error;
3277
3278
3279         *index = cur->bc_ptrs[level];
3280         return 0;
3281 }
3282
3283 /*
3284  * Insert one record/level.  Return information to the caller
3285  * allowing the next level up to proceed if necessary.
3286  */
3287 STATIC int
3288 xfs_btree_insrec(
3289         struct xfs_btree_cur    *cur,   /* btree cursor */
3290         int                     level,  /* level to insert record at */
3291         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
3292         union xfs_btree_rec     *rec,   /* record to insert */
3293         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
3294         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
3295         int                     *stat)  /* success/failure */
3296 {
3297         struct xfs_btree_block  *block; /* btree block */
3298         struct xfs_buf          *bp;    /* buffer for block */
3299         union xfs_btree_ptr     nptr;   /* new block ptr */
3300         struct xfs_btree_cur    *ncur;  /* new btree cursor */
3301         union xfs_btree_key     nkey;   /* new block key */
3302         union xfs_btree_key     *lkey;
3303         int                     optr;   /* old key/record index */
3304         int                     ptr;    /* key/record index */
3305         int                     numrecs;/* number of records */
3306         int                     error;  /* error return value */
3307 #ifdef DEBUG
3308         int                     i;
3309 #endif
3310         xfs_daddr_t             old_bn;
3311
3312         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3313         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
3314
3315         ncur = NULL;
3316         lkey = &nkey;
3317
3318         /*
3319          * If we have an external root pointer, and we've made it to the
3320          * root level, allocate a new root block and we're done.
3321          */
3322         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3323             (level >= cur->bc_nlevels)) {
3324                 error = xfs_btree_new_root(cur, stat);
3325                 xfs_btree_set_ptr_null(cur, ptrp);
3326
3327                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3328                 return error;
3329         }
3330
3331         /* If we're off the left edge, return failure. */
3332         ptr = cur->bc_ptrs[level];
3333         if (ptr == 0) {
3334                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3335                 *stat = 0;
3336                 return 0;
3337         }
3338
3339         optr = ptr;
3340
3341         XFS_BTREE_STATS_INC(cur, insrec);
3342
3343         /* Get pointers to the btree buffer and block. */
3344         block = xfs_btree_get_block(cur, level, &bp);
3345         old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
3346         numrecs = xfs_btree_get_numrecs(block);
3347
3348 #ifdef DEBUG
3349         error = xfs_btree_check_block(cur, block, level, bp);
3350         if (error)
3351                 goto error0;
3352
3353         /* Check that the new entry is being inserted in the right place. */
3354         if (ptr <= numrecs) {
3355                 if (level == 0) {
3356                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3357                                 xfs_btree_rec_addr(cur, ptr, block)));
3358                 } else {
3359                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
3360                                 xfs_btree_key_addr(cur, ptr, block)));
3361                 }
3362         }
3363 #endif
3364
3365         /*
3366          * If the block is full, we can't insert the new entry until we
3367          * make the block un-full.
3368          */
3369         xfs_btree_set_ptr_null(cur, &nptr);
3370         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3371                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
3372                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
3373                 if (error || *stat == 0)
3374                         goto error0;
3375         }
3376
3377         /*
3378          * The current block may have changed if the block was
3379          * previously full and we have just made space in it.
3380          */
3381         block = xfs_btree_get_block(cur, level, &bp);
3382         numrecs = xfs_btree_get_numrecs(block);
3383
3384 #ifdef DEBUG
3385         error = xfs_btree_check_block(cur, block, level, bp);
3386         if (error)
3387                 return error;
3388 #endif
3389
3390         /*
3391          * At this point we know there's room for our new entry in the block
3392          * we're pointing at.
3393          */
3394         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3395
3396         if (level > 0) {
3397                 /* It's a nonleaf. make a hole in the keys and ptrs */
3398                 union xfs_btree_key     *kp;
3399                 union xfs_btree_ptr     *pp;
3400
3401                 kp = xfs_btree_key_addr(cur, ptr, block);
3402                 pp = xfs_btree_ptr_addr(cur, ptr, block);
3403
3404 #ifdef DEBUG
3405                 for (i = numrecs - ptr; i >= 0; i--) {
3406                         error = xfs_btree_check_ptr(cur, pp, i, level);
3407                         if (error)
3408                                 return error;
3409                 }
3410 #endif
3411
3412                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3413                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3414
3415 #ifdef DEBUG
3416                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3417                 if (error)
3418                         goto error0;
3419 #endif
3420
3421                 /* Now put the new data in, bump numrecs and log it. */
3422                 xfs_btree_copy_keys(cur, kp, key, 1);
3423                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3424                 numrecs++;
3425                 xfs_btree_set_numrecs(block, numrecs);
3426                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3427                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
3428 #ifdef DEBUG
3429                 if (ptr < numrecs) {
3430                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3431                                 xfs_btree_key_addr(cur, ptr + 1, block)));
3432                 }
3433 #endif
3434         } else {
3435                 /* It's a leaf. make a hole in the records */
3436                 union xfs_btree_rec             *rp;
3437
3438                 rp = xfs_btree_rec_addr(cur, ptr, block);
3439
3440                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3441
3442                 /* Now put the new data in, bump numrecs and log it. */
3443                 xfs_btree_copy_recs(cur, rp, rec, 1);
3444                 xfs_btree_set_numrecs(block, ++numrecs);
3445                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3446 #ifdef DEBUG
3447                 if (ptr < numrecs) {
3448                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3449                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
3450                 }
3451 #endif
3452         }
3453
3454         /* Log the new number of records in the btree header. */
3455         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3456
3457         /*
3458          * If we just inserted into a new tree block, we have to
3459          * recalculate nkey here because nkey is out of date.
3460          *
3461          * Otherwise we're just updating an existing block (having shoved
3462          * some records into the new tree block), so use the regular key
3463          * update mechanism.
3464          */
3465         if (bp && bp->b_bn != old_bn) {
3466                 xfs_btree_get_keys(cur, block, lkey);
3467         } else if (xfs_btree_needs_key_update(cur, optr)) {
3468                 error = xfs_btree_update_keys(cur, level);
3469                 if (error)
3470                         goto error0;
3471         }
3472
3473         /*
3474          * If we are tracking the last record in the tree and
3475          * we are at the far right edge of the tree, update it.
3476          */
3477         if (xfs_btree_is_lastrec(cur, block, level)) {
3478                 cur->bc_ops->update_lastrec(cur, block, rec,
3479                                             ptr, LASTREC_INSREC);
3480         }
3481
3482         /*
3483          * Return the new block number, if any.
3484          * If there is one, give back a record value and a cursor too.
3485          */
3486         *ptrp = nptr;
3487         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3488                 xfs_btree_copy_keys(cur, key, lkey, 1);
3489                 *curp = ncur;
3490         }
3491
3492         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3493         *stat = 1;
3494         return 0;
3495
3496 error0:
3497         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3498         return error;
3499 }
3500
3501 /*
3502  * Insert the record at the point referenced by cur.
3503  *
3504  * A multi-level split of the tree on insert will invalidate the original
3505  * cursor.  All callers of this function should assume that the cursor is
3506  * no longer valid and revalidate it.
3507  */
3508 int
3509 xfs_btree_insert(
3510         struct xfs_btree_cur    *cur,
3511         int                     *stat)
3512 {
3513         int                     error;  /* error return value */
3514         int                     i;      /* result value, 0 for failure */
3515         int                     level;  /* current level number in btree */
3516         union xfs_btree_ptr     nptr;   /* new block number (split result) */
3517         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
3518         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
3519         union xfs_btree_key     bkey;   /* key of block to insert */
3520         union xfs_btree_key     *key;
3521         union xfs_btree_rec     rec;    /* record to insert */
3522
3523         level = 0;
3524         ncur = NULL;
3525         pcur = cur;
3526         key = &bkey;
3527
3528         xfs_btree_set_ptr_null(cur, &nptr);
3529
3530         /* Make a key out of the record data to be inserted, and save it. */
3531         cur->bc_ops->init_rec_from_cur(cur, &rec);
3532         cur->bc_ops->init_key_from_rec(key, &rec);
3533
3534         /*
3535          * Loop going up the tree, starting at the leaf level.
3536          * Stop when we don't get a split block, that must mean that
3537          * the insert is finished with this level.
3538          */
3539         do {
3540                 /*
3541                  * Insert nrec/nptr into this level of the tree.
3542                  * Note if we fail, nptr will be null.
3543                  */
3544                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3545                                 &ncur, &i);
3546                 if (error) {
3547                         if (pcur != cur)
3548                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3549                         goto error0;
3550                 }
3551
3552                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3553                 level++;
3554
3555                 /*
3556                  * See if the cursor we just used is trash.
3557                  * Can't trash the caller's cursor, but otherwise we should
3558                  * if ncur is a new cursor or we're about to be done.
3559                  */
3560                 if (pcur != cur &&
3561                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3562                         /* Save the state from the cursor before we trash it */
3563                         if (cur->bc_ops->update_cursor)
3564                                 cur->bc_ops->update_cursor(pcur, cur);
3565                         cur->bc_nlevels = pcur->bc_nlevels;
3566                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3567                 }
3568                 /* If we got a new cursor, switch to it. */
3569                 if (ncur) {
3570                         pcur = ncur;
3571                         ncur = NULL;
3572                 }
3573         } while (!xfs_btree_ptr_is_null(cur, &nptr));
3574
3575         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3576         *stat = i;
3577         return 0;
3578 error0:
3579         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3580         return error;
3581 }
3582
3583 /*
3584  * Try to merge a non-leaf block back into the inode root.
3585  *
3586  * Note: the killroot names comes from the fact that we're effectively
3587  * killing the old root block.  But because we can't just delete the
3588  * inode we have to copy the single block it was pointing to into the
3589  * inode.
3590  */
3591 STATIC int
3592 xfs_btree_kill_iroot(
3593         struct xfs_btree_cur    *cur)
3594 {
3595         int                     whichfork = cur->bc_private.b.whichfork;
3596         struct xfs_inode        *ip = cur->bc_private.b.ip;
3597         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3598         struct xfs_btree_block  *block;
3599         struct xfs_btree_block  *cblock;
3600         union xfs_btree_key     *kp;
3601         union xfs_btree_key     *ckp;
3602         union xfs_btree_ptr     *pp;
3603         union xfs_btree_ptr     *cpp;
3604         struct xfs_buf          *cbp;
3605         int                     level;
3606         int                     index;
3607         int                     numrecs;
3608         int                     error;
3609 #ifdef DEBUG
3610         union xfs_btree_ptr     ptr;
3611         int                     i;
3612 #endif
3613
3614         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3615
3616         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3617         ASSERT(cur->bc_nlevels > 1);
3618
3619         /*
3620          * Don't deal with the root block needs to be a leaf case.
3621          * We're just going to turn the thing back into extents anyway.
3622          */
3623         level = cur->bc_nlevels - 1;
3624         if (level == 1)
3625                 goto out0;
3626
3627         /*
3628          * Give up if the root has multiple children.
3629          */
3630         block = xfs_btree_get_iroot(cur);
3631         if (xfs_btree_get_numrecs(block) != 1)
3632                 goto out0;
3633
3634         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3635         numrecs = xfs_btree_get_numrecs(cblock);
3636
3637         /*
3638          * Only do this if the next level will fit.
3639          * Then the data must be copied up to the inode,
3640          * instead of freeing the root you free the next level.
3641          */
3642         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3643                 goto out0;
3644
3645         XFS_BTREE_STATS_INC(cur, killroot);
3646
3647 #ifdef DEBUG
3648         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3649         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3650         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3651         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3652 #endif
3653
3654         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3655         if (index) {
3656                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3657                                   cur->bc_private.b.whichfork);
3658                 block = ifp->if_broot;
3659         }
3660
3661         be16_add_cpu(&block->bb_numrecs, index);
3662         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3663
3664         kp = xfs_btree_key_addr(cur, 1, block);
3665         ckp = xfs_btree_key_addr(cur, 1, cblock);
3666         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3667
3668         pp = xfs_btree_ptr_addr(cur, 1, block);
3669         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3670 #ifdef DEBUG
3671         for (i = 0; i < numrecs; i++) {
3672                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3673                 if (error) {
3674                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3675                         return error;
3676                 }
3677         }
3678 #endif
3679         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3680
3681         error = xfs_btree_free_block(cur, cbp);
3682         if (error) {
3683                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3684                 return error;
3685         }
3686
3687         cur->bc_bufs[level - 1] = NULL;
3688         be16_add_cpu(&block->bb_level, -1);
3689         xfs_trans_log_inode(cur->bc_tp, ip,
3690                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3691         cur->bc_nlevels--;
3692 out0:
3693         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3694         return 0;
3695 }
3696
3697 /*
3698  * Kill the current root node, and replace it with it's only child node.
3699  */
3700 STATIC int
3701 xfs_btree_kill_root(
3702         struct xfs_btree_cur    *cur,
3703         struct xfs_buf          *bp,
3704         int                     level,
3705         union xfs_btree_ptr     *newroot)
3706 {
3707         int                     error;
3708
3709         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3710         XFS_BTREE_STATS_INC(cur, killroot);
3711
3712         /*
3713          * Update the root pointer, decreasing the level by 1 and then
3714          * free the old root.
3715          */
3716         cur->bc_ops->set_root(cur, newroot, -1);
3717
3718         error = xfs_btree_free_block(cur, bp);
3719         if (error) {
3720                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3721                 return error;
3722         }
3723
3724         cur->bc_bufs[level] = NULL;
3725         cur->bc_ra[level] = 0;
3726         cur->bc_nlevels--;
3727
3728         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3729         return 0;
3730 }
3731
3732 STATIC int
3733 xfs_btree_dec_cursor(
3734         struct xfs_btree_cur    *cur,
3735         int                     level,
3736         int                     *stat)
3737 {
3738         int                     error;
3739         int                     i;
3740
3741         if (level > 0) {
3742                 error = xfs_btree_decrement(cur, level, &i);
3743                 if (error)
3744                         return error;
3745         }
3746
3747         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3748         *stat = 1;
3749         return 0;
3750 }
3751
3752 /*
3753  * Single level of the btree record deletion routine.
3754  * Delete record pointed to by cur/level.
3755  * Remove the record from its block then rebalance the tree.
3756  * Return 0 for error, 1 for done, 2 to go on to the next level.
3757  */
3758 STATIC int                                      /* error */
3759 xfs_btree_delrec(
3760         struct xfs_btree_cur    *cur,           /* btree cursor */
3761         int                     level,          /* level removing record from */
3762         int                     *stat)          /* fail/done/go-on */
3763 {
3764         struct xfs_btree_block  *block;         /* btree block */
3765         union xfs_btree_ptr     cptr;           /* current block ptr */
3766         struct xfs_buf          *bp;            /* buffer for block */
3767         int                     error;          /* error return value */
3768         int                     i;              /* loop counter */
3769         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3770         struct xfs_buf          *lbp;           /* left buffer pointer */
3771         struct xfs_btree_block  *left;          /* left btree block */
3772         int                     lrecs = 0;      /* left record count */
3773         int                     ptr;            /* key/record index */
3774         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3775         struct xfs_buf          *rbp;           /* right buffer pointer */
3776         struct xfs_btree_block  *right;         /* right btree block */
3777         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3778         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3779         int                     rrecs = 0;      /* right record count */
3780         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3781         int                     numrecs;        /* temporary numrec count */
3782
3783         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3784         XFS_BTREE_TRACE_ARGI(cur, level);
3785
3786         tcur = NULL;
3787
3788         /* Get the index of the entry being deleted, check for nothing there. */
3789         ptr = cur->bc_ptrs[level];
3790         if (ptr == 0) {
3791                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3792                 *stat = 0;
3793                 return 0;
3794         }
3795
3796         /* Get the buffer & block containing the record or key/ptr. */
3797         block = xfs_btree_get_block(cur, level, &bp);
3798         numrecs = xfs_btree_get_numrecs(block);
3799
3800 #ifdef DEBUG
3801         error = xfs_btree_check_block(cur, block, level, bp);
3802         if (error)
3803                 goto error0;
3804 #endif
3805
3806         /* Fail if we're off the end of the block. */
3807         if (ptr > numrecs) {
3808                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3809                 *stat = 0;
3810                 return 0;
3811         }
3812
3813         XFS_BTREE_STATS_INC(cur, delrec);
3814         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3815
3816         /* Excise the entries being deleted. */
3817         if (level > 0) {
3818                 /* It's a nonleaf. operate on keys and ptrs */
3819                 union xfs_btree_key     *lkp;
3820                 union xfs_btree_ptr     *lpp;
3821
3822                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3823                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3824
3825 #ifdef DEBUG
3826                 for (i = 0; i < numrecs - ptr; i++) {
3827                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3828                         if (error)
3829                                 goto error0;
3830                 }
3831 #endif
3832
3833                 if (ptr < numrecs) {
3834                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3835                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3836                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3837                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3838                 }
3839         } else {
3840                 /* It's a leaf. operate on records */
3841                 if (ptr < numrecs) {
3842                         xfs_btree_shift_recs(cur,
3843                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3844                                 -1, numrecs - ptr);
3845                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3846                 }
3847         }
3848
3849         /*
3850          * Decrement and log the number of entries in the block.
3851          */
3852         xfs_btree_set_numrecs(block, --numrecs);
3853         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3854
3855         /*
3856          * If we are tracking the last record in the tree and
3857          * we are at the far right edge of the tree, update it.
3858          */
3859         if (xfs_btree_is_lastrec(cur, block, level)) {
3860                 cur->bc_ops->update_lastrec(cur, block, NULL,
3861                                             ptr, LASTREC_DELREC);
3862         }
3863
3864         /*
3865          * We're at the root level.  First, shrink the root block in-memory.
3866          * Try to get rid of the next level down.  If we can't then there's
3867          * nothing left to do.
3868          */
3869         if (level == cur->bc_nlevels - 1) {
3870                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3871                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3872                                           cur->bc_private.b.whichfork);
3873
3874                         error = xfs_btree_kill_iroot(cur);
3875                         if (error)
3876                                 goto error0;
3877
3878                         error = xfs_btree_dec_cursor(cur, level, stat);
3879                         if (error)
3880                                 goto error0;
3881                         *stat = 1;
3882                         return 0;
3883                 }
3884
3885                 /*
3886                  * If this is the root level, and there's only one entry left,
3887                  * and it's NOT the leaf level, then we can get rid of this
3888                  * level.
3889                  */
3890                 if (numrecs == 1 && level > 0) {
3891                         union xfs_btree_ptr     *pp;
3892                         /*
3893                          * pp is still set to the first pointer in the block.
3894                          * Make it the new root of the btree.
3895                          */
3896                         pp = xfs_btree_ptr_addr(cur, 1, block);
3897                         error = xfs_btree_kill_root(cur, bp, level, pp);
3898                         if (error)
3899                                 goto error0;
3900                 } else if (level > 0) {
3901                         error = xfs_btree_dec_cursor(cur, level, stat);
3902                         if (error)
3903                                 goto error0;
3904                 }
3905                 *stat = 1;
3906                 return 0;
3907         }
3908
3909         /*
3910          * If we deleted the leftmost entry in the block, update the
3911          * key values above us in the tree.
3912          */
3913         if (xfs_btree_needs_key_update(cur, ptr)) {
3914                 error = xfs_btree_update_keys(cur, level);
3915                 if (error)
3916                         goto error0;
3917         }
3918
3919         /*
3920          * If the number of records remaining in the block is at least
3921          * the minimum, we're done.
3922          */
3923         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3924                 error = xfs_btree_dec_cursor(cur, level, stat);
3925                 if (error)
3926                         goto error0;
3927                 return 0;
3928         }
3929
3930         /*
3931          * Otherwise, we have to move some records around to keep the
3932          * tree balanced.  Look at the left and right sibling blocks to
3933          * see if we can re-balance by moving only one record.
3934          */
3935         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3936         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3937
3938         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3939                 /*
3940                  * One child of root, need to get a chance to copy its contents
3941                  * into the root and delete it. Can't go up to next level,
3942                  * there's nothing to delete there.
3943                  */
3944                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3945                     xfs_btree_ptr_is_null(cur, &lptr) &&
3946                     level == cur->bc_nlevels - 2) {
3947                         error = xfs_btree_kill_iroot(cur);
3948                         if (!error)
3949                                 error = xfs_btree_dec_cursor(cur, level, stat);
3950                         if (error)
3951                                 goto error0;
3952                         return 0;
3953                 }
3954         }
3955
3956         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3957                !xfs_btree_ptr_is_null(cur, &lptr));
3958
3959         /*
3960          * Duplicate the cursor so our btree manipulations here won't
3961          * disrupt the next level up.
3962          */
3963         error = xfs_btree_dup_cursor(cur, &tcur);
3964         if (error)
3965                 goto error0;
3966
3967         /*
3968          * If there's a right sibling, see if it's ok to shift an entry
3969          * out of it.
3970          */
3971         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3972                 /*
3973                  * Move the temp cursor to the last entry in the next block.
3974                  * Actually any entry but the first would suffice.
3975                  */
3976                 i = xfs_btree_lastrec(tcur, level);
3977                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3978
3979                 error = xfs_btree_increment(tcur, level, &i);
3980                 if (error)
3981                         goto error0;
3982                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3983
3984                 i = xfs_btree_lastrec(tcur, level);
3985                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3986
3987                 /* Grab a pointer to the block. */
3988                 right = xfs_btree_get_block(tcur, level, &rbp);
3989 #ifdef DEBUG
3990                 error = xfs_btree_check_block(tcur, right, level, rbp);
3991                 if (error)
3992                         goto error0;
3993 #endif
3994                 /* Grab the current block number, for future use. */
3995                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3996
3997                 /*
3998                  * If right block is full enough so that removing one entry
3999                  * won't make it too empty, and left-shifting an entry out
4000                  * of right to us works, we're done.
4001                  */
4002                 if (xfs_btree_get_numrecs(right) - 1 >=
4003                     cur->bc_ops->get_minrecs(tcur, level)) {
4004                         error = xfs_btree_lshift(tcur, level, &i);
4005                         if (error)
4006                                 goto error0;
4007                         if (i) {
4008                                 ASSERT(xfs_btree_get_numrecs(block) >=
4009                                        cur->bc_ops->get_minrecs(tcur, level));
4010
4011                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4012                                 tcur = NULL;
4013
4014                                 error = xfs_btree_dec_cursor(cur, level, stat);
4015                                 if (error)
4016                                         goto error0;
4017                                 return 0;
4018                         }
4019                 }
4020
4021                 /*
4022                  * Otherwise, grab the number of records in right for
4023                  * future reference, and fix up the temp cursor to point
4024                  * to our block again (last record).
4025                  */
4026                 rrecs = xfs_btree_get_numrecs(right);
4027                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4028                         i = xfs_btree_firstrec(tcur, level);
4029                         XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4030
4031                         error = xfs_btree_decrement(tcur, level, &i);
4032                         if (error)
4033                                 goto error0;
4034                         XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4035                 }
4036         }
4037
4038         /*
4039          * If there's a left sibling, see if it's ok to shift an entry
4040          * out of it.
4041          */
4042         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4043                 /*
4044                  * Move the temp cursor to the first entry in the
4045                  * previous block.
4046                  */
4047                 i = xfs_btree_firstrec(tcur, level);
4048                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4049
4050                 error = xfs_btree_decrement(tcur, level, &i);
4051                 if (error)
4052                         goto error0;
4053                 i = xfs_btree_firstrec(tcur, level);
4054                 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4055
4056                 /* Grab a pointer to the block. */
4057                 left = xfs_btree_get_block(tcur, level, &lbp);
4058 #ifdef DEBUG
4059                 error = xfs_btree_check_block(cur, left, level, lbp);
4060                 if (error)
4061                         goto error0;
4062 #endif
4063                 /* Grab the current block number, for future use. */
4064                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4065
4066                 /*
4067                  * If left block is full enough so that removing one entry
4068                  * won't make it too empty, and right-shifting an entry out
4069                  * of left to us works, we're done.
4070                  */
4071                 if (xfs_btree_get_numrecs(left) - 1 >=
4072                     cur->bc_ops->get_minrecs(tcur, level)) {
4073                         error = xfs_btree_rshift(tcur, level, &i);
4074                         if (error)
4075                                 goto error0;
4076                         if (i) {
4077                                 ASSERT(xfs_btree_get_numrecs(block) >=
4078                                        cur->bc_ops->get_minrecs(tcur, level));
4079                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4080                                 tcur = NULL;
4081                                 if (level == 0)
4082                                         cur->bc_ptrs[0]++;
4083                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4084                                 *stat = 1;
4085                                 return 0;
4086                         }
4087                 }
4088
4089                 /*
4090                  * Otherwise, grab the number of records in right for
4091                  * future reference.
4092                  */
4093                 lrecs = xfs_btree_get_numrecs(left);
4094         }
4095
4096         /* Delete the temp cursor, we're done with it. */
4097         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4098         tcur = NULL;
4099
4100         /* If here, we need to do a join to keep the tree balanced. */
4101         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4102
4103         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4104             lrecs + xfs_btree_get_numrecs(block) <=
4105                         cur->bc_ops->get_maxrecs(cur, level)) {
4106                 /*
4107                  * Set "right" to be the starting block,
4108                  * "left" to be the left neighbor.
4109                  */
4110                 rptr = cptr;
4111                 right = block;
4112                 rbp = bp;
4113                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4114                 if (error)
4115                         goto error0;
4116
4117         /*
4118          * If that won't work, see if we can join with the right neighbor block.
4119          */
4120         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4121                    rrecs + xfs_btree_get_numrecs(block) <=
4122                         cur->bc_ops->get_maxrecs(cur, level)) {
4123                 /*
4124                  * Set "left" to be the starting block,
4125                  * "right" to be the right neighbor.
4126                  */
4127                 lptr = cptr;
4128                 left = block;
4129                 lbp = bp;
4130                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4131                 if (error)
4132                         goto error0;
4133
4134         /*
4135          * Otherwise, we can't fix the imbalance.
4136          * Just return.  This is probably a logic error, but it's not fatal.
4137          */
4138         } else {
4139                 error = xfs_btree_dec_cursor(cur, level, stat);
4140                 if (error)
4141                         goto error0;
4142                 return 0;
4143         }
4144
4145         rrecs = xfs_btree_get_numrecs(right);
4146         lrecs = xfs_btree_get_numrecs(left);
4147
4148         /*
4149          * We're now going to join "left" and "right" by moving all the stuff
4150          * in "right" to "left" and deleting "right".
4151          */
4152         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4153         if (level > 0) {
4154                 /* It's a non-leaf.  Move keys and pointers. */
4155                 union xfs_btree_key     *lkp;   /* left btree key */
4156                 union xfs_btree_ptr     *lpp;   /* left address pointer */
4157                 union xfs_btree_key     *rkp;   /* right btree key */
4158                 union xfs_btree_ptr     *rpp;   /* right address pointer */
4159
4160                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4161                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4162                 rkp = xfs_btree_key_addr(cur, 1, right);
4163                 rpp = xfs_btree_ptr_addr(cur, 1, right);
4164 #ifdef DEBUG
4165                 for (i = 1; i < rrecs; i++) {
4166                         error = xfs_btree_check_ptr(cur, rpp, i, level);
4167                         if (error)
4168                                 goto error0;
4169                 }
4170 #endif
4171                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4172                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4173
4174                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4175                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4176         } else {
4177                 /* It's a leaf.  Move records.  */
4178                 union xfs_btree_rec     *lrp;   /* left record pointer */
4179                 union xfs_btree_rec     *rrp;   /* right record pointer */
4180
4181                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4182                 rrp = xfs_btree_rec_addr(cur, 1, right);
4183
4184                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4185                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4186         }
4187
4188         XFS_BTREE_STATS_INC(cur, join);
4189
4190         /*
4191          * Fix up the number of records and right block pointer in the
4192          * surviving block, and log it.
4193          */
4194         xfs_btree_set_numrecs(left, lrecs + rrecs);
4195         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
4196         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4197         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4198
4199         /* If there is a right sibling, point it to the remaining block. */
4200         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4201         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4202                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4203                 if (error)
4204                         goto error0;
4205                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4206                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4207         }
4208
4209         /* Free the deleted block. */
4210         error = xfs_btree_free_block(cur, rbp);
4211         if (error)
4212                 goto error0;
4213
4214         /*
4215          * If we joined with the left neighbor, set the buffer in the
4216          * cursor to the left block, and fix up the index.
4217          */
4218         if (bp != lbp) {
4219                 cur->bc_bufs[level] = lbp;
4220                 cur->bc_ptrs[level] += lrecs;
4221                 cur->bc_ra[level] = 0;
4222         }
4223         /*
4224          * If we joined with the right neighbor and there's a level above
4225          * us, increment the cursor at that level.
4226          */
4227         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4228                    (level + 1 < cur->bc_nlevels)) {
4229                 error = xfs_btree_increment(cur, level + 1, &i);
4230                 if (error)
4231                         goto error0;
4232         }
4233
4234         /*
4235          * Readjust the ptr at this level if it's not a leaf, since it's
4236          * still pointing at the deletion point, which makes the cursor
4237          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
4238          * We can't use decrement because it would change the next level up.
4239          */
4240         if (level > 0)
4241                 cur->bc_ptrs[level]--;
4242
4243         /*
4244          * We combined blocks, so we have to update the parent keys if the
4245          * btree supports overlapped intervals.  However, bc_ptrs[level + 1]
4246          * points to the old block so that the caller knows which record to
4247          * delete.  Therefore, the caller must be savvy enough to call updkeys
4248          * for us if we return stat == 2.  The other exit points from this
4249          * function don't require deletions further up the tree, so they can
4250          * call updkeys directly.
4251          */
4252
4253         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4254         /* Return value means the next level up has something to do. */
4255         *stat = 2;
4256         return 0;
4257
4258 error0:
4259         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4260         if (tcur)
4261                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4262         return error;
4263 }
4264
4265 /*
4266  * Delete the record pointed to by cur.
4267  * The cursor refers to the place where the record was (could be inserted)
4268  * when the operation returns.
4269  */
4270 int                                     /* error */
4271 xfs_btree_delete(
4272         struct xfs_btree_cur    *cur,
4273         int                     *stat)  /* success/failure */
4274 {
4275         int                     error;  /* error return value */
4276         int                     level;
4277         int                     i;
4278         bool                    joined = false;
4279
4280         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
4281
4282         /*
4283          * Go up the tree, starting at leaf level.
4284          *
4285          * If 2 is returned then a join was done; go to the next level.
4286          * Otherwise we are done.
4287          */
4288         for (level = 0, i = 2; i == 2; level++) {
4289                 error = xfs_btree_delrec(cur, level, &i);
4290                 if (error)
4291                         goto error0;
4292                 if (i == 2)
4293                         joined = true;
4294         }
4295
4296         /*
4297          * If we combined blocks as part of deleting the record, delrec won't
4298          * have updated the parent high keys so we have to do that here.
4299          */
4300         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4301                 error = xfs_btree_updkeys_force(cur, 0);
4302                 if (error)
4303                         goto error0;
4304         }
4305
4306         if (i == 0) {
4307                 for (level = 1; level < cur->bc_nlevels; level++) {
4308                         if (cur->bc_ptrs[level] == 0) {
4309                                 error = xfs_btree_decrement(cur, level, &i);
4310                                 if (error)
4311                                         goto error0;
4312                                 break;
4313                         }
4314                 }
4315         }
4316
4317         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4318         *stat = i;
4319         return 0;
4320 error0:
4321         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4322         return error;
4323 }
4324
4325 /*
4326  * Get the data from the pointed-to record.
4327  */
4328 int                                     /* error */
4329 xfs_btree_get_rec(
4330         struct xfs_btree_cur    *cur,   /* btree cursor */
4331         union xfs_btree_rec     **recp, /* output: btree record */
4332         int                     *stat)  /* output: success/failure */
4333 {
4334         struct xfs_btree_block  *block; /* btree block */
4335         struct xfs_buf          *bp;    /* buffer pointer */
4336         int                     ptr;    /* record number */
4337 #ifdef DEBUG
4338         int                     error;  /* error return value */
4339 #endif
4340
4341         ptr = cur->bc_ptrs[0];
4342         block = xfs_btree_get_block(cur, 0, &bp);
4343
4344 #ifdef DEBUG
4345         error = xfs_btree_check_block(cur, block, 0, bp);
4346         if (error)
4347                 return error;
4348 #endif
4349
4350         /*
4351          * Off the right end or left end, return failure.
4352          */
4353         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4354                 *stat = 0;
4355                 return 0;
4356         }
4357
4358         /*
4359          * Point to the record and extract its data.
4360          */
4361         *recp = xfs_btree_rec_addr(cur, ptr, block);
4362         *stat = 1;
4363         return 0;
4364 }
4365
4366 /* Visit a block in a btree. */
4367 STATIC int
4368 xfs_btree_visit_block(
4369         struct xfs_btree_cur            *cur,
4370         int                             level,
4371         xfs_btree_visit_blocks_fn       fn,
4372         void                            *data)
4373 {
4374         struct xfs_btree_block          *block;
4375         struct xfs_buf                  *bp;
4376         union xfs_btree_ptr             rptr;
4377         int                             error;
4378
4379         /* do right sibling readahead */
4380         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4381         block = xfs_btree_get_block(cur, level, &bp);
4382
4383         /* process the block */
4384         error = fn(cur, level, data);
4385         if (error)
4386                 return error;
4387
4388         /* now read rh sibling block for next iteration */
4389         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4390         if (xfs_btree_ptr_is_null(cur, &rptr))
4391                 return -ENOENT;
4392
4393         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4394 }
4395
4396
4397 /* Visit every block in a btree. */
4398 int
4399 xfs_btree_visit_blocks(
4400         struct xfs_btree_cur            *cur,
4401         xfs_btree_visit_blocks_fn       fn,
4402         void                            *data)
4403 {
4404         union xfs_btree_ptr             lptr;
4405         int                             level;
4406         struct xfs_btree_block          *block = NULL;
4407         int                             error = 0;
4408
4409         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4410
4411         /* for each level */
4412         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4413                 /* grab the left hand block */
4414                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4415                 if (error)
4416                         return error;
4417
4418                 /* readahead the left most block for the next level down */
4419                 if (level > 0) {
4420                         union xfs_btree_ptr     *ptr;
4421
4422                         ptr = xfs_btree_ptr_addr(cur, 1, block);
4423                         xfs_btree_readahead_ptr(cur, ptr, 1);
4424
4425                         /* save for the next iteration of the loop */
4426                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4427                 }
4428
4429                 /* for each buffer in the level */
4430                 do {
4431                         error = xfs_btree_visit_block(cur, level, fn, data);
4432                 } while (!error);
4433
4434                 if (error != -ENOENT)
4435                         return error;
4436         }
4437
4438         return 0;
4439 }
4440
4441 /*
4442  * Change the owner of a btree.
4443  *
4444  * The mechanism we use here is ordered buffer logging. Because we don't know
4445  * how many buffers were are going to need to modify, we don't really want to
4446  * have to make transaction reservations for the worst case of every buffer in a
4447  * full size btree as that may be more space that we can fit in the log....
4448  *
4449  * We do the btree walk in the most optimal manner possible - we have sibling
4450  * pointers so we can just walk all the blocks on each level from left to right
4451  * in a single pass, and then move to the next level and do the same. We can
4452  * also do readahead on the sibling pointers to get IO moving more quickly,
4453  * though for slow disks this is unlikely to make much difference to performance
4454  * as the amount of CPU work we have to do before moving to the next block is
4455  * relatively small.
4456  *
4457  * For each btree block that we load, modify the owner appropriately, set the
4458  * buffer as an ordered buffer and log it appropriately. We need to ensure that
4459  * we mark the region we change dirty so that if the buffer is relogged in
4460  * a subsequent transaction the changes we make here as an ordered buffer are
4461  * correctly relogged in that transaction.  If we are in recovery context, then
4462  * just queue the modified buffer as delayed write buffer so the transaction
4463  * recovery completion writes the changes to disk.
4464  */
4465 struct xfs_btree_block_change_owner_info {
4466         uint64_t                new_owner;
4467         struct list_head        *buffer_list;
4468 };
4469
4470 static int
4471 xfs_btree_block_change_owner(
4472         struct xfs_btree_cur    *cur,
4473         int                     level,
4474         void                    *data)
4475 {
4476         struct xfs_btree_block_change_owner_info        *bbcoi = data;
4477         struct xfs_btree_block  *block;
4478         struct xfs_buf          *bp;
4479
4480         /* modify the owner */
4481         block = xfs_btree_get_block(cur, level, &bp);
4482         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4483                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4484                         return 0;
4485                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4486         } else {
4487                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4488                         return 0;
4489                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4490         }
4491
4492         /*
4493          * If the block is a root block hosted in an inode, we might not have a
4494          * buffer pointer here and we shouldn't attempt to log the change as the
4495          * information is already held in the inode and discarded when the root
4496          * block is formatted into the on-disk inode fork. We still change it,
4497          * though, so everything is consistent in memory.
4498          */
4499         if (!bp) {
4500                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4501                 ASSERT(level == cur->bc_nlevels - 1);
4502                 return 0;
4503         }
4504
4505         if (cur->bc_tp) {
4506                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4507                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4508                         return -EAGAIN;
4509                 }
4510         } else {
4511                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4512         }
4513
4514         return 0;
4515 }
4516
4517 int
4518 xfs_btree_change_owner(
4519         struct xfs_btree_cur    *cur,
4520         uint64_t                new_owner,
4521         struct list_head        *buffer_list)
4522 {
4523         struct xfs_btree_block_change_owner_info        bbcoi;
4524
4525         bbcoi.new_owner = new_owner;
4526         bbcoi.buffer_list = buffer_list;
4527
4528         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4529                         &bbcoi);
4530 }
4531
4532 /* Verify the v5 fields of a long-format btree block. */
4533 xfs_failaddr_t
4534 xfs_btree_lblock_v5hdr_verify(
4535         struct xfs_buf          *bp,
4536         uint64_t                owner)
4537 {
4538         struct xfs_mount        *mp = bp->b_target->bt_mount;
4539         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4540
4541         if (!xfs_sb_version_hascrc(&mp->m_sb))
4542                 return __this_address;
4543         if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4544                 return __this_address;
4545         if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
4546                 return __this_address;
4547         if (owner != XFS_RMAP_OWN_UNKNOWN &&
4548             be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4549                 return __this_address;
4550         return NULL;
4551 }
4552
4553 /* Verify a long-format btree block. */
4554 xfs_failaddr_t
4555 xfs_btree_lblock_verify(
4556         struct xfs_buf          *bp,
4557         unsigned int            max_recs)
4558 {
4559         struct xfs_mount        *mp = bp->b_target->bt_mount;
4560         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4561
4562         /* numrecs verification */
4563         if (be16_to_cpu(block->bb_numrecs) > max_recs)
4564                 return __this_address;
4565
4566         /* sibling pointer verification */
4567         if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
4568             !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
4569                 return __this_address;
4570         if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
4571             !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
4572                 return __this_address;
4573
4574         return NULL;
4575 }
4576
4577 /**
4578  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4579  *                                    btree block
4580  *
4581  * @bp: buffer containing the btree block
4582  * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4583  * @pag_max_level: pointer to the per-ag max level field
4584  */
4585 xfs_failaddr_t
4586 xfs_btree_sblock_v5hdr_verify(
4587         struct xfs_buf          *bp)
4588 {
4589         struct xfs_mount        *mp = bp->b_target->bt_mount;
4590         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4591         struct xfs_perag        *pag = bp->b_pag;
4592
4593         if (!xfs_sb_version_hascrc(&mp->m_sb))
4594                 return __this_address;
4595         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4596                 return __this_address;
4597         if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4598                 return __this_address;
4599         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4600                 return __this_address;
4601         return NULL;
4602 }
4603
4604 /**
4605  * xfs_btree_sblock_verify() -- verify a short-format btree block
4606  *
4607  * @bp: buffer containing the btree block
4608  * @max_recs: maximum records allowed in this btree node
4609  */
4610 xfs_failaddr_t
4611 xfs_btree_sblock_verify(
4612         struct xfs_buf          *bp,
4613         unsigned int            max_recs)
4614 {
4615         struct xfs_mount        *mp = bp->b_target->bt_mount;
4616         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4617         xfs_agblock_t           agno;
4618
4619         /* numrecs verification */
4620         if (be16_to_cpu(block->bb_numrecs) > max_recs)
4621                 return __this_address;
4622
4623         /* sibling pointer verification */
4624         agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
4625         if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
4626             !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
4627                 return __this_address;
4628         if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
4629             !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
4630                 return __this_address;
4631
4632         return NULL;
4633 }
4634
4635 /*
4636  * Calculate the number of btree levels needed to store a given number of
4637  * records in a short-format btree.
4638  */
4639 uint
4640 xfs_btree_compute_maxlevels(
4641         struct xfs_mount        *mp,
4642         uint                    *limits,
4643         unsigned long           len)
4644 {
4645         uint                    level;
4646         unsigned long           maxblocks;
4647
4648         maxblocks = (len + limits[0] - 1) / limits[0];
4649         for (level = 1; maxblocks > 1; level++)
4650                 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4651         return level;
4652 }
4653
4654 /*
4655  * Query a regular btree for all records overlapping a given interval.
4656  * Start with a LE lookup of the key of low_rec and return all records
4657  * until we find a record with a key greater than the key of high_rec.
4658  */
4659 STATIC int
4660 xfs_btree_simple_query_range(
4661         struct xfs_btree_cur            *cur,
4662         union xfs_btree_key             *low_key,
4663         union xfs_btree_key             *high_key,
4664         xfs_btree_query_range_fn        fn,
4665         void                            *priv)
4666 {
4667         union xfs_btree_rec             *recp;
4668         union xfs_btree_key             rec_key;
4669         int64_t                         diff;
4670         int                             stat;
4671         bool                            firstrec = true;
4672         int                             error;
4673
4674         ASSERT(cur->bc_ops->init_high_key_from_rec);
4675         ASSERT(cur->bc_ops->diff_two_keys);
4676
4677         /*
4678          * Find the leftmost record.  The btree cursor must be set
4679          * to the low record used to generate low_key.
4680          */
4681         stat = 0;
4682         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4683         if (error)
4684                 goto out;
4685
4686         /* Nothing?  See if there's anything to the right. */
4687         if (!stat) {
4688                 error = xfs_btree_increment(cur, 0, &stat);
4689                 if (error)
4690                         goto out;
4691         }
4692
4693         while (stat) {
4694                 /* Find the record. */
4695                 error = xfs_btree_get_rec(cur, &recp, &stat);
4696                 if (error || !stat)
4697                         break;
4698
4699                 /* Skip if high_key(rec) < low_key. */
4700                 if (firstrec) {
4701                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4702                         firstrec = false;
4703                         diff = cur->bc_ops->diff_two_keys(cur, low_key,
4704                                         &rec_key);
4705                         if (diff > 0)
4706                                 goto advloop;
4707                 }
4708
4709                 /* Stop if high_key < low_key(rec). */
4710                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
4711                 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4712                 if (diff > 0)
4713                         break;
4714
4715                 /* Callback */
4716                 error = fn(cur, recp, priv);
4717                 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
4718                         break;
4719
4720 advloop:
4721                 /* Move on to the next record. */
4722                 error = xfs_btree_increment(cur, 0, &stat);
4723                 if (error)
4724                         break;
4725         }
4726
4727 out:
4728         return error;
4729 }
4730
4731 /*
4732  * Query an overlapped interval btree for all records overlapping a given
4733  * interval.  This function roughly follows the algorithm given in
4734  * "Interval Trees" of _Introduction to Algorithms_, which is section
4735  * 14.3 in the 2nd and 3rd editions.
4736  *
4737  * First, generate keys for the low and high records passed in.
4738  *
4739  * For any leaf node, generate the high and low keys for the record.
4740  * If the record keys overlap with the query low/high keys, pass the
4741  * record to the function iterator.
4742  *
4743  * For any internal node, compare the low and high keys of each
4744  * pointer against the query low/high keys.  If there's an overlap,
4745  * follow the pointer.
4746  *
4747  * As an optimization, we stop scanning a block when we find a low key
4748  * that is greater than the query's high key.
4749  */
4750 STATIC int
4751 xfs_btree_overlapped_query_range(
4752         struct xfs_btree_cur            *cur,
4753         union xfs_btree_key             *low_key,
4754         union xfs_btree_key             *high_key,
4755         xfs_btree_query_range_fn        fn,
4756         void                            *priv)
4757 {
4758         union xfs_btree_ptr             ptr;
4759         union xfs_btree_ptr             *pp;
4760         union xfs_btree_key             rec_key;
4761         union xfs_btree_key             rec_hkey;
4762         union xfs_btree_key             *lkp;
4763         union xfs_btree_key             *hkp;
4764         union xfs_btree_rec             *recp;
4765         struct xfs_btree_block          *block;
4766         int64_t                         ldiff;
4767         int64_t                         hdiff;
4768         int                             level;
4769         struct xfs_buf                  *bp;
4770         int                             i;
4771         int                             error;
4772
4773         /* Load the root of the btree. */
4774         level = cur->bc_nlevels - 1;
4775         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4776         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4777         if (error)
4778                 return error;
4779         xfs_btree_get_block(cur, level, &bp);
4780         trace_xfs_btree_overlapped_query_range(cur, level, bp);
4781 #ifdef DEBUG
4782         error = xfs_btree_check_block(cur, block, level, bp);
4783         if (error)
4784                 goto out;
4785 #endif
4786         cur->bc_ptrs[level] = 1;
4787
4788         while (level < cur->bc_nlevels) {
4789                 block = xfs_btree_get_block(cur, level, &bp);
4790
4791                 /* End of node, pop back towards the root. */
4792                 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
4793 pop_up:
4794                         if (level < cur->bc_nlevels - 1)
4795                                 cur->bc_ptrs[level + 1]++;
4796                         level++;
4797                         continue;
4798                 }
4799
4800                 if (level == 0) {
4801                         /* Handle a leaf node. */
4802                         recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
4803
4804                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4805                         ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
4806                                         low_key);
4807
4808                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
4809                         hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
4810                                         &rec_key);
4811
4812                         /*
4813                          * If (record's high key >= query's low key) and
4814                          *    (query's high key >= record's low key), then
4815                          * this record overlaps the query range; callback.
4816                          */
4817                         if (ldiff >= 0 && hdiff >= 0) {
4818                                 error = fn(cur, recp, priv);
4819                                 if (error < 0 ||
4820                                     error == XFS_BTREE_QUERY_RANGE_ABORT)
4821                                         break;
4822                         } else if (hdiff < 0) {
4823                                 /* Record is larger than high key; pop. */
4824                                 goto pop_up;
4825                         }
4826                         cur->bc_ptrs[level]++;
4827                         continue;
4828                 }
4829
4830                 /* Handle an internal node. */
4831                 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
4832                 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
4833                 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
4834
4835                 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
4836                 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
4837
4838                 /*
4839                  * If (pointer's high key >= query's low key) and
4840                  *    (query's high key >= pointer's low key), then
4841                  * this record overlaps the query range; follow pointer.
4842                  */
4843                 if (ldiff >= 0 && hdiff >= 0) {
4844                         level--;
4845                         error = xfs_btree_lookup_get_block(cur, level, pp,
4846                                         &block);
4847                         if (error)
4848                                 goto out;
4849                         xfs_btree_get_block(cur, level, &bp);
4850                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
4851 #ifdef DEBUG
4852                         error = xfs_btree_check_block(cur, block, level, bp);
4853                         if (error)
4854                                 goto out;
4855 #endif
4856                         cur->bc_ptrs[level] = 1;
4857                         continue;
4858                 } else if (hdiff < 0) {
4859                         /* The low key is larger than the upper range; pop. */
4860                         goto pop_up;
4861                 }
4862                 cur->bc_ptrs[level]++;
4863         }
4864
4865 out:
4866         /*
4867          * If we don't end this function with the cursor pointing at a record
4868          * block, a subsequent non-error cursor deletion will not release
4869          * node-level buffers, causing a buffer leak.  This is quite possible
4870          * with a zero-results range query, so release the buffers if we
4871          * failed to return any results.
4872          */
4873         if (cur->bc_bufs[0] == NULL) {
4874                 for (i = 0; i < cur->bc_nlevels; i++) {
4875                         if (cur->bc_bufs[i]) {
4876                                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
4877                                 cur->bc_bufs[i] = NULL;
4878                                 cur->bc_ptrs[i] = 0;
4879                                 cur->bc_ra[i] = 0;
4880                         }
4881                 }
4882         }
4883
4884         return error;
4885 }
4886
4887 /*
4888  * Query a btree for all records overlapping a given interval of keys.  The
4889  * supplied function will be called with each record found; return one of the
4890  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4891  * code.  This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4892  * negative error code.
4893  */
4894 int
4895 xfs_btree_query_range(
4896         struct xfs_btree_cur            *cur,
4897         union xfs_btree_irec            *low_rec,
4898         union xfs_btree_irec            *high_rec,
4899         xfs_btree_query_range_fn        fn,
4900         void                            *priv)
4901 {
4902         union xfs_btree_rec             rec;
4903         union xfs_btree_key             low_key;
4904         union xfs_btree_key             high_key;
4905
4906         /* Find the keys of both ends of the interval. */
4907         cur->bc_rec = *high_rec;
4908         cur->bc_ops->init_rec_from_cur(cur, &rec);
4909         cur->bc_ops->init_key_from_rec(&high_key, &rec);
4910
4911         cur->bc_rec = *low_rec;
4912         cur->bc_ops->init_rec_from_cur(cur, &rec);
4913         cur->bc_ops->init_key_from_rec(&low_key, &rec);
4914
4915         /* Enforce low key < high key. */
4916         if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4917                 return -EINVAL;
4918
4919         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4920                 return xfs_btree_simple_query_range(cur, &low_key,
4921                                 &high_key, fn, priv);
4922         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4923                         fn, priv);
4924 }
4925
4926 /* Query a btree for all records. */
4927 int
4928 xfs_btree_query_all(
4929         struct xfs_btree_cur            *cur,
4930         xfs_btree_query_range_fn        fn,
4931         void                            *priv)
4932 {
4933         union xfs_btree_key             low_key;
4934         union xfs_btree_key             high_key;
4935
4936         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
4937         memset(&low_key, 0, sizeof(low_key));
4938         memset(&high_key, 0xFF, sizeof(high_key));
4939
4940         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
4941 }
4942
4943 /*
4944  * Calculate the number of blocks needed to store a given number of records
4945  * in a short-format (per-AG metadata) btree.
4946  */
4947 xfs_extlen_t
4948 xfs_btree_calc_size(
4949         struct xfs_mount        *mp,
4950         uint                    *limits,
4951         unsigned long long      len)
4952 {
4953         int                     level;
4954         int                     maxrecs;
4955         xfs_extlen_t            rval;
4956
4957         maxrecs = limits[0];
4958         for (level = 0, rval = 0; len > 1; level++) {
4959                 len += maxrecs - 1;
4960                 do_div(len, maxrecs);
4961                 maxrecs = limits[1];
4962                 rval += len;
4963         }
4964         return rval;
4965 }
4966
4967 static int
4968 xfs_btree_count_blocks_helper(
4969         struct xfs_btree_cur    *cur,
4970         int                     level,
4971         void                    *data)
4972 {
4973         xfs_extlen_t            *blocks = data;
4974         (*blocks)++;
4975
4976         return 0;
4977 }
4978
4979 /* Count the blocks in a btree and return the result in *blocks. */
4980 int
4981 xfs_btree_count_blocks(
4982         struct xfs_btree_cur    *cur,
4983         xfs_extlen_t            *blocks)
4984 {
4985         *blocks = 0;
4986         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4987                         blocks);
4988 }
4989
4990 /* Compare two btree pointers. */
4991 int64_t
4992 xfs_btree_diff_two_ptrs(
4993         struct xfs_btree_cur            *cur,
4994         const union xfs_btree_ptr       *a,
4995         const union xfs_btree_ptr       *b)
4996 {
4997         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4998                 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
4999         return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
5000 }
5001
5002 /* If there's an extent, we're done. */
5003 STATIC int
5004 xfs_btree_has_record_helper(
5005         struct xfs_btree_cur            *cur,
5006         union xfs_btree_rec             *rec,
5007         void                            *priv)
5008 {
5009         return XFS_BTREE_QUERY_RANGE_ABORT;
5010 }
5011
5012 /* Is there a record covering a given range of keys? */
5013 int
5014 xfs_btree_has_record(
5015         struct xfs_btree_cur    *cur,
5016         union xfs_btree_irec    *low,
5017         union xfs_btree_irec    *high,
5018         bool                    *exists)
5019 {
5020         int                     error;
5021
5022         error = xfs_btree_query_range(cur, low, high,
5023                         &xfs_btree_has_record_helper, NULL);
5024         if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
5025                 *exists = true;
5026                 return 0;
5027         }
5028         *exists = false;
5029         return error;
5030 }