1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
22 * Check for btree operation errors. See the section about handling
23 * operational errors in common.c.
26 __xchk_btree_process_error(
28 struct xfs_btree_cur *cur,
39 /* Used to restart an op with deadlock avoidance. */
40 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44 /* Note the badness but don't abort. */
45 sc->sm->sm_flags |= errflag;
49 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
50 trace_xchk_ifork_btree_op_error(sc, cur, level,
53 trace_xchk_btree_op_error(sc, cur, level,
61 xchk_btree_process_error(
63 struct xfs_btree_cur *cur,
67 return __xchk_btree_process_error(sc, cur, level, error,
68 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
72 xchk_btree_xref_process_error(
74 struct xfs_btree_cur *cur,
78 return __xchk_btree_process_error(sc, cur, level, error,
79 XFS_SCRUB_OFLAG_XFAIL, __return_address);
82 /* Record btree block corruption. */
84 __xchk_btree_set_corrupt(
86 struct xfs_btree_cur *cur,
91 sc->sm->sm_flags |= errflag;
93 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
94 trace_xchk_ifork_btree_error(sc, cur, level,
97 trace_xchk_btree_error(sc, cur, level,
102 xchk_btree_set_corrupt(
103 struct xfs_scrub *sc,
104 struct xfs_btree_cur *cur,
107 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
112 xchk_btree_xref_set_corrupt(
113 struct xfs_scrub *sc,
114 struct xfs_btree_cur *cur,
117 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
122 * Make sure this record is in order and doesn't stray outside of the parent
127 struct xchk_btree *bs)
129 struct xfs_btree_cur *cur = bs->cur;
130 union xfs_btree_rec *rec;
131 union xfs_btree_key key;
132 union xfs_btree_key hkey;
133 union xfs_btree_key *keyp;
134 struct xfs_btree_block *block;
135 struct xfs_btree_block *keyblock;
138 block = xfs_btree_get_block(cur, 0, &bp);
139 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
141 trace_xchk_btree_rec(bs->sc, cur, 0);
143 /* If this isn't the first record, are they in order? */
144 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
145 xchk_btree_set_corrupt(bs->sc, cur, 0);
146 bs->firstrec = false;
147 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
149 if (cur->bc_nlevels == 1)
152 /* Is this at least as large as the parent low key? */
153 cur->bc_ops->init_key_from_rec(&key, rec);
154 keyblock = xfs_btree_get_block(cur, 1, &bp);
155 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
156 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
157 xchk_btree_set_corrupt(bs->sc, cur, 1);
159 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
162 /* Is this no larger than the parent high key? */
163 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
164 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
165 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
166 xchk_btree_set_corrupt(bs->sc, cur, 1);
170 * Make sure this key is in order and doesn't stray outside of the parent
175 struct xchk_btree *bs,
178 struct xfs_btree_cur *cur = bs->cur;
179 union xfs_btree_key *key;
180 union xfs_btree_key *keyp;
181 struct xfs_btree_block *block;
182 struct xfs_btree_block *keyblock;
185 block = xfs_btree_get_block(cur, level, &bp);
186 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
188 trace_xchk_btree_key(bs->sc, cur, level);
190 /* If this isn't the first key, are they in order? */
191 if (!bs->firstkey[level] &&
192 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
193 xchk_btree_set_corrupt(bs->sc, cur, level);
194 bs->firstkey[level] = false;
195 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
197 if (level + 1 >= cur->bc_nlevels)
200 /* Is this at least as large as the parent low key? */
201 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
202 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
203 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
204 xchk_btree_set_corrupt(bs->sc, cur, level);
206 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
209 /* Is this no larger than the parent high key? */
210 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
211 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
212 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
213 xchk_btree_set_corrupt(bs->sc, cur, level);
217 * Check a btree pointer. Returns true if it's ok to use this pointer.
218 * Callers do not need to set the corrupt flag.
222 struct xchk_btree *bs,
224 union xfs_btree_ptr *ptr)
228 /* A btree rooted in an inode has no block pointer to the root. */
229 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
230 level == bs->cur->bc_nlevels)
233 /* Otherwise, check the pointers. */
234 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
235 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
237 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
239 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
244 /* Check that a btree block's sibling matches what we expect it. */
246 xchk_btree_block_check_sibling(
247 struct xchk_btree *bs,
250 union xfs_btree_ptr *sibling)
252 struct xfs_btree_cur *cur = bs->cur;
253 struct xfs_btree_block *pblock;
255 struct xfs_btree_cur *ncur = NULL;
256 union xfs_btree_ptr *pp;
260 error = xfs_btree_dup_cursor(cur, &ncur);
261 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
266 * If the pointer is null, we shouldn't be able to move the upper
267 * level pointer anywhere.
269 if (xfs_btree_ptr_is_null(cur, sibling)) {
271 error = xfs_btree_increment(ncur, level + 1, &success);
273 error = xfs_btree_decrement(ncur, level + 1, &success);
274 if (error == 0 && success)
275 xchk_btree_set_corrupt(bs->sc, cur, level);
280 /* Increment upper level pointer. */
282 error = xfs_btree_increment(ncur, level + 1, &success);
284 error = xfs_btree_decrement(ncur, level + 1, &success);
285 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
288 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
292 /* Compare upper level pointer to sibling pointer. */
293 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
294 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
295 if (!xchk_btree_ptr_ok(bs, level + 1, pp))
298 xchk_buffer_recheck(bs->sc, pbp);
300 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
301 xchk_btree_set_corrupt(bs->sc, cur, level);
303 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
307 /* Check the siblings of a btree block. */
309 xchk_btree_block_check_siblings(
310 struct xchk_btree *bs,
311 struct xfs_btree_block *block)
313 struct xfs_btree_cur *cur = bs->cur;
314 union xfs_btree_ptr leftsib;
315 union xfs_btree_ptr rightsib;
319 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
320 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
321 level = xfs_btree_get_level(block);
323 /* Root block should never have siblings. */
324 if (level == cur->bc_nlevels - 1) {
325 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
326 !xfs_btree_ptr_is_null(cur, &rightsib))
327 xchk_btree_set_corrupt(bs->sc, cur, level);
332 * Does the left & right sibling pointers match the adjacent
333 * parent level pointers?
334 * (These function absorbs error codes for us.)
336 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
339 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
347 struct list_head list;
353 * Make sure this btree block isn't in the free list and that there's
354 * an rmap record for it.
357 xchk_btree_check_block_owner(
358 struct xchk_btree *bs,
371 btnum = bs->cur->bc_btnum;
372 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
373 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
375 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
377 error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
378 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
383 xchk_xref_is_used_space(bs->sc, agbno, 1);
385 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
386 * have to nullify it (to shut down further block owner checks) if
387 * self-xref encounters problems.
389 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
392 xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
393 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
397 xchk_ag_free(bs->sc, &bs->sc->sa);
402 /* Check the owner of a btree block. */
404 xchk_btree_check_owner(
405 struct xchk_btree *bs,
409 struct xfs_btree_cur *cur = bs->cur;
410 struct check_owner *co;
413 * In theory, xfs_btree_get_block should only give us a null buffer
414 * pointer for the root of a root-in-inode btree type, but we need
415 * to check defensively here in case the cursor state is also screwed
419 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
420 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
425 * We want to cross-reference each btree block with the bnobt
426 * and the rmapbt. We cannot cross-reference the bnobt or
427 * rmapbt while scanning the bnobt or rmapbt, respectively,
428 * because we cannot alter the cursor and we'd prefer not to
429 * duplicate cursors. Therefore, save the buffer daddr for
432 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
433 co = kmem_alloc(sizeof(struct check_owner),
438 co->daddr = XFS_BUF_ADDR(bp);
439 list_add_tail(&co->list, &bs->to_check);
443 return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
446 /* Decide if we want to check minrecs of a btree block in the inode root. */
448 xchk_btree_check_iroot_minrecs(
449 struct xchk_btree *bs)
452 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
453 * would miscalculate the space required for the data fork bmbt root
454 * when adding an attr fork, and promote the iroot contents to an
455 * external block unnecessarily. This went unnoticed for many years
456 * until scrub found filesystems in this state. Inode rooted btrees are
457 * not supposed to have immediate child blocks that are small enough
458 * that the contents could fit in the inode root, but we can't fail
459 * existing filesystems, so instead we disable the check for data fork
460 * bmap btrees when there's an attr fork.
462 if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
463 bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
464 XFS_IFORK_Q(bs->sc->ip))
471 * Check that this btree block has at least minrecs records or is one of the
472 * special blocks that don't require that.
475 xchk_btree_check_minrecs(
476 struct xchk_btree *bs,
478 struct xfs_btree_block *block)
480 struct xfs_btree_cur *cur = bs->cur;
481 unsigned int root_level = cur->bc_nlevels - 1;
482 unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
484 /* More records than minrecs means the block is ok. */
485 if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
489 * For btrees rooted in the inode, it's possible that the root block
490 * contents spilled into a regular ondisk block because there wasn't
491 * enough space in the inode root. The number of records in that
492 * child block might be less than the standard minrecs, but that's ok
493 * provided that there's only one direct child of the root.
495 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
496 level == cur->bc_nlevels - 2) {
497 struct xfs_btree_block *root_block;
498 struct xfs_buf *root_bp;
501 root_block = xfs_btree_get_block(cur, root_level, &root_bp);
502 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
503 if (xchk_btree_check_iroot_minrecs(bs) &&
504 (be16_to_cpu(root_block->bb_numrecs) != 1 ||
505 numrecs <= root_maxrecs))
506 xchk_btree_set_corrupt(bs->sc, cur, level);
511 * Otherwise, only the root level is allowed to have fewer than minrecs
512 * records or keyptrs.
514 if (level < root_level)
515 xchk_btree_set_corrupt(bs->sc, cur, level);
519 * Grab and scrub a btree block given a btree pointer. Returns block
520 * and buffer pointers (if applicable) if they're ok to use.
523 xchk_btree_get_block(
524 struct xchk_btree *bs,
526 union xfs_btree_ptr *pp,
527 struct xfs_btree_block **pblock,
528 struct xfs_buf **pbp)
530 xfs_failaddr_t failed_at;
536 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
537 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
541 xfs_btree_get_block(bs->cur, level, pbp);
542 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
543 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
546 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
549 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
553 xchk_buffer_recheck(bs->sc, *pbp);
555 xchk_btree_check_minrecs(bs, level, *pblock);
558 * Check the block's owner; this function absorbs error codes
561 error = xchk_btree_check_owner(bs, level, *pbp);
566 * Check the block's siblings; this function absorbs error codes
569 return xchk_btree_block_check_siblings(bs, *pblock);
573 * Check that the low and high keys of this block match the keys stored
574 * in the parent block.
577 xchk_btree_block_keys(
578 struct xchk_btree *bs,
580 struct xfs_btree_block *block)
582 union xfs_btree_key block_keys;
583 struct xfs_btree_cur *cur = bs->cur;
584 union xfs_btree_key *high_bk;
585 union xfs_btree_key *parent_keys;
586 union xfs_btree_key *high_pk;
587 struct xfs_btree_block *parent_block;
590 if (level >= cur->bc_nlevels - 1)
593 /* Calculate the keys for this block. */
594 xfs_btree_get_keys(cur, block, &block_keys);
596 /* Obtain the parent's copy of the keys for this block. */
597 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
598 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
601 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
602 xchk_btree_set_corrupt(bs->sc, cur, 1);
604 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
608 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
609 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
612 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
613 xchk_btree_set_corrupt(bs->sc, cur, 1);
617 * Visit all nodes and leaves of a btree. Check that all pointers and
618 * records are in order, that the keys reflect the records, and use a callback
619 * so that the caller can verify individual records.
623 struct xfs_scrub *sc,
624 struct xfs_btree_cur *cur,
625 xchk_btree_rec_fn scrub_fn,
626 const struct xfs_owner_info *oinfo,
629 struct xchk_btree bs = {
631 .scrub_rec = scrub_fn,
637 union xfs_btree_ptr ptr;
638 union xfs_btree_ptr *pp;
639 union xfs_btree_rec *recp;
640 struct xfs_btree_block *block;
643 struct check_owner *co;
644 struct check_owner *n;
648 /* Initialize scrub state */
649 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
650 bs.firstkey[i] = true;
651 INIT_LIST_HEAD(&bs.to_check);
653 /* Don't try to check a tree with a height we can't handle. */
654 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
655 xchk_btree_set_corrupt(sc, cur, 0);
660 * Load the root of the btree. The helper function absorbs
661 * error codes for us.
663 level = cur->bc_nlevels - 1;
664 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
665 if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
667 error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
671 cur->bc_ptrs[level] = 1;
673 while (level < cur->bc_nlevels) {
674 block = xfs_btree_get_block(cur, level, &bp);
677 /* End of leaf, pop back towards the root. */
678 if (cur->bc_ptrs[level] >
679 be16_to_cpu(block->bb_numrecs)) {
680 xchk_btree_block_keys(&bs, level, block);
681 if (level < cur->bc_nlevels - 1)
682 cur->bc_ptrs[level + 1]++;
687 /* Records in order for scrub? */
690 /* Call out to the record checker. */
691 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
692 error = bs.scrub_rec(&bs, recp);
695 if (xchk_should_terminate(sc, &error) ||
696 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
699 cur->bc_ptrs[level]++;
703 /* End of node, pop back towards the root. */
704 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
705 xchk_btree_block_keys(&bs, level, block);
706 if (level < cur->bc_nlevels - 1)
707 cur->bc_ptrs[level + 1]++;
712 /* Keys in order for scrub? */
713 xchk_btree_key(&bs, level);
715 /* Drill another level deeper. */
716 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
717 if (!xchk_btree_ptr_ok(&bs, level, pp)) {
718 cur->bc_ptrs[level]++;
722 error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
726 cur->bc_ptrs[level] = 1;
730 /* Process deferred owner checks on btree blocks. */
731 list_for_each_entry_safe(co, n, &bs.to_check, list) {
732 if (!error && bs.cur)
733 error = xchk_btree_check_block_owner(&bs,
734 co->level, co->daddr);