Linux 6.9-rc1
[linux-2.6-microblaze.git] / fs / xfs / scrub / dabtree.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22
23 /* Directory/Attribute Btree */
24
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31         struct xchk_da_btree    *ds,
32         int                     level,
33         int                     *error)
34 {
35         struct xfs_scrub        *sc = ds->sc;
36
37         if (*error == 0)
38                 return true;
39
40         switch (*error) {
41         case -EDEADLOCK:
42         case -ECHRNG:
43                 /* Used to restart an op with deadlock avoidance. */
44                 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
45                 break;
46         case -EFSBADCRC:
47         case -EFSCORRUPTED:
48                 /* Note the badness but don't abort. */
49                 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
50                 *error = 0;
51                 fallthrough;
52         default:
53                 trace_xchk_file_op_error(sc, ds->dargs.whichfork,
54                                 xfs_dir2_da_to_db(ds->dargs.geo,
55                                         ds->state->path.blk[level].blkno),
56                                 *error, __return_address);
57                 break;
58         }
59         return false;
60 }
61
62 /*
63  * Check for da btree corruption.  See the section about handling
64  * operational errors in common.c.
65  */
66 void
67 xchk_da_set_corrupt(
68         struct xchk_da_btree    *ds,
69         int                     level)
70 {
71         struct xfs_scrub        *sc = ds->sc;
72
73         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
74
75         trace_xchk_fblock_error(sc, ds->dargs.whichfork,
76                         xfs_dir2_da_to_db(ds->dargs.geo,
77                                 ds->state->path.blk[level].blkno),
78                         __return_address);
79 }
80
81 static struct xfs_da_node_entry *
82 xchk_da_btree_node_entry(
83         struct xchk_da_btree            *ds,
84         int                             level)
85 {
86         struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
87         struct xfs_da3_icnode_hdr       hdr;
88
89         ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
90
91         xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
92         return hdr.btree + blk->index;
93 }
94
95 /* Scrub a da btree hash (key). */
96 int
97 xchk_da_btree_hash(
98         struct xchk_da_btree            *ds,
99         int                             level,
100         __be32                          *hashp)
101 {
102         struct xfs_da_node_entry        *entry;
103         xfs_dahash_t                    hash;
104         xfs_dahash_t                    parent_hash;
105
106         /* Is this hash in order? */
107         hash = be32_to_cpu(*hashp);
108         if (hash < ds->hashes[level])
109                 xchk_da_set_corrupt(ds, level);
110         ds->hashes[level] = hash;
111
112         if (level == 0)
113                 return 0;
114
115         /* Is this hash no larger than the parent hash? */
116         entry = xchk_da_btree_node_entry(ds, level - 1);
117         parent_hash = be32_to_cpu(entry->hashval);
118         if (parent_hash < hash)
119                 xchk_da_set_corrupt(ds, level);
120
121         return 0;
122 }
123
124 /*
125  * Check a da btree pointer.  Returns true if it's ok to use this
126  * pointer.
127  */
128 STATIC bool
129 xchk_da_btree_ptr_ok(
130         struct xchk_da_btree    *ds,
131         int                     level,
132         xfs_dablk_t             blkno)
133 {
134         if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
135                 xchk_da_set_corrupt(ds, level);
136                 return false;
137         }
138
139         return true;
140 }
141
142 /*
143  * The da btree scrubber can handle leaf1 blocks as a degenerate
144  * form of leafn blocks.  Since the regular da code doesn't handle
145  * leaf1, we must multiplex the verifiers.
146  */
147 static void
148 xchk_da_btree_read_verify(
149         struct xfs_buf          *bp)
150 {
151         struct xfs_da_blkinfo   *info = bp->b_addr;
152
153         switch (be16_to_cpu(info->magic)) {
154         case XFS_DIR2_LEAF1_MAGIC:
155         case XFS_DIR3_LEAF1_MAGIC:
156                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
157                 bp->b_ops->verify_read(bp);
158                 return;
159         default:
160                 /*
161                  * xfs_da3_node_buf_ops already know how to handle
162                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
163                  */
164                 bp->b_ops = &xfs_da3_node_buf_ops;
165                 bp->b_ops->verify_read(bp);
166                 return;
167         }
168 }
169 static void
170 xchk_da_btree_write_verify(
171         struct xfs_buf          *bp)
172 {
173         struct xfs_da_blkinfo   *info = bp->b_addr;
174
175         switch (be16_to_cpu(info->magic)) {
176         case XFS_DIR2_LEAF1_MAGIC:
177         case XFS_DIR3_LEAF1_MAGIC:
178                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
179                 bp->b_ops->verify_write(bp);
180                 return;
181         default:
182                 /*
183                  * xfs_da3_node_buf_ops already know how to handle
184                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
185                  */
186                 bp->b_ops = &xfs_da3_node_buf_ops;
187                 bp->b_ops->verify_write(bp);
188                 return;
189         }
190 }
191 static void *
192 xchk_da_btree_verify(
193         struct xfs_buf          *bp)
194 {
195         struct xfs_da_blkinfo   *info = bp->b_addr;
196
197         switch (be16_to_cpu(info->magic)) {
198         case XFS_DIR2_LEAF1_MAGIC:
199         case XFS_DIR3_LEAF1_MAGIC:
200                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
201                 return bp->b_ops->verify_struct(bp);
202         default:
203                 bp->b_ops = &xfs_da3_node_buf_ops;
204                 return bp->b_ops->verify_struct(bp);
205         }
206 }
207
208 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
209         .name = "xchk_da_btree",
210         .verify_read = xchk_da_btree_read_verify,
211         .verify_write = xchk_da_btree_write_verify,
212         .verify_struct = xchk_da_btree_verify,
213 };
214
215 /* Check a block's sibling. */
216 STATIC int
217 xchk_da_btree_block_check_sibling(
218         struct xchk_da_btree    *ds,
219         int                     level,
220         int                     direction,
221         xfs_dablk_t             sibling)
222 {
223         struct xfs_da_state_path *path = &ds->state->path;
224         struct xfs_da_state_path *altpath = &ds->state->altpath;
225         int                     retval;
226         int                     plevel;
227         int                     error;
228
229         memcpy(altpath, path, sizeof(ds->state->altpath));
230
231         /*
232          * If the pointer is null, we shouldn't be able to move the upper
233          * level pointer anywhere.
234          */
235         if (sibling == 0) {
236                 error = xfs_da3_path_shift(ds->state, altpath, direction,
237                                 false, &retval);
238                 if (error == 0 && retval == 0)
239                         xchk_da_set_corrupt(ds, level);
240                 error = 0;
241                 goto out;
242         }
243
244         /* Move the alternate cursor one block in the direction given. */
245         error = xfs_da3_path_shift(ds->state, altpath, direction, false,
246                         &retval);
247         if (!xchk_da_process_error(ds, level, &error))
248                 goto out;
249         if (retval) {
250                 xchk_da_set_corrupt(ds, level);
251                 goto out;
252         }
253         if (altpath->blk[level].bp)
254                 xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
255
256         /* Compare upper level pointer to sibling pointer. */
257         if (altpath->blk[level].blkno != sibling)
258                 xchk_da_set_corrupt(ds, level);
259
260 out:
261         /* Free all buffers in the altpath that aren't referenced from path. */
262         for (plevel = 0; plevel < altpath->active; plevel++) {
263                 if (altpath->blk[plevel].bp == NULL ||
264                     (plevel < path->active &&
265                      altpath->blk[plevel].bp == path->blk[plevel].bp))
266                         continue;
267
268                 xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
269                 altpath->blk[plevel].bp = NULL;
270         }
271
272         return error;
273 }
274
275 /* Check a block's sibling pointers. */
276 STATIC int
277 xchk_da_btree_block_check_siblings(
278         struct xchk_da_btree    *ds,
279         int                     level,
280         struct xfs_da_blkinfo   *hdr)
281 {
282         xfs_dablk_t             forw;
283         xfs_dablk_t             back;
284         int                     error = 0;
285
286         forw = be32_to_cpu(hdr->forw);
287         back = be32_to_cpu(hdr->back);
288
289         /* Top level blocks should not have sibling pointers. */
290         if (level == 0) {
291                 if (forw != 0 || back != 0)
292                         xchk_da_set_corrupt(ds, level);
293                 return 0;
294         }
295
296         /*
297          * Check back (left) and forw (right) pointers.  These functions
298          * absorb error codes for us.
299          */
300         error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
301         if (error)
302                 goto out;
303         error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
304
305 out:
306         memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
307         return error;
308 }
309
310 /* Load a dir/attribute block from a btree. */
311 STATIC int
312 xchk_da_btree_block(
313         struct xchk_da_btree            *ds,
314         int                             level,
315         xfs_dablk_t                     blkno)
316 {
317         struct xfs_da_state_blk         *blk;
318         struct xfs_da_intnode           *node;
319         struct xfs_da_node_entry        *btree;
320         struct xfs_da3_blkinfo          *hdr3;
321         struct xfs_da_args              *dargs = &ds->dargs;
322         struct xfs_inode                *ip = ds->dargs.dp;
323         xfs_ino_t                       owner;
324         int                             *pmaxrecs;
325         struct xfs_da3_icnode_hdr       nodehdr;
326         int                             error = 0;
327
328         blk = &ds->state->path.blk[level];
329         ds->state->path.active = level + 1;
330
331         /* Release old block. */
332         if (blk->bp) {
333                 xfs_trans_brelse(dargs->trans, blk->bp);
334                 blk->bp = NULL;
335         }
336
337         /* Check the pointer. */
338         blk->blkno = blkno;
339         if (!xchk_da_btree_ptr_ok(ds, level, blkno))
340                 goto out_nobuf;
341
342         /* Read the buffer. */
343         error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
344                         XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
345                         &xchk_da_btree_buf_ops);
346         if (!xchk_da_process_error(ds, level, &error))
347                 goto out_nobuf;
348         if (blk->bp)
349                 xchk_buffer_recheck(ds->sc, blk->bp);
350
351         /*
352          * We didn't find a dir btree root block, which means that
353          * there's no LEAF1/LEAFN tree (at least not where it's supposed
354          * to be), so jump out now.
355          */
356         if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
357                         blk->bp == NULL)
358                 goto out_nobuf;
359
360         /* It's /not/ ok for attr trees not to have a da btree. */
361         if (blk->bp == NULL) {
362                 xchk_da_set_corrupt(ds, level);
363                 goto out_nobuf;
364         }
365
366         hdr3 = blk->bp->b_addr;
367         blk->magic = be16_to_cpu(hdr3->hdr.magic);
368         pmaxrecs = &ds->maxrecs[level];
369
370         /* We only started zeroing the header on v5 filesystems. */
371         if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
372                 xchk_da_set_corrupt(ds, level);
373
374         /* Check the owner. */
375         if (xfs_has_crc(ip->i_mount)) {
376                 owner = be64_to_cpu(hdr3->owner);
377                 if (owner != ip->i_ino)
378                         xchk_da_set_corrupt(ds, level);
379         }
380
381         /* Check the siblings. */
382         error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
383         if (error)
384                 goto out;
385
386         /* Interpret the buffer. */
387         switch (blk->magic) {
388         case XFS_ATTR_LEAF_MAGIC:
389         case XFS_ATTR3_LEAF_MAGIC:
390                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
391                                 XFS_BLFT_ATTR_LEAF_BUF);
392                 blk->magic = XFS_ATTR_LEAF_MAGIC;
393                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
394                 if (ds->tree_level != 0)
395                         xchk_da_set_corrupt(ds, level);
396                 break;
397         case XFS_DIR2_LEAFN_MAGIC:
398         case XFS_DIR3_LEAFN_MAGIC:
399                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
400                                 XFS_BLFT_DIR_LEAFN_BUF);
401                 blk->magic = XFS_DIR2_LEAFN_MAGIC;
402                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
403                 if (ds->tree_level != 0)
404                         xchk_da_set_corrupt(ds, level);
405                 break;
406         case XFS_DIR2_LEAF1_MAGIC:
407         case XFS_DIR3_LEAF1_MAGIC:
408                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
409                                 XFS_BLFT_DIR_LEAF1_BUF);
410                 blk->magic = XFS_DIR2_LEAF1_MAGIC;
411                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
412                 if (ds->tree_level != 0)
413                         xchk_da_set_corrupt(ds, level);
414                 break;
415         case XFS_DA_NODE_MAGIC:
416         case XFS_DA3_NODE_MAGIC:
417                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
418                                 XFS_BLFT_DA_NODE_BUF);
419                 blk->magic = XFS_DA_NODE_MAGIC;
420                 node = blk->bp->b_addr;
421                 xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
422                 btree = nodehdr.btree;
423                 *pmaxrecs = nodehdr.count;
424                 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
425                 if (level == 0) {
426                         if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
427                                 xchk_da_set_corrupt(ds, level);
428                                 goto out_freebp;
429                         }
430                         ds->tree_level = nodehdr.level;
431                 } else {
432                         if (ds->tree_level != nodehdr.level) {
433                                 xchk_da_set_corrupt(ds, level);
434                                 goto out_freebp;
435                         }
436                 }
437
438                 /* XXX: Check hdr3.pad32 once we know how to fix it. */
439                 break;
440         default:
441                 xchk_da_set_corrupt(ds, level);
442                 goto out_freebp;
443         }
444
445         /*
446          * If we've been handed a block that is below the dabtree root, does
447          * its hashval match what the parent block expected to see?
448          */
449         if (level > 0) {
450                 struct xfs_da_node_entry        *key;
451
452                 key = xchk_da_btree_node_entry(ds, level - 1);
453                 if (be32_to_cpu(key->hashval) != blk->hashval) {
454                         xchk_da_set_corrupt(ds, level);
455                         goto out_freebp;
456                 }
457         }
458
459 out:
460         return error;
461 out_freebp:
462         xfs_trans_brelse(dargs->trans, blk->bp);
463         blk->bp = NULL;
464 out_nobuf:
465         blk->blkno = 0;
466         return error;
467 }
468
469 /* Visit all nodes and leaves of a da btree. */
470 int
471 xchk_da_btree(
472         struct xfs_scrub                *sc,
473         int                             whichfork,
474         xchk_da_btree_rec_fn            scrub_fn,
475         void                            *private)
476 {
477         struct xchk_da_btree            *ds;
478         struct xfs_mount                *mp = sc->mp;
479         struct xfs_da_state_blk         *blks;
480         struct xfs_da_node_entry        *key;
481         xfs_dablk_t                     blkno;
482         int                             level;
483         int                             error;
484
485         /* Skip short format data structures; no btree to scan. */
486         if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
487                 return 0;
488
489         /* Set up initial da state. */
490         ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
491         if (!ds)
492                 return -ENOMEM;
493         ds->dargs.dp = sc->ip;
494         ds->dargs.whichfork = whichfork;
495         ds->dargs.trans = sc->tp;
496         ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
497         ds->state = xfs_da_state_alloc(&ds->dargs);
498         ds->sc = sc;
499         ds->private = private;
500         if (whichfork == XFS_ATTR_FORK) {
501                 ds->dargs.geo = mp->m_attr_geo;
502                 ds->lowest = 0;
503                 ds->highest = 0;
504         } else {
505                 ds->dargs.geo = mp->m_dir_geo;
506                 ds->lowest = ds->dargs.geo->leafblk;
507                 ds->highest = ds->dargs.geo->freeblk;
508         }
509         blkno = ds->lowest;
510         level = 0;
511
512         /* Find the root of the da tree, if present. */
513         blks = ds->state->path.blk;
514         error = xchk_da_btree_block(ds, level, blkno);
515         if (error)
516                 goto out_state;
517         /*
518          * We didn't find a block at ds->lowest, which means that there's
519          * no LEAF1/LEAFN tree (at least not where it's supposed to be),
520          * so jump out now.
521          */
522         if (blks[level].bp == NULL)
523                 goto out_state;
524
525         blks[level].index = 0;
526         while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
527                 /* Handle leaf block. */
528                 if (blks[level].magic != XFS_DA_NODE_MAGIC) {
529                         /* End of leaf, pop back towards the root. */
530                         if (blks[level].index >= ds->maxrecs[level]) {
531                                 if (level > 0)
532                                         blks[level - 1].index++;
533                                 ds->tree_level++;
534                                 level--;
535                                 continue;
536                         }
537
538                         /* Dispatch record scrubbing. */
539                         error = scrub_fn(ds, level);
540                         if (error)
541                                 break;
542                         if (xchk_should_terminate(sc, &error) ||
543                             (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
544                                 break;
545
546                         blks[level].index++;
547                         continue;
548                 }
549
550
551                 /* End of node, pop back towards the root. */
552                 if (blks[level].index >= ds->maxrecs[level]) {
553                         if (level > 0)
554                                 blks[level - 1].index++;
555                         ds->tree_level++;
556                         level--;
557                         continue;
558                 }
559
560                 /* Hashes in order for scrub? */
561                 key = xchk_da_btree_node_entry(ds, level);
562                 error = xchk_da_btree_hash(ds, level, &key->hashval);
563                 if (error)
564                         goto out;
565
566                 /* Drill another level deeper. */
567                 blkno = be32_to_cpu(key->before);
568                 level++;
569                 if (level >= XFS_DA_NODE_MAXDEPTH) {
570                         /* Too deep! */
571                         xchk_da_set_corrupt(ds, level - 1);
572                         break;
573                 }
574                 ds->tree_level--;
575                 error = xchk_da_btree_block(ds, level, blkno);
576                 if (error)
577                         goto out;
578                 if (blks[level].bp == NULL)
579                         goto out;
580
581                 blks[level].index = 0;
582         }
583
584 out:
585         /* Release all the buffers we're tracking. */
586         for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
587                 if (blks[level].bp == NULL)
588                         continue;
589                 xfs_trans_brelse(sc->tp, blks[level].bp);
590                 blks[level].bp = NULL;
591         }
592
593 out_state:
594         xfs_da_state_free(ds->state);
595         kfree(ds);
596         return error;
597 }