Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
[linux-2.6-microblaze.git] / fs / xfs / scrub / dabtree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22
23 /* Directory/Attribute Btree */
24
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31         struct xchk_da_btree    *ds,
32         int                     level,
33         int                     *error)
34 {
35         struct xfs_scrub        *sc = ds->sc;
36
37         if (*error == 0)
38                 return true;
39
40         switch (*error) {
41         case -EDEADLOCK:
42                 /* Used to restart an op with deadlock avoidance. */
43                 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44                 break;
45         case -EFSBADCRC:
46         case -EFSCORRUPTED:
47                 /* Note the badness but don't abort. */
48                 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
49                 *error = 0;
50                 fallthrough;
51         default:
52                 trace_xchk_file_op_error(sc, ds->dargs.whichfork,
53                                 xfs_dir2_da_to_db(ds->dargs.geo,
54                                         ds->state->path.blk[level].blkno),
55                                 *error, __return_address);
56                 break;
57         }
58         return false;
59 }
60
61 /*
62  * Check for da btree corruption.  See the section about handling
63  * operational errors in common.c.
64  */
65 void
66 xchk_da_set_corrupt(
67         struct xchk_da_btree    *ds,
68         int                     level)
69 {
70         struct xfs_scrub        *sc = ds->sc;
71
72         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
73
74         trace_xchk_fblock_error(sc, ds->dargs.whichfork,
75                         xfs_dir2_da_to_db(ds->dargs.geo,
76                                 ds->state->path.blk[level].blkno),
77                         __return_address);
78 }
79
80 static struct xfs_da_node_entry *
81 xchk_da_btree_node_entry(
82         struct xchk_da_btree            *ds,
83         int                             level)
84 {
85         struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
86         struct xfs_da3_icnode_hdr       hdr;
87
88         ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
89
90         xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
91         return hdr.btree + blk->index;
92 }
93
94 /* Scrub a da btree hash (key). */
95 int
96 xchk_da_btree_hash(
97         struct xchk_da_btree            *ds,
98         int                             level,
99         __be32                          *hashp)
100 {
101         struct xfs_da_node_entry        *entry;
102         xfs_dahash_t                    hash;
103         xfs_dahash_t                    parent_hash;
104
105         /* Is this hash in order? */
106         hash = be32_to_cpu(*hashp);
107         if (hash < ds->hashes[level])
108                 xchk_da_set_corrupt(ds, level);
109         ds->hashes[level] = hash;
110
111         if (level == 0)
112                 return 0;
113
114         /* Is this hash no larger than the parent hash? */
115         entry = xchk_da_btree_node_entry(ds, level - 1);
116         parent_hash = be32_to_cpu(entry->hashval);
117         if (parent_hash < hash)
118                 xchk_da_set_corrupt(ds, level);
119
120         return 0;
121 }
122
123 /*
124  * Check a da btree pointer.  Returns true if it's ok to use this
125  * pointer.
126  */
127 STATIC bool
128 xchk_da_btree_ptr_ok(
129         struct xchk_da_btree    *ds,
130         int                     level,
131         xfs_dablk_t             blkno)
132 {
133         if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
134                 xchk_da_set_corrupt(ds, level);
135                 return false;
136         }
137
138         return true;
139 }
140
141 /*
142  * The da btree scrubber can handle leaf1 blocks as a degenerate
143  * form of leafn blocks.  Since the regular da code doesn't handle
144  * leaf1, we must multiplex the verifiers.
145  */
146 static void
147 xchk_da_btree_read_verify(
148         struct xfs_buf          *bp)
149 {
150         struct xfs_da_blkinfo   *info = bp->b_addr;
151
152         switch (be16_to_cpu(info->magic)) {
153         case XFS_DIR2_LEAF1_MAGIC:
154         case XFS_DIR3_LEAF1_MAGIC:
155                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
156                 bp->b_ops->verify_read(bp);
157                 return;
158         default:
159                 /*
160                  * xfs_da3_node_buf_ops already know how to handle
161                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
162                  */
163                 bp->b_ops = &xfs_da3_node_buf_ops;
164                 bp->b_ops->verify_read(bp);
165                 return;
166         }
167 }
168 static void
169 xchk_da_btree_write_verify(
170         struct xfs_buf          *bp)
171 {
172         struct xfs_da_blkinfo   *info = bp->b_addr;
173
174         switch (be16_to_cpu(info->magic)) {
175         case XFS_DIR2_LEAF1_MAGIC:
176         case XFS_DIR3_LEAF1_MAGIC:
177                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
178                 bp->b_ops->verify_write(bp);
179                 return;
180         default:
181                 /*
182                  * xfs_da3_node_buf_ops already know how to handle
183                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
184                  */
185                 bp->b_ops = &xfs_da3_node_buf_ops;
186                 bp->b_ops->verify_write(bp);
187                 return;
188         }
189 }
190 static void *
191 xchk_da_btree_verify(
192         struct xfs_buf          *bp)
193 {
194         struct xfs_da_blkinfo   *info = bp->b_addr;
195
196         switch (be16_to_cpu(info->magic)) {
197         case XFS_DIR2_LEAF1_MAGIC:
198         case XFS_DIR3_LEAF1_MAGIC:
199                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
200                 return bp->b_ops->verify_struct(bp);
201         default:
202                 bp->b_ops = &xfs_da3_node_buf_ops;
203                 return bp->b_ops->verify_struct(bp);
204         }
205 }
206
207 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
208         .name = "xchk_da_btree",
209         .verify_read = xchk_da_btree_read_verify,
210         .verify_write = xchk_da_btree_write_verify,
211         .verify_struct = xchk_da_btree_verify,
212 };
213
214 /* Check a block's sibling. */
215 STATIC int
216 xchk_da_btree_block_check_sibling(
217         struct xchk_da_btree    *ds,
218         int                     level,
219         int                     direction,
220         xfs_dablk_t             sibling)
221 {
222         struct xfs_da_state_path *path = &ds->state->path;
223         struct xfs_da_state_path *altpath = &ds->state->altpath;
224         int                     retval;
225         int                     plevel;
226         int                     error;
227
228         memcpy(altpath, path, sizeof(ds->state->altpath));
229
230         /*
231          * If the pointer is null, we shouldn't be able to move the upper
232          * level pointer anywhere.
233          */
234         if (sibling == 0) {
235                 error = xfs_da3_path_shift(ds->state, altpath, direction,
236                                 false, &retval);
237                 if (error == 0 && retval == 0)
238                         xchk_da_set_corrupt(ds, level);
239                 error = 0;
240                 goto out;
241         }
242
243         /* Move the alternate cursor one block in the direction given. */
244         error = xfs_da3_path_shift(ds->state, altpath, direction, false,
245                         &retval);
246         if (!xchk_da_process_error(ds, level, &error))
247                 goto out;
248         if (retval) {
249                 xchk_da_set_corrupt(ds, level);
250                 goto out;
251         }
252         if (altpath->blk[level].bp)
253                 xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
254
255         /* Compare upper level pointer to sibling pointer. */
256         if (altpath->blk[level].blkno != sibling)
257                 xchk_da_set_corrupt(ds, level);
258
259 out:
260         /* Free all buffers in the altpath that aren't referenced from path. */
261         for (plevel = 0; plevel < altpath->active; plevel++) {
262                 if (altpath->blk[plevel].bp == NULL ||
263                     (plevel < path->active &&
264                      altpath->blk[plevel].bp == path->blk[plevel].bp))
265                         continue;
266
267                 xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
268                 altpath->blk[plevel].bp = NULL;
269         }
270
271         return error;
272 }
273
274 /* Check a block's sibling pointers. */
275 STATIC int
276 xchk_da_btree_block_check_siblings(
277         struct xchk_da_btree    *ds,
278         int                     level,
279         struct xfs_da_blkinfo   *hdr)
280 {
281         xfs_dablk_t             forw;
282         xfs_dablk_t             back;
283         int                     error = 0;
284
285         forw = be32_to_cpu(hdr->forw);
286         back = be32_to_cpu(hdr->back);
287
288         /* Top level blocks should not have sibling pointers. */
289         if (level == 0) {
290                 if (forw != 0 || back != 0)
291                         xchk_da_set_corrupt(ds, level);
292                 return 0;
293         }
294
295         /*
296          * Check back (left) and forw (right) pointers.  These functions
297          * absorb error codes for us.
298          */
299         error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
300         if (error)
301                 goto out;
302         error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
303
304 out:
305         memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
306         return error;
307 }
308
309 /* Load a dir/attribute block from a btree. */
310 STATIC int
311 xchk_da_btree_block(
312         struct xchk_da_btree            *ds,
313         int                             level,
314         xfs_dablk_t                     blkno)
315 {
316         struct xfs_da_state_blk         *blk;
317         struct xfs_da_intnode           *node;
318         struct xfs_da_node_entry        *btree;
319         struct xfs_da3_blkinfo          *hdr3;
320         struct xfs_da_args              *dargs = &ds->dargs;
321         struct xfs_inode                *ip = ds->dargs.dp;
322         xfs_ino_t                       owner;
323         int                             *pmaxrecs;
324         struct xfs_da3_icnode_hdr       nodehdr;
325         int                             error = 0;
326
327         blk = &ds->state->path.blk[level];
328         ds->state->path.active = level + 1;
329
330         /* Release old block. */
331         if (blk->bp) {
332                 xfs_trans_brelse(dargs->trans, blk->bp);
333                 blk->bp = NULL;
334         }
335
336         /* Check the pointer. */
337         blk->blkno = blkno;
338         if (!xchk_da_btree_ptr_ok(ds, level, blkno))
339                 goto out_nobuf;
340
341         /* Read the buffer. */
342         error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
343                         XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
344                         &xchk_da_btree_buf_ops);
345         if (!xchk_da_process_error(ds, level, &error))
346                 goto out_nobuf;
347         if (blk->bp)
348                 xchk_buffer_recheck(ds->sc, blk->bp);
349
350         /*
351          * We didn't find a dir btree root block, which means that
352          * there's no LEAF1/LEAFN tree (at least not where it's supposed
353          * to be), so jump out now.
354          */
355         if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
356                         blk->bp == NULL)
357                 goto out_nobuf;
358
359         /* It's /not/ ok for attr trees not to have a da btree. */
360         if (blk->bp == NULL) {
361                 xchk_da_set_corrupt(ds, level);
362                 goto out_nobuf;
363         }
364
365         hdr3 = blk->bp->b_addr;
366         blk->magic = be16_to_cpu(hdr3->hdr.magic);
367         pmaxrecs = &ds->maxrecs[level];
368
369         /* We only started zeroing the header on v5 filesystems. */
370         if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
371                 xchk_da_set_corrupt(ds, level);
372
373         /* Check the owner. */
374         if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
375                 owner = be64_to_cpu(hdr3->owner);
376                 if (owner != ip->i_ino)
377                         xchk_da_set_corrupt(ds, level);
378         }
379
380         /* Check the siblings. */
381         error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
382         if (error)
383                 goto out;
384
385         /* Interpret the buffer. */
386         switch (blk->magic) {
387         case XFS_ATTR_LEAF_MAGIC:
388         case XFS_ATTR3_LEAF_MAGIC:
389                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
390                                 XFS_BLFT_ATTR_LEAF_BUF);
391                 blk->magic = XFS_ATTR_LEAF_MAGIC;
392                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
393                 if (ds->tree_level != 0)
394                         xchk_da_set_corrupt(ds, level);
395                 break;
396         case XFS_DIR2_LEAFN_MAGIC:
397         case XFS_DIR3_LEAFN_MAGIC:
398                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
399                                 XFS_BLFT_DIR_LEAFN_BUF);
400                 blk->magic = XFS_DIR2_LEAFN_MAGIC;
401                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
402                 if (ds->tree_level != 0)
403                         xchk_da_set_corrupt(ds, level);
404                 break;
405         case XFS_DIR2_LEAF1_MAGIC:
406         case XFS_DIR3_LEAF1_MAGIC:
407                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
408                                 XFS_BLFT_DIR_LEAF1_BUF);
409                 blk->magic = XFS_DIR2_LEAF1_MAGIC;
410                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
411                 if (ds->tree_level != 0)
412                         xchk_da_set_corrupt(ds, level);
413                 break;
414         case XFS_DA_NODE_MAGIC:
415         case XFS_DA3_NODE_MAGIC:
416                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
417                                 XFS_BLFT_DA_NODE_BUF);
418                 blk->magic = XFS_DA_NODE_MAGIC;
419                 node = blk->bp->b_addr;
420                 xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
421                 btree = nodehdr.btree;
422                 *pmaxrecs = nodehdr.count;
423                 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
424                 if (level == 0) {
425                         if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
426                                 xchk_da_set_corrupt(ds, level);
427                                 goto out_freebp;
428                         }
429                         ds->tree_level = nodehdr.level;
430                 } else {
431                         if (ds->tree_level != nodehdr.level) {
432                                 xchk_da_set_corrupt(ds, level);
433                                 goto out_freebp;
434                         }
435                 }
436
437                 /* XXX: Check hdr3.pad32 once we know how to fix it. */
438                 break;
439         default:
440                 xchk_da_set_corrupt(ds, level);
441                 goto out_freebp;
442         }
443
444         /*
445          * If we've been handed a block that is below the dabtree root, does
446          * its hashval match what the parent block expected to see?
447          */
448         if (level > 0) {
449                 struct xfs_da_node_entry        *key;
450
451                 key = xchk_da_btree_node_entry(ds, level - 1);
452                 if (be32_to_cpu(key->hashval) != blk->hashval) {
453                         xchk_da_set_corrupt(ds, level);
454                         goto out_freebp;
455                 }
456         }
457
458 out:
459         return error;
460 out_freebp:
461         xfs_trans_brelse(dargs->trans, blk->bp);
462         blk->bp = NULL;
463 out_nobuf:
464         blk->blkno = 0;
465         return error;
466 }
467
468 /* Visit all nodes and leaves of a da btree. */
469 int
470 xchk_da_btree(
471         struct xfs_scrub                *sc,
472         int                             whichfork,
473         xchk_da_btree_rec_fn            scrub_fn,
474         void                            *private)
475 {
476         struct xchk_da_btree            ds = {};
477         struct xfs_mount                *mp = sc->mp;
478         struct xfs_da_state_blk         *blks;
479         struct xfs_da_node_entry        *key;
480         xfs_dablk_t                     blkno;
481         int                             level;
482         int                             error;
483
484         /* Skip short format data structures; no btree to scan. */
485         if (!xfs_ifork_has_extents(XFS_IFORK_PTR(sc->ip, whichfork)))
486                 return 0;
487
488         /* Set up initial da state. */
489         ds.dargs.dp = sc->ip;
490         ds.dargs.whichfork = whichfork;
491         ds.dargs.trans = sc->tp;
492         ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
493         ds.state = xfs_da_state_alloc(&ds.dargs);
494         ds.sc = sc;
495         ds.private = private;
496         if (whichfork == XFS_ATTR_FORK) {
497                 ds.dargs.geo = mp->m_attr_geo;
498                 ds.lowest = 0;
499                 ds.highest = 0;
500         } else {
501                 ds.dargs.geo = mp->m_dir_geo;
502                 ds.lowest = ds.dargs.geo->leafblk;
503                 ds.highest = ds.dargs.geo->freeblk;
504         }
505         blkno = ds.lowest;
506         level = 0;
507
508         /* Find the root of the da tree, if present. */
509         blks = ds.state->path.blk;
510         error = xchk_da_btree_block(&ds, level, blkno);
511         if (error)
512                 goto out_state;
513         /*
514          * We didn't find a block at ds.lowest, which means that there's
515          * no LEAF1/LEAFN tree (at least not where it's supposed to be),
516          * so jump out now.
517          */
518         if (blks[level].bp == NULL)
519                 goto out_state;
520
521         blks[level].index = 0;
522         while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
523                 /* Handle leaf block. */
524                 if (blks[level].magic != XFS_DA_NODE_MAGIC) {
525                         /* End of leaf, pop back towards the root. */
526                         if (blks[level].index >= ds.maxrecs[level]) {
527                                 if (level > 0)
528                                         blks[level - 1].index++;
529                                 ds.tree_level++;
530                                 level--;
531                                 continue;
532                         }
533
534                         /* Dispatch record scrubbing. */
535                         error = scrub_fn(&ds, level);
536                         if (error)
537                                 break;
538                         if (xchk_should_terminate(sc, &error) ||
539                             (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
540                                 break;
541
542                         blks[level].index++;
543                         continue;
544                 }
545
546
547                 /* End of node, pop back towards the root. */
548                 if (blks[level].index >= ds.maxrecs[level]) {
549                         if (level > 0)
550                                 blks[level - 1].index++;
551                         ds.tree_level++;
552                         level--;
553                         continue;
554                 }
555
556                 /* Hashes in order for scrub? */
557                 key = xchk_da_btree_node_entry(&ds, level);
558                 error = xchk_da_btree_hash(&ds, level, &key->hashval);
559                 if (error)
560                         goto out;
561
562                 /* Drill another level deeper. */
563                 blkno = be32_to_cpu(key->before);
564                 level++;
565                 if (level >= XFS_DA_NODE_MAXDEPTH) {
566                         /* Too deep! */
567                         xchk_da_set_corrupt(&ds, level - 1);
568                         break;
569                 }
570                 ds.tree_level--;
571                 error = xchk_da_btree_block(&ds, level, blkno);
572                 if (error)
573                         goto out;
574                 if (blks[level].bp == NULL)
575                         goto out;
576
577                 blks[level].index = 0;
578         }
579
580 out:
581         /* Release all the buffers we're tracking. */
582         for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
583                 if (blks[level].bp == NULL)
584                         continue;
585                 xfs_trans_brelse(sc->tp, blks[level].bp);
586                 blks[level].bp = NULL;
587         }
588
589 out_state:
590         xfs_da_state_free(ds.state);
591         return error;
592 }