1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_alloc_btree.h"
16 #include "xfs_alloc.h"
17 #include "xfs_extent_busy.h"
18 #include "xfs_error.h"
19 #include "xfs_health.h"
20 #include "xfs_trace.h"
21 #include "xfs_trans.h"
/*
 * Slab cache passed to xfs_btree_alloc_cursor() when creating either a bnobt
 * or a cntbt cursor; created and destroyed by the *_cur_cache() helpers.
 */
24 static struct kmem_cache *xfs_allocbt_cur_cache;
/*
 * Duplicate a bnobt cursor by calling xfs_bnobt_init_cursor() with the mount,
 * transaction and AGF buffer taken from @cur (the argument list continues on
 * a line not shown here).
 * NOTE(review): the function-name line is not visible; presumably this is the
 * xfs_bnobt_dup_cursor installed as .dup_cursor in xfs_bnobt_ops -- confirm.
 */
26 STATIC struct xfs_btree_cur *
28 struct xfs_btree_cur *cur)
30 return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
/*
 * Duplicate a cntbt cursor by calling xfs_cntbt_init_cursor() with the mount,
 * transaction and AGF buffer taken from @cur (the argument list continues on
 * a line not shown here).
 * NOTE(review): the function-name line is not visible; presumably this is the
 * xfs_cntbt_dup_cursor installed as .dup_cursor in xfs_cntbt_ops -- confirm.
 */
34 STATIC struct xfs_btree_cur *
36 struct xfs_btree_cur *cur)
38 return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
/*
 * Record a new root block for one of the free space btrees in the AGF.
 *
 * @cur: cursor; selects the tree via xfs_btree_is_bno(cur->bc_ops)
 * @ptr: new root pointer; its short form (ptr->s) is stored as-is
 * inc:  delta applied to the tree height
 *
 * The root pointer and level are updated in the on-disk AGF (the level with
 * be32_add_cpu(), i.e. it is kept big-endian), the matching in-core perag
 * level is adjusted by the same delta, and the AGF root and level fields are
 * logged.
 * NOTE(review): the name line and the declaration of `inc` are not visible;
 * presumably this is the xfs_allocbt_set_root used as .set_root -- confirm.
 */
45 struct xfs_btree_cur *cur,
46 const union xfs_btree_ptr *ptr,
49 struct xfs_buf *agbp = cur->bc_ag.agbp;
50 struct xfs_agf *agf = agbp->b_addr;
54 if (xfs_btree_is_bno(cur->bc_ops)) {
55 agf->agf_bno_root = ptr->s;
56 be32_add_cpu(&agf->agf_bno_level, inc);
57 cur->bc_ag.pag->pagf_bno_level += inc;
/* Same update for the cnt tree fields (presumably the else branch). */
59 agf->agf_cnt_root = ptr->s;
60 be32_add_cpu(&agf->agf_cnt_level, inc);
61 cur->bc_ag.pag->pagf_cnt_level += inc;
64 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
/*
 * Obtain a block for the btree from the AG freelist.
 *
 * @cur:   cursor supplying the perag, transaction and AGF buffer
 * @start: not referenced by the lines visible here
 * @new:   receives the allocated AG block number, big-endian, in new->s
 *
 * xfs_alloc_get_freelist() fills in bno; NULLAGBLOCK is tested for as the
 * "nothing available" answer.  For a real block the per-mount
 * m_allocbt_blks counter is incremented and xfs_extent_busy_reuse() is called
 * on the block before it is handed back.
 * NOTE(review): the error check, the body of the NULLAGBLOCK branch and the
 * return statements are not visible in this excerpt.
 */
68 xfs_allocbt_alloc_block(
69 struct xfs_btree_cur *cur,
70 const union xfs_btree_ptr *start,
71 union xfs_btree_ptr *new,
77 /* Allocate the new block from the freelist. If we can't, give up. */
78 error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp,
79 cur->bc_ag.agbp, &bno, 1);
83 if (bno == NULLAGBLOCK) {
88 atomic64_inc(&cur->bc_mp->m_allocbt_blks);
89 xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false);
91 new->s = cpu_to_be32(bno);
/*
 * Give a btree block back to the AG freelist.
 *
 * The buffer's disk address is converted to an AG block number and passed to
 * xfs_alloc_put_freelist().  Afterwards the per-mount m_allocbt_blks counter
 * is decremented and the block is inserted into the busy extent list with
 * XFS_EXTENT_BUSY_SKIP_DISCARD.
 * NOTE(review): the `bp` parameter declaration, the error check after
 * xfs_alloc_put_freelist() and the return statements are not visible here.
 */
98 xfs_allocbt_free_block(
99 struct xfs_btree_cur *cur,
102 struct xfs_buf *agbp = cur->bc_ag.agbp;
106 bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
107 error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL,
112 atomic64_dec(&cur->bc_mp->m_allocbt_blks);
113 xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
114 XFS_EXTENT_BUSY_SKIP_DISCARD);
/*
 * Keep the AGF's "longest free extent" field in step with the cntbt.
 *
 * @cur:   cntbt cursor (asserted: must not be a bnobt cursor)
 * @block: btree block being changed
 * @rec:   record involved in the change
 * ptr:    record index within @block
 *
 * Three ways of choosing the new length are visible below: the record itself
 * when ptr is the block's last record; the record itself when it is longer
 * than the current agf_longest; and the block's last remaining record when
 * ptr is one past numrecs.  The chosen length is then written to the AGF,
 * mirrored into the perag and the AGF_LONGEST field is logged.
 * NOTE(review): the statements that select between these three paths, and
 * the early returns, are not visible in this excerpt.
 */
119 * Update the longest extent in the AGF
122 xfs_allocbt_update_lastrec(
123 struct xfs_btree_cur *cur,
124 const struct xfs_btree_block *block,
125 const union xfs_btree_rec *rec,
129 struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
130 struct xfs_perag *pag;
134 ASSERT(!xfs_btree_is_bno(cur->bc_ops));
139 * If this is the last leaf block and it's the last record,
140 * then update the size of the longest extent in the AG.
142 if (ptr != xfs_btree_get_numrecs(block))
144 len = rec->alloc.ar_blockcount;
/* Compare in CPU byte order: both values are stored big-endian. */
147 if (be32_to_cpu(rec->alloc.ar_blockcount) <=
148 be32_to_cpu(agf->agf_longest))
150 len = rec->alloc.ar_blockcount;
153 numrecs = xfs_btree_get_numrecs(block);
156 ASSERT(ptr == numrecs + 1);
159 xfs_alloc_rec_t *rrp;
/* Use the record at index numrecs, i.e. the block's current last record. */
161 rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
162 len = rrp->ar_blockcount;
/* len is big-endian: stored raw in the AGF, converted for the perag copy. */
173 agf->agf_longest = len;
174 pag = cur->bc_ag.agbp->b_pag;
175 pag->pagf_longest = be32_to_cpu(len);
176 xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST);
/*
 * Minimum number of records for a block at the given level: entry 0 of
 * mp->m_alloc_mnr is used for level 0, entry 1 for every other level.
 */
180 xfs_allocbt_get_minrecs(
181 struct xfs_btree_cur *cur,
184 return cur->bc_mp->m_alloc_mnr[level != 0];
/*
 * Maximum number of records for a block at the given level: entry 0 of
 * mp->m_alloc_mxr is used for level 0, entry 1 for every other level.
 */
188 xfs_allocbt_get_maxrecs(
189 struct xfs_btree_cur *cur,
192 return cur->bc_mp->m_alloc_mxr[level != 0];
/*
 * Build the (low) key for a record: both fields are copied across unchanged,
 * so the key keeps the record's byte order.
 */
196 xfs_allocbt_init_key_from_rec(
197 union xfs_btree_key *key,
198 const union xfs_btree_rec *rec)
200 key->alloc.ar_startblock = rec->alloc.ar_startblock;
201 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
/*
 * Build the bnobt high key for a record: the start block of the key is the
 * last block covered by the extent (startblock + blockcount - 1), converted
 * back to big-endian; the key's block count is zeroed.
 */
205 xfs_bnobt_init_high_key_from_rec(
206 union xfs_btree_key *key,
207 const union xfs_btree_rec *rec)
211 x = be32_to_cpu(rec->alloc.ar_startblock);
212 x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
213 key->alloc.ar_startblock = cpu_to_be32(x);
214 key->alloc.ar_blockcount = 0;
/*
 * Build the cntbt high key for a record: the block count is copied from the
 * record unchanged and the start block is zeroed.
 */
218 xfs_cntbt_init_high_key_from_rec(
219 union xfs_btree_key *key,
220 const union xfs_btree_rec *rec)
222 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
223 key->alloc.ar_startblock = 0;
/*
 * Fill an on-disk record from the cursor's in-core record (cur->bc_rec.a),
 * converting both fields to big-endian.
 */
227 xfs_allocbt_init_rec_from_cur(
228 struct xfs_btree_cur *cur,
229 union xfs_btree_rec *rec)
231 rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
232 rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
/*
 * Set @ptr to the root block of the cursor's tree, read from the AGF:
 * agf_bno_root for a bnobt cursor, agf_cnt_root otherwise.  Asserts that the
 * cursor's perag and the AGF refer to the same AG number.
 */
236 xfs_allocbt_init_ptr_from_cur(
237 struct xfs_btree_cur *cur,
238 union xfs_btree_ptr *ptr)
240 struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
242 ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
244 if (xfs_btree_is_bno(cur->bc_ops))
245 ptr->s = agf->agf_bno_root;
247 ptr->s = agf->agf_cnt_root;
/*
 * Compare an on-disk key with the cursor's in-core record by start block.
 * The key's start block is widened to int64_t before the subtraction, so the
 * result is negative when the key sorts before the record and positive when
 * it sorts after.
 * NOTE(review): the name line is not visible; presumably this is the
 * xfs_bnobt_key_diff installed as .key_diff in xfs_bnobt_ops -- confirm.
 */
252 struct xfs_btree_cur *cur,
253 const union xfs_btree_key *key)
255 struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
256 const struct xfs_alloc_rec *kp = &key->alloc;
258 return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
/*
 * Compare an on-disk key with the cursor's in-core record, first by block
 * count and then by start block.  Each difference is computed in int64_t.
 * NOTE(review): the name line and the statements between the two
 * computations are not visible; presumably a non-zero block count difference
 * is returned before the start blocks are compared, and this is the
 * xfs_cntbt_key_diff installed as .key_diff in xfs_cntbt_ops -- confirm.
 */
263 struct xfs_btree_cur *cur,
264 const union xfs_btree_key *key)
266 struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
267 const struct xfs_alloc_rec *kp = &key->alloc;
270 diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
274 return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
/*
 * Signed difference between the start blocks of two bnobt keys (k1 - k2),
 * computed in int64_t.  If a mask is supplied it must select the start block
 * field.
 */
278 xfs_bnobt_diff_two_keys(
279 struct xfs_btree_cur *cur,
280 const union xfs_btree_key *k1,
281 const union xfs_btree_key *k2,
282 const union xfs_btree_key *mask)
284 ASSERT(!mask || mask->alloc.ar_startblock);
286 return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
287 be32_to_cpu(k2->alloc.ar_startblock);
291 xfs_cntbt_diff_two_keys(
292 struct xfs_btree_cur *cur,
293 const union xfs_btree_key *k1,
294 const union xfs_btree_key *k2,
295 const union xfs_btree_key *mask)
299 ASSERT(!mask || (mask->alloc.ar_blockcount &&
300 mask->alloc.ar_startblock));
302 diff = be32_to_cpu(k1->alloc.ar_blockcount) -
303 be32_to_cpu(k2->alloc.ar_blockcount);
307 return be32_to_cpu(k1->alloc.ar_startblock) -
308 be32_to_cpu(k2->alloc.ar_startblock);
/*
 * Structural verifier shared by the bnobt and cntbt buffer ops.
 *
 * Returns the address of the failing check, or whatever
 * xfs_btree_agblock_verify() returns for the final record-count check.
 * Checks visible below, in order:
 *   - the block magic must be one accepted by the buffer's ops;
 *   - on CRC-enabled filesystems the v5 block header is verified;
 *   - the block level must be below a height limit: the per-tree level from
 *     the perag (or the online-repair level, whichever is larger) when the
 *     perag is attached and its AGF data is initialised, otherwise
 *     mp->m_alloc_maxlevels;
 *   - the generic AG btree block check, using the max record count for the
 *     block's level.
 * NOTE(review): the name line and the handling of `fa` after the v5 header
 * check are not visible; presumably this is the xfs_allocbt_verify used by
 * the read/write verifiers and as .verify_struct -- confirm.
 */
311 static xfs_failaddr_t
315 struct xfs_mount *mp = bp->b_mount;
316 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
317 struct xfs_perag *pag = bp->b_pag;
321 if (!xfs_verify_magic(bp, block->bb_magic))
322 return __this_address;
324 if (xfs_has_crc(mp)) {
325 fa = xfs_btree_agblock_v5hdr_verify(bp);
331 * The perag may not be attached during grow operations or fully
332 * initialized from the AGF during log recovery. Therefore we can only
333 * check against maximum tree depth from those contexts.
335 * Otherwise check against the per-tree limit. Peek at one of the
336 * verifier magic values to determine the type of tree we're verifying
339 level = be16_to_cpu(block->bb_level);
340 if (pag && xfs_perag_initialised_agf(pag)) {
341 unsigned int maxlevel, repair_maxlevel = 0;
344 * Online repair could be rewriting the free space btrees, so
345 * we'll validate against the larger of either tree while this
/* magic[0] of the buffer ops tells cntbt buffers apart from bnobt ones. */
348 if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) {
349 maxlevel = pag->pagf_cnt_level;
350 #ifdef CONFIG_XFS_ONLINE_REPAIR
351 repair_maxlevel = pag->pagf_repair_cnt_level;
354 maxlevel = pag->pagf_bno_level;
355 #ifdef CONFIG_XFS_ONLINE_REPAIR
356 repair_maxlevel = pag->pagf_repair_bno_level;
360 if (level >= max(maxlevel, repair_maxlevel))
361 return __this_address;
362 } else if (level >= mp->m_alloc_maxlevels)
363 return __this_address;
365 return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]);
/*
 * Read verifier for free space btree buffers.
 *
 * A CRC check failure is reported as -EFSBADCRC; a failure from
 * xfs_allocbt_verify() is reported as -EFSCORRUPTED together with the failing
 * address.  xfs_btree_corrupt is traced afterwards.
 * NOTE(review): the lines linking these steps are not visible; presumably the
 * structure check runs only when the CRC is good and the trace fires only
 * when an error was recorded -- confirm.
 */
369 xfs_allocbt_read_verify(
374 if (!xfs_btree_agblock_verify_crc(bp))
375 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
377 fa = xfs_allocbt_verify(bp);
379 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
383 trace_xfs_btree_corrupt(bp, _RET_IP_);
/*
 * Write verifier for free space btree buffers.
 *
 * Runs xfs_allocbt_verify(); a failure is traced and reported as
 * -EFSCORRUPTED.  The block CRC is then computed with
 * xfs_btree_agblock_calc_crc().
 * NOTE(review): the test on `fa` and any early return are not visible;
 * presumably the CRC is only computed for blocks that passed -- confirm.
 */
387 xfs_allocbt_write_verify(
392 fa = xfs_allocbt_verify(bp);
394 trace_xfs_btree_corrupt(bp, _RET_IP_);
395 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
398 xfs_btree_agblock_calc_crc(bp);
/*
 * Buffer operations for bnobt blocks: accepts the XFS_ABTB_MAGIC and
 * XFS_ABTB_CRC_MAGIC magic numbers and shares its verifier functions with the
 * cntbt.
 */
402 const struct xfs_buf_ops xfs_bnobt_buf_ops = {
404 .magic = { cpu_to_be32(XFS_ABTB_MAGIC),
405 cpu_to_be32(XFS_ABTB_CRC_MAGIC) },
406 .verify_read = xfs_allocbt_read_verify,
407 .verify_write = xfs_allocbt_write_verify,
408 .verify_struct = xfs_allocbt_verify,
/*
 * Buffer operations for cntbt blocks: accepts the XFS_ABTC_MAGIC and
 * XFS_ABTC_CRC_MAGIC magic numbers and shares its verifier functions with the
 * bnobt.  xfs_allocbt_verify() inspects magic[0] of these ops to recognise a
 * cntbt buffer.
 */
411 const struct xfs_buf_ops xfs_cntbt_buf_ops = {
413 .magic = { cpu_to_be32(XFS_ABTC_MAGIC),
414 cpu_to_be32(XFS_ABTC_CRC_MAGIC) },
415 .verify_read = xfs_allocbt_read_verify,
416 .verify_write = xfs_allocbt_write_verify,
417 .verify_struct = xfs_allocbt_verify,
/*
 * bnobt key ordering check: true when k1's start block is strictly less than
 * k2's (compared in CPU byte order).
 */
421 xfs_bnobt_keys_inorder(
422 struct xfs_btree_cur *cur,
423 const union xfs_btree_key *k1,
424 const union xfs_btree_key *k2)
426 return be32_to_cpu(k1->alloc.ar_startblock) <
427 be32_to_cpu(k2->alloc.ar_startblock);
/*
 * bnobt record ordering check: true when r1 ends at or before the block where
 * r2 starts (r1.start + r1.count <= r2.start), i.e. the extents are sorted by
 * start block and do not overlap.
 */
431 xfs_bnobt_recs_inorder(
432 struct xfs_btree_cur *cur,
433 const union xfs_btree_rec *r1,
434 const union xfs_btree_rec *r2)
436 return be32_to_cpu(r1->alloc.ar_startblock) +
437 be32_to_cpu(r1->alloc.ar_blockcount) <=
438 be32_to_cpu(r2->alloc.ar_startblock);
/*
 * cntbt key ordering check: true when k1 has a smaller block count than k2,
 * or the same block count and a smaller start block.  The equality test
 * compares the raw big-endian fields, which is safe for equality.
 */
442 xfs_cntbt_keys_inorder(
443 struct xfs_btree_cur *cur,
444 const union xfs_btree_key *k1,
445 const union xfs_btree_key *k2)
447 return be32_to_cpu(k1->alloc.ar_blockcount) <
448 be32_to_cpu(k2->alloc.ar_blockcount) ||
449 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
450 be32_to_cpu(k1->alloc.ar_startblock) <
451 be32_to_cpu(k2->alloc.ar_startblock));
/*
 * cntbt record ordering check: true when r1 has a smaller block count than
 * r2, or the same block count and a smaller start block.  The equality test
 * compares the raw big-endian fields, which is safe for equality.
 */
455 xfs_cntbt_recs_inorder(
456 struct xfs_btree_cur *cur,
457 const union xfs_btree_rec *r1,
458 const union xfs_btree_rec *r2)
460 return be32_to_cpu(r1->alloc.ar_blockcount) <
461 be32_to_cpu(r2->alloc.ar_blockcount) ||
462 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
463 be32_to_cpu(r1->alloc.ar_startblock) <
464 be32_to_cpu(r2->alloc.ar_startblock));
/*
 * Report how two keys relate by start block, as classified by
 * xbtree_key_contig() on the CPU-order start blocks of key1 and key2.  If a
 * mask is supplied it must select the start block field.  Only installed in
 * xfs_bnobt_ops.
 */
467 STATIC enum xbtree_key_contig
468 xfs_allocbt_keys_contiguous(
469 struct xfs_btree_cur *cur,
470 const union xfs_btree_key *key1,
471 const union xfs_btree_key *key2,
472 const union xfs_btree_key *mask)
474 ASSERT(!mask || mask->alloc.ar_startblock);
476 return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
477 be32_to_cpu(key2->alloc.ar_startblock));
/*
 * Btree operations for the bnobt.  Uses short pointers and the alloc
 * record/key layouts; block management, min/max record and record/pointer
 * initialisation callbacks are shared with the cntbt, while the comparison,
 * ordering, high-key and dup_cursor callbacks are bnobt-specific (they order
 * by start block).
 */
480 const struct xfs_btree_ops xfs_bnobt_ops = {
482 .type = XFS_BTREE_TYPE_AG,
484 .rec_len = sizeof(xfs_alloc_rec_t),
485 .key_len = sizeof(xfs_alloc_key_t),
486 .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
488 .lru_refs = XFS_ALLOC_BTREE_REF,
489 .statoff = XFS_STATS_CALC_INDEX(xs_abtb_2),
490 .sick_mask = XFS_SICK_AG_BNOBT,
492 .dup_cursor = xfs_bnobt_dup_cursor,
493 .set_root = xfs_allocbt_set_root,
494 .alloc_block = xfs_allocbt_alloc_block,
495 .free_block = xfs_allocbt_free_block,
496 .update_lastrec = xfs_allocbt_update_lastrec,
497 .get_minrecs = xfs_allocbt_get_minrecs,
498 .get_maxrecs = xfs_allocbt_get_maxrecs,
499 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
500 .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
501 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
502 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
503 .key_diff = xfs_bnobt_key_diff,
504 .buf_ops = &xfs_bnobt_buf_ops,
505 .diff_two_keys = xfs_bnobt_diff_two_keys,
506 .keys_inorder = xfs_bnobt_keys_inorder,
507 .recs_inorder = xfs_bnobt_recs_inorder,
508 .keys_contiguous = xfs_allocbt_keys_contiguous,
/*
 * Btree operations for the cntbt.  Same layout and shared callbacks as the
 * bnobt, but with cntbt-specific comparison, ordering, high-key and
 * dup_cursor callbacks (they order by block count, then start block).  Sets
 * XFS_BTGEO_LASTREC_UPDATE in geom_flags and provides no keys_contiguous
 * callback.
 */
511 const struct xfs_btree_ops xfs_cntbt_ops = {
513 .type = XFS_BTREE_TYPE_AG,
514 .geom_flags = XFS_BTGEO_LASTREC_UPDATE,
516 .rec_len = sizeof(xfs_alloc_rec_t),
517 .key_len = sizeof(xfs_alloc_key_t),
518 .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
520 .lru_refs = XFS_ALLOC_BTREE_REF,
521 .statoff = XFS_STATS_CALC_INDEX(xs_abtc_2),
522 .sick_mask = XFS_SICK_AG_CNTBT,
524 .dup_cursor = xfs_cntbt_dup_cursor,
525 .set_root = xfs_allocbt_set_root,
526 .alloc_block = xfs_allocbt_alloc_block,
527 .free_block = xfs_allocbt_free_block,
528 .update_lastrec = xfs_allocbt_update_lastrec,
529 .get_minrecs = xfs_allocbt_get_minrecs,
530 .get_maxrecs = xfs_allocbt_get_maxrecs,
531 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
532 .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
533 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
534 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
535 .key_diff = xfs_cntbt_key_diff,
536 .buf_ops = &xfs_cntbt_buf_ops,
537 .diff_two_keys = xfs_cntbt_diff_two_keys,
538 .keys_inorder = xfs_cntbt_keys_inorder,
539 .recs_inorder = xfs_cntbt_recs_inorder,
540 .keys_contiguous = NULL, /* not needed right now */
/*
 * Create a bnobt cursor.
 *
 * @mp:   mount
 * @tp:   transaction (may be NULL for staging cursors, per the note below)
 * @agbp: AGF buffer (may be NULL for staging cursors, per the note below)
 * @pag:  perag; a hold is taken via xfs_perag_hold() and stored in the cursor
 *
 * The cursor is allocated from xfs_allocbt_cur_cache with room for
 * mp->m_alloc_maxlevels levels, and its level count is read from the AGF's
 * agf_bno_level.
 * NOTE(review): the guard around the AGF dereference and the return statement
 * are not visible in this excerpt; presumably the AGF is only read when agbp
 * is non-NULL -- confirm.
 */
544 * Allocate a new bnobt cursor.
546 * For staging cursors tp and agbp are NULL.
548 struct xfs_btree_cur *
549 xfs_bnobt_init_cursor(
550 struct xfs_mount *mp,
551 struct xfs_trans *tp,
552 struct xfs_buf *agbp,
553 struct xfs_perag *pag)
555 struct xfs_btree_cur *cur;
557 cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
558 mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
559 cur->bc_ag.pag = xfs_perag_hold(pag);
560 cur->bc_ag.agbp = agbp;
562 struct xfs_agf *agf = agbp->b_addr;
564 cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level);
/*
 * Create a cntbt cursor.
 *
 * @mp:   mount
 * @tp:   transaction (may be NULL for staging cursors, per the note below)
 * @agbp: AGF buffer (may be NULL for staging cursors, per the note below)
 * @pag:  perag; a hold is taken via xfs_perag_hold() and stored in the cursor
 *
 * The cursor is allocated from xfs_allocbt_cur_cache with room for
 * mp->m_alloc_maxlevels levels, and its level count is read from the AGF's
 * agf_cnt_level.
 * NOTE(review): the guard around the AGF dereference and the return statement
 * are not visible in this excerpt; presumably the AGF is only read when agbp
 * is non-NULL -- confirm.
 */
570 * Allocate a new cntbt cursor.
572 * For staging cursors tp and agbp are NULL.
574 struct xfs_btree_cur *
575 xfs_cntbt_init_cursor(
576 struct xfs_mount *mp,
577 struct xfs_trans *tp,
578 struct xfs_buf *agbp,
579 struct xfs_perag *pag)
581 struct xfs_btree_cur *cur;
583 cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
584 mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
585 cur->bc_ag.pag = xfs_perag_hold(pag);
586 cur->bc_ag.agbp = agbp;
588 struct xfs_agf *agf = agbp->b_addr;
590 cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level);
/*
 * Commit a staged free space btree.
 *
 * @cur:  staging cursor (asserted via XFS_BTREE_STAGING); its fake root
 *        supplies the new root block and height
 * @tp:   transaction used to log the AGF
 * @agbp: AGF buffer to update
 *
 * Copies af_root and af_levels from the fake root into the bno or cnt fields
 * of the AGF (converted to big-endian), logs the AGF root and level fields,
 * and then calls xfs_btree_commit_afakeroot() on the cursor.
 */
596 * Install a new free space btree root. Caller is responsible for invalidating
597 * and freeing the old btree blocks.
600 xfs_allocbt_commit_staged_btree(
601 struct xfs_btree_cur *cur,
602 struct xfs_trans *tp,
603 struct xfs_buf *agbp)
605 struct xfs_agf *agf = agbp->b_addr;
606 struct xbtree_afakeroot *afake = cur->bc_ag.afake;
608 ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
610 if (xfs_btree_is_bno(cur->bc_ops)) {
611 agf->agf_bno_root = cpu_to_be32(afake->af_root);
612 agf->agf_bno_level = cpu_to_be32(afake->af_levels);
614 agf->agf_cnt_root = cpu_to_be32(afake->af_root);
615 agf->agf_cnt_level = cpu_to_be32(afake->af_levels);
617 xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
619 xfs_btree_commit_afakeroot(cur, tp, agbp);
/*
 * How many entries fit in `blocklen` bytes of payload: either whole records,
 * or key + pointer pairs.
 * NOTE(review): the second parameter and the condition choosing between the
 * two returns are not visible; callers pass a leaf flag in that position, so
 * presumably the record-sized form is the leaf case -- confirm.
 */
622 /* Calculate number of records in an alloc btree block. */
623 static inline unsigned int
624 xfs_allocbt_block_maxrecs(
625 unsigned int blocklen,
629 return blocklen / sizeof(xfs_alloc_rec_t);
630 return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
/*
 * Subtracts XFS_ALLOC_BLOCK_LEN(mp) from blocklen and passes the remainder,
 * together with the leaf flag, to xfs_allocbt_block_maxrecs().
 * NOTE(review): the name line, return type and the blocklen/leaf parameter
 * declarations are not visible in this excerpt; presumably this is
 * xfs_allocbt_maxrecs -- confirm.
 */
634 * Calculate number of records in an alloc btree block.
638 struct xfs_mount *mp,
642 blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
643 return xfs_allocbt_block_maxrecs(blocklen, leaf);
/*
 * Upper bound on free space records in one AG: half of XFS_MAX_AG_BLOCKS,
 * rounded up.  Used as the record count when computing the maximum btree
 * height.
 */
646 /* Free space btrees are at their largest when every other block is free. */
647 #define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2)
/*
 * Worst-case height of a free space btree for any supported geometry.
 *
 * Takes the smaller payload of the two minimum block sizes (non-CRC and CRC,
 * each minus its short-form block header), halves the resulting maximum
 * record counts to get minimum counts for the leaf (index 0) and node
 * (index 1) cases, and asks xfs_btree_compute_maxlevels() for the height
 * needed to index XFS_MAX_FREESP_RECORDS records.
 */
649 /* Compute the max possible height for free space btrees. */
651 xfs_allocbt_maxlevels_ondisk(void)
653 unsigned int minrecs[2];
654 unsigned int blocklen;
656 blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN,
657 XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN);
659 minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2;
660 minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2;
662 return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS);
/*
 * @mp:  mount; supplies the per-level minimum record counts (m_alloc_mnr)
 * @len: number of records
 *
 * Returns the result of xfs_btree_calc_size() for those inputs.
 */
665 /* Calculate the freespace btree size for some records. */
667 xfs_allocbt_calc_size(
668 struct xfs_mount *mp,
669 unsigned long long len)
671 return xfs_btree_calc_size(mp->m_alloc_mnr, len);
/*
 * Create the cursor slab cache, named "xfs_bnobt_cur" and sized for a cursor
 * with xfs_allocbt_maxlevels_ondisk() levels.  The same cache serves both
 * bnobt and cntbt cursors.
 * NOTE(review): the remaining kmem_cache_create() arguments and the return
 * values are not visible in this excerpt.
 */
675 xfs_allocbt_init_cur_cache(void)
677 xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur",
678 xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()),
681 if (!xfs_allocbt_cur_cache)
/*
 * Destroy the cursor slab cache and clear the pointer so that it does not
 * dangle afterwards.
 */
687 xfs_allocbt_destroy_cur_cache(void)
689 kmem_cache_destroy(xfs_allocbt_cur_cache);
690 xfs_allocbt_cur_cache = NULL;