// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_trace.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

void
xfs_uuid_table_free(void)
{
	if (xfs_uuid_table_size == 0)
		return;
	kmem_free(xfs_uuid_table);
	xfs_uuid_table = NULL;
	xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			hole, i;

	/* Publish UUID in struct super_block */
	uuid_copy(&mp->m_super->s_uuid, uuid);

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return 0;

	if (uuid_is_null(uuid)) {
		xfs_warn(mp, "Filesystem has null UUID - can't mount");
		return -EINVAL;
	}

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i])) {
			hole = i;
			continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
			goto out_duplicate;
	}

	if (hole < 0) {
		xfs_uuid_table = krealloc(xfs_uuid_table,
			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
			GFP_KERNEL | __GFP_NOFAIL);
		hole = xfs_uuid_table_size++;
	}
	xfs_uuid_table[hole] = *uuid;
	mutex_unlock(&xfs_uuid_table_mutex);

	return 0;

 out_duplicate:
	mutex_unlock(&xfs_uuid_table_mutex);
	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
	return -EINVAL;
}

STATIC void
xfs_uuid_unmount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			i;

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return;

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i]))
			continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
			continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
	}
	ASSERT(i < xfs_uuid_table_size);
	mutex_unlock(&xfs_uuid_table_mutex);
}

STATIC void
__xfs_free_perag(
	struct rcu_head	*head)
{
	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

	ASSERT(atomic_read(&pag->pag_ref) == 0);
	kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
STATIC void
xfs_free_perag(
	xfs_mount_t	*mp)
{
	xfs_agnumber_t	agno;
	struct xfs_perag *pag;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		spin_lock(&mp->m_perag_lock);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		spin_unlock(&mp->m_perag_lock);
		ASSERT(pag);
		ASSERT(atomic_read(&pag->pag_ref) == 0);
		xfs_iunlink_destroy(pag);
		xfs_buf_hash_destroy(pag);
		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return -EFBIG;
	return 0;
}
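
/*
 * Worked example for the check above (illustrative only, not used by the
 * code): with 4k pages (PAGE_SHIFT == 12) and 4k filesystem blocks
 * (sb_blocklog == 12) the shift is zero, so the block count is limited to
 * ULONG_MAX pages directly.  With 512 byte blocks (sb_blocklog == 9) eight
 * blocks share each page, so the limit rises to roughly ULONG_MAX * 8
 * blocks.
 */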

int
xfs_initialize_perag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount,
	xfs_agnumber_t	*maxagi)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	first_initialised = NULLAGNUMBER;
	xfs_perag_t	*pag;
	int		error = -ENOMEM;

	/*
	 * Walk the current per-ag tree so we don't try to initialise AGs
	 * that already exist (growfs case). Allocate and insert all the
	 * AGs we don't find ready for initialisation.
	 */
	for (index = 0; index < agcount; index++) {
		pag = xfs_perag_get(mp, index);
		if (pag) {
			xfs_perag_put(pag);
			continue;
		}

		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
		if (!pag)
			goto out_unwind_new_pags;
		pag->pag_agno = index;
		pag->pag_mount = mp;
		spin_lock_init(&pag->pag_ici_lock);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		if (xfs_buf_hash_init(pag))
			goto out_free_pag;
		init_waitqueue_head(&pag->pagb_wait);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;

		if (radix_tree_preload(GFP_NOFS))
			goto out_hash_destroy;

		spin_lock(&mp->m_perag_lock);
		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
			WARN_ON_ONCE(1);
			spin_unlock(&mp->m_perag_lock);
			radix_tree_preload_end();
			error = -EEXIST;
			goto out_hash_destroy;
		}
		spin_unlock(&mp->m_perag_lock);
		radix_tree_preload_end();
		/* first new pag is fully initialized */
		if (first_initialised == NULLAGNUMBER)
			first_initialised = index;
		error = xfs_iunlink_init(pag);
		if (error)
			goto out_hash_destroy;
		spin_lock_init(&pag->pag_state_lock);
	}

	index = xfs_set_inode_alloc(mp, agcount);

	if (maxagi)
		*maxagi = index;

	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
	return 0;

out_hash_destroy:
	xfs_buf_hash_destroy(pag);
out_free_pag:
	kmem_free(pag);
out_unwind_new_pags:
	/* unwind any prior newly initialized pags */
	for (index = first_initialised; index < agcount; index++) {
		pag = radix_tree_delete(&mp->m_perag_tree, index);
		if (!pag)
			break;
		xfs_buf_hash_destroy(pag);
		xfs_iunlink_destroy(pag);
		kmem_free(pag);
	}
	return error;
}

/*
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector
	 * size based on the block device.  It's enough to
	 * get the sb_sectsize out of the superblock and
	 * then reread with the proper length.
	 * We don't verify it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock. This will be kept
	 * around at all times to optimize access to the superblock. Therefore,
	 * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
	 * elevated.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
				      buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, bp->b_addr);

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	xfs_reinit_percpu_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}

/*
 * If the sunit/swidth change would move the precomputed root inode value, we
 * must reject the ondisk change because repair will stumble over that.
 * However, we allow the mount to proceed because we never rejected this
 * combination before.  Returns true to update the sb, false otherwise.
 */
static int
xfs_check_new_dalign(
	struct xfs_mount	*mp,
	int			new_dalign,
	bool			*update_sb)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	xfs_ino_t		calc_ino;

	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);

	if (sbp->sb_rootino == calc_ino) {
		*update_sb = true;
		return 0;
	}

	xfs_warn(mp,
"Cannot change stripe alignment; would require moving root inode.");

	/*
	 * XXX: Next time we add a new incompat feature, this should start
	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
	 * that we're ignoring the administrator's instructions.
	 */
	xfs_warn(mp, "Skipping superblock stripe alignment update.");
	*update_sb = false;
	return 0;
}

/*
 * If we were provided with new sunit/swidth values as mount options, make sure
 * that they pass basic alignment and superblock feature checks, and convert
 * them into the same units (FSB) that everything else expects.  This step
 * /must/ be done before computing the inode geometry.
 */
STATIC int
xfs_validate_new_dalign(
	struct xfs_mount	*mp)
{
	if (mp->m_dalign == 0)
		return 0;

	/*
	 * If stripe unit and stripe width are not multiples
	 * of the fs blocksize turn off alignment.
	 */
	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
		xfs_warn(mp,
	"alignment check failed: sunit/swidth vs. blocksize(%d)",
			mp->m_sb.sb_blocksize);
		return -EINVAL;
	} else {
		/*
		 * Convert the stripe unit and width to FSBs.
		 */
		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
			xfs_warn(mp,
		"alignment check failed: sunit/swidth vs. agsize(%d)",
				mp->m_sb.sb_agblocks);
			return -EINVAL;
		} else if (mp->m_dalign) {
			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
		} else {
			xfs_warn(mp,
		"alignment check failed: sunit(%d) less than bsize(%d)",
				mp->m_dalign, mp->m_sb.sb_blocksize);
			return -EINVAL;
		}
	}

	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
		xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
		return -EINVAL;
	}

	return 0;
}
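
/*
 * Unit-conversion example for the helper above (illustrative numbers only):
 * mount options express sunit/swidth in 512-byte basic blocks (BBs).  For a
 * RAID stripe unit of 64k on a 4k-blocksize filesystem, m_dalign arrives
 * here as 128 BBs; BBTOB(128) == 65536 passes the blocksize-multiple check,
 * and XFS_BB_TO_FSBT() then converts it to 16 FSBs for the allocator.
 */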

/* Update alignment values based on mount options and sb values. */
STATIC int
xfs_update_alignment(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;

	if (mp->m_dalign) {
		bool		update_sb;
		int		error;

		if (sbp->sb_unit == mp->m_dalign &&
		    sbp->sb_width == mp->m_swidth)
			return 0;

		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
		if (error || !update_sb)
			return error;

		sbp->sb_unit = mp->m_dalign;
		sbp->sb_width = mp->m_swidth;
		mp->m_update_sb = true;
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    xfs_sb_version_hasdalign(&mp->m_sb)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
	struct xfs_mount	*mp)
{
	int i;

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		uint64_t space = mp->m_sb.sb_dblocks;

		do_div(space, 100);
		mp->m_low_space[i] = space * (i + 1);
	}
}
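
/*
 * Worked example (illustrative): with sb_dblocks = 1000000, space computes
 * to 10000 blocks (1%), so m_low_space[] is filled with 10000, 20000, ...,
 * i.e. the XFS_LOWSP_MAX thresholds represent 1% to 5% of the data device.
 */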

/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
	struct xfs_mount *mp)
{
	struct xfs_buf	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		xfs_warn(mp, "filesystem size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "last sector read failed");
		return error;
	}
	xfs_buf_relse(bp);

	if (mp->m_logdev_targp == mp->m_ddev_targp)
		return 0;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
		xfs_warn(mp, "log size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "log device read failed");
		return error;
	}
	xfs_buf_relse(bp);
	return 0;
}

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
	struct xfs_mount	*mp)
{
	mp->m_qflags = 0;

	/* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
	if (mp->m_sb.sb_qflags == 0)
		return 0;
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_qflags = 0;
	spin_unlock(&mp->m_sb_lock);

	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return 0;

	return xfs_sync_sb(mp, false);
}

uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
	uint64_t resblks;

	/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller.  This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc. These each require a 4
	 * block reservation. Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(uint64_t, resblks, 8192);
	return resblks;
}
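
/*
 * Worked example (illustrative): a 1 TiB data device with 4k blocks has
 * sb_dblocks = 268435456, and 5% of that is ~13.4 million blocks, so the
 * min_t() clamp selects 8192 blocks (32 MiB).  Only filesystems smaller
 * than 8192 * 20 blocks (640 MiB at 4k) end up using the 5% figure.
 */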

/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
	struct xfs_mount	*mp)
{
	/*
	 * The AG0 superblock verifier rejects in-progress filesystems,
	 * so we should never see the flag set this far into mounting.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_err(mp, "sb_inprogress set after log recovery??");
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was, the first phase of recovery has completed and we
	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
	 * but they are recovered transactionally in the second recovery phase
	 * later.
	 *
	 * If the log was clean when we mounted, we can check the summary
	 * counters.  If any of them are obviously incorrect, we can recompute
	 * them from the AGF headers in the next step.
	 */
	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);

	/*
	 * We can safely re-initialise incore superblock counters from the
	 * per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we waited for recovery to finish before doing
	 * this.
	 *
	 * If the filesystem was cleanly unmounted or the previous check did
	 * not flag anything weird, then we can trust the values in the
	 * superblock to be correct and we don't need to do anything here.
	 * Otherwise, recalculate the summary counters.
	 */
	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
		return 0;

	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &(mp->m_sb);
	struct xfs_inode	*rip;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	uint64_t		resblks;
	uint			quotamount = 0;
	uint			quotaflags = 0;
	int			error = 0;

	xfs_sb_mount_common(mp, sbp);

	/*
	 * Check for mismatched features2 values.  Older kernels read & wrote
	 * into the wrong sb offset for sb_features2 on some platforms due to
	 * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
	 * which made older superblock reading/writing routines swap it as a
	 * 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the existing
	 * features2 field in case it has already been modified; we don't want
	 * to lose any features.  We then update the bad location with the ORed
	 * value so that older kernels will see any features2 flags.  The
	 * superblock writeback code ensures the new sb_features2 is copied to
	 * sb_bad_features2 before it is logged or written to disk.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		xfs_warn(mp, "correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		mp->m_update_sb = true;

		/*
		 * Re-check for ATTR2 in case it was found in bad_features2
		 * slot.
		 */
		if (xfs_sb_version_hasattr2(&mp->m_sb) &&
		    !(mp->m_flags & XFS_MOUNT_NOATTR2))
			mp->m_flags |= XFS_MOUNT_ATTR2;
	}

	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_sb_version_removeattr2(&mp->m_sb);
		mp->m_update_sb = true;

		/* update sb_versionnum for the clearing of the morebits */
		if (!sbp->sb_features2)
			mp->m_update_sb = true;
	}

	/* always use v2 inodes by default now */
	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
		mp->m_update_sb = true;
	}

	/*
	 * If we were given new sunit/swidth options, do some basic validation
	 * checks and convert the incore dalign and swidth values to the
	 * same units (FSB) that everything else uses.  This /must/ happen
	 * before computing the inode geometry.
	 */
	error = xfs_validate_new_dalign(mp);
	if (error)
		goto out;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_setup_geometry(mp);
	xfs_rmapbt_compute_maxlevels(mp);
	xfs_refcountbt_compute_maxlevels(mp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
	 * is NOT aligned turn off m_dalign since allocator alignment is within
	 * an ag, therefore ag has to be aligned at stripe boundary.  Note that
	 * we must compute the free space and rmap btree geometry before doing
	 * this.
	 */
	error = xfs_update_alignment(mp);
	if (error)
		goto out;

	/* enable fail_at_unmount as default */
	mp->m_fail_unmount = true;

	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
			       NULL, mp->m_super->s_id);
	if (error)
		goto out;

	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
			       &mp->m_kobj, "stats");
	if (error)
		goto out_remove_sysfs;

	error = xfs_error_sysfs_init(mp);
	if (error)
		goto out_del_stats;

	error = xfs_errortag_init(mp);
	if (error)
		goto out_remove_error_sysfs;

	error = xfs_uuid_mount(mp);
	if (error)
		goto out_remove_errortag;

	/*
	 * Update the preferred write size based on the information from the
	 * on-disk superblock.
	 */
	mp->m_allocsize_log =
		max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
	mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);

	/* set the low space thresholds for dynamic preallocation */
	xfs_set_low_space_thresholds(mp);

	/*
	 * If enabled, sparse inode chunk alignment is expected to match the
	 * cluster size. Full inode chunk alignment must match the chunk size,
	 * but that is checked on sb read verification...
	 */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    mp->m_sb.sb_spino_align !=
			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
		xfs_warn(mp,
	"Sparse inode block alignment (%u) must match cluster size (%llu).",
			 mp->m_sb.sb_spino_align,
			 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
		error = -EINVAL;
		goto out_remove_uuid;
	}

	/*
	 * Check that the data (and log if separate) is an ok size.
	 */
	error = xfs_check_sizes(mp);
	if (error)
		goto out_remove_uuid;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		xfs_warn(mp, "RT mount failed");
		goto out_remove_uuid;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	mp->m_fixedfsid[0] =
		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);

	error = xfs_da_mount(mp);
	if (error) {
		xfs_warn(mp, "Failed dir/attr init: %d", error);
		goto out_remove_uuid;
	}

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_resv_calc(mp, M_RES(mp));

	/*
	 * Allocate and initialize the per-ag data.
	 */
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed per-ag init: %d", error);
		goto out_free_dir;
	}

	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
		xfs_warn(mp, "no log defined");
		error = -EFSCORRUPTED;
		goto out_free_perag;
	}

	/*
	 * Log's mount-time initialization. The first part of recovery can place
	 * some items on the AIL, to be handled when recovery is finished or
	 * canceled.
	 */
	error = xfs_log_mount(mp, mp->m_logdev_targp,
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
	if (error) {
		xfs_warn(mp, "log mount failed");
		goto out_fail_wait;
	}

	/* Make sure the summary counts are ok. */
	error = xfs_check_summary_counts(mp);
	if (error)
		goto out_log_dealloc;

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_EXCL, &rip);
	if (error) {
		xfs_warn(mp,
			"Failed to read root inode 0x%llx, error %d",
			sbp->sb_rootino, -error);
		goto out_log_dealloc;
	}

	ASSERT(rip != NULL);

	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		error = -EFSCORRUPTED;
		goto out_rele_rip;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		xfs_warn(mp, "failed to read RT inodes");
		goto out_rele_rip;
	}

	/*
	 * If this is a read-only mount defer the superblock updates until
	 * the next remount into writeable mode.  Otherwise we would never
	 * perform the update e.g. for the root filesystem.
	 */
	if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			goto out_rtunmount;
		}
	}

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if (XFS_IS_QUOTA_RUNNING(mp)) {
		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
		if (error)
			goto out_rtunmount;
	} else {
		ASSERT(!XFS_IS_QUOTA_ON(mp));

		/*
		 * If a file system had quotas running earlier, but decided to
		 * mount without -o uquota/pquota/gquota options, revoke the
		 * quotachecked license.
		 */
		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
			xfs_notice(mp, "resetting quota flags");
			error = xfs_mount_reset_sbqflags(mp);
			if (error)
				goto out_rtunmount;
		}
	}

	/*
	 * Finish recovering the file system.  This part needed to be delayed
	 * until after the root and real-time bitmap inodes were consistently
	 * read in.
	 */
	error = xfs_log_mount_finish(mp);
	if (error) {
		xfs_warn(mp, "log mount finish failed");
		goto out_rtunmount;
	}

	/*
	 * Now the log is fully replayed, we can transition to full read-only
	 * mode for read-only mounts. This will sync all the metadata and clean
	 * the log so that the recovery we just performed does not have to be
	 * replayed again on the next mount.
	 *
	 * We use the same quiesce mechanism as the rw->ro remount, as they are
	 * semantically identical operations.
	 */
	if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
							XFS_MOUNT_RDONLY) {
		xfs_quiesce_attr(mp);
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if (quotamount) {
		ASSERT(mp->m_qflags == 0);
		mp->m_qflags = quotaflags;

		xfs_qm_mount_quotas(mp);
	}

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount. Warn if this occurs.
	 */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
		resblks = xfs_default_resblks(mp);
		error = xfs_reserve_blocks(mp, &resblks, NULL);
		if (error)
			xfs_warn(mp,
	"Unable to allocate reserve blocks. Continuing without reserve pool.");

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			goto out_quota;
		}
	}

	/* Reserve AG blocks for future btree expansion. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		goto out_agresv;

	return 0;

 out_agresv:
	xfs_fs_unreserve_ag_blocks(mp);
 out_quota:
	xfs_qm_unmount_quotas(mp);
 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
	xfs_irele(rip);
	/* Clean out dquots that might be in memory after quotacheck. */
	xfs_qm_unmount(mp);
	/*
	 * Cancel all delayed reclaim work and reclaim the inodes directly.
	 * We have to do this /after/ rtunmount and qm_unmount because those
	 * two will have scheduled delayed reclaim for the rt/quota inodes.
	 *
	 * This is slightly different from the unmountfs call sequence
	 * because we could be tearing down a partially set up mount.  In
	 * particular, if log_mount_finish fails we bail out without calling
	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
	 * quota inodes.
	 */
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp);
	xfs_health_unmount(mp);
 out_log_dealloc:
	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
	xfs_log_mount_cancel(mp);
 out_fail_wait:
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
 out_free_perag:
	xfs_free_perag(mp);
 out_free_dir:
	xfs_da_unmount(mp);
 out_remove_uuid:
	xfs_uuid_unmount(mp);
 out_remove_errortag:
	xfs_errortag_del(mp);
 out_remove_error_sysfs:
	xfs_error_sysfs_del(mp);
 out_del_stats:
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
 out:
	return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
	struct xfs_mount	*mp)
{
	uint64_t		resblks;
	int			error;

	xfs_stop_block_reaping(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	xfs_rtunmount_inodes(mp);
	xfs_irele(mp->m_rootip);

	/*
	 * We can potentially deadlock here if we have an inode cluster
	 * that has been freed but whose buffer is still pinned in memory
	 * because the transaction is still sitting in an iclog. The stale
	 * inodes on that buffer will be pinned to the buffer until the
	 * transaction hits the disk and the callbacks run. Pushing the AIL will
	 * skip the stale inodes and may never see the pinned buffer, so
	 * nothing will push out the iclog and unpin the buffer. Hence we
	 * need to force the log here to ensure all items are flushed into the
	 * AIL before we go any further.
	 */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * Wait for all busy extents to be freed, including completion of
	 * any discard operation.
	 */
	xfs_extent_busy_wait_all(mp);
	flush_workqueue(xfs_discard_wq);

	/*
	 * We now need to tell the world we are unmounting. This will allow
	 * us to detect that the filesystem is going away and we should error
	 * out anything that we have been retrying in the background. This will
	 * prevent never-ending retries in AIL pushing from hanging the unmount.
	 */
	mp->m_flags |= XFS_MOUNT_UNMOUNTING;

	/*
	 * Flush all pending changes from the AIL.
	 */
	xfs_ail_push_all_sync(mp->m_ail);

	/*
	 * Reclaim all inodes. At this point there should be no dirty inodes and
	 * none should be pinned or locked. Stop background inode reclaim here
	 * if it is still running.
	 */
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp);
	xfs_health_unmount(mp);

	xfs_qm_unmount(mp);

	/*
	 * Unreserve any blocks we have so that when we unmount we don't account
	 * the reserved free space as used. This is really only necessary for
	 * lazy superblock counting because it trusts the incore superblock
	 * counters to be absolutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct the
	 * correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	error = xfs_reserve_blocks(mp, &resblks, NULL);
	if (error)
		xfs_warn(mp, "Unable to free reserved block pool. "
				"Freespace may not be correct on next mount.");

	error = xfs_log_sbcount(mp);
	if (error)
		xfs_warn(mp, "Unable to update superblock counters. "
				"Freespace may not be correct on next mount.");

	xfs_log_unmount(mp);
	xfs_da_unmount(mp);
	xfs_uuid_unmount(mp);

#if defined(DEBUG)
	xfs_errortag_clearall(mp);
#endif
	xfs_free_perag(mp);

	xfs_errortag_del(mp);
	xfs_error_sysfs_del(mp);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
	xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed.  The caller specifies the
 * minimum freeze level for which modifications should not be allowed.  This
 * allows certain operations to proceed while the freeze sequence is in
 * progress, if necessary.
 */
bool
xfs_fs_writable(
	struct xfs_mount	*mp,
	int			level)
{
	ASSERT(level > SB_UNFROZEN);
	if ((mp->m_super->s_writers.frozen >= level) ||
	    XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
		return false;

	return true;
}

/*
 * Sync the superblock counters to disk.
 *
 * Note this code can be called during the process of freezing, so we use the
 * transaction allocator that does not block when the transaction subsystem is
 * in its frozen state.
 */
int
xfs_log_sbcount(xfs_mount_t *mp)
{
	/* allow this to proceed during the freeze sequence... */
	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
		return 0;

	/*
	 * we don't need to do this if we are updating the superblock
	 * counters on every modification.
	 */
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return 0;

	return xfs_sync_sb(mp, true);
}

/*
 * Deltas for the block count can vary from 1 to very large, but lock contention
 * only occurs on frequent small block count updates such as in the delayed
 * allocation path for buffered writes (page-at-a-time updates). Hence we set
 * a large batch count (1024) to minimise global counter updates except when
 * we get near to ENOSPC and we have to be very accurate with our updates.
 */
#define XFS_FDBLOCKS_BATCH	1024
int
xfs_mod_fdblocks(
	struct xfs_mount	*mp,
	int64_t			delta,
	bool			rsvd)
{
	int64_t			lcounter;
	long long		res_used;
	s32			batch;

	if (delta > 0) {
		/*
		 * If the reserve pool is depleted, put blocks back into it
		 * first. Most of the time the pool is full.
		 */
		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(&mp->m_fdblocks, delta);
			return 0;
		}

		spin_lock(&mp->m_sb_lock);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (res_used > delta) {
			mp->m_resblks_avail += delta;
		} else {
			delta -= res_used;
			mp->m_resblks_avail = mp->m_resblks;
			percpu_counter_add(&mp->m_fdblocks, delta);
		}
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}

	/*
	 * Taking blocks away, need to be more accurate the closer we
	 * are to zero.
	 *
	 * If the counter has a value of less than 2 * max batch size,
	 * then make everything serialise as we are real close to
	 * ENOSPC.
	 */
	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
				     XFS_FDBLOCKS_BATCH) < 0)
		batch = 1;
	else
		batch = XFS_FDBLOCKS_BATCH;

	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
				     XFS_FDBLOCKS_BATCH) >= 0) {
		/* we had space! */
		return 0;
	}

	/*
	 * lock up the sb for dipping into reserves before releasing the space
	 * that took us to ENOSPC.
	 */
	spin_lock(&mp->m_sb_lock);
	percpu_counter_add(&mp->m_fdblocks, -delta);
	if (!rsvd)
		goto fdblocks_enospc;

	lcounter = (long long)mp->m_resblks_avail + delta;
	if (lcounter >= 0) {
		mp->m_resblks_avail = lcounter;
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}
	xfs_warn_once(mp,
"Reserve blocks depleted! Consider increasing reserve pool size.");

fdblocks_enospc:
	spin_unlock(&mp->m_sb_lock);
	return -ENOSPC;
}
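
/*
 * Calling-convention sketch for xfs_mod_fdblocks() (the caller below is
 * hypothetical, shown only for illustration): freeing space passes a
 * positive delta, allocation passes a negative one, and rsvd == true lets
 * a critical transaction dip into the reserve pool at ENOSPC:
 *
 *	error = xfs_mod_fdblocks(mp, -(int64_t)nblocks, rsvd);
 *	if (error == -ENOSPC)
 *		return error;	// no space, even with the reserve pool
 */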

int
xfs_mod_frextents(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	int64_t			lcounter;
	int			ret = 0;

	spin_lock(&mp->m_sb_lock);
	lcounter = mp->m_sb.sb_frextents + delta;
	if (lcounter < 0)
		ret = -ENOSPC;
	else
		mp->m_sb.sb_frextents = lcounter;
	spin_unlock(&mp->m_sb_lock);
	return ret;
}
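
/*
 * Note the contrast with xfs_mod_fdblocks() above: free realtime extents
 * live only in the in-core superblock field, so every update serialises on
 * m_sb_lock.  An illustrative call such as xfs_mod_frextents(mp, -1) claims
 * one free rt extent and returns -ENOSPC if none remain.
 */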

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	xfs_buf_lock(bp);
	mp->m_sb_bp = NULL;
	xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
	struct xfs_mount	*mp,
	char			*message)
{
	if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
	    xfs_readonly_buftarg(mp->m_logdev_targp) ||
	    (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		xfs_notice(mp, "%s required on read-only device.", message);
		xfs_notice(mp, "write access unavailable, cannot proceed.");
		return -EROFS;
	}
	return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
	struct xfs_mount	*mp)
{
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return;

	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
}

/*
 * Update the in-core delayed block counter.
 *
 * We prefer to update the counter without having to take a spinlock for every
 * counter update (i.e. batching).  Each change to delayed allocation
 * reservations can easily exceed the default percpu counter batching, so we
 * use a larger batch factor here.
 *
 * Note that we don't currently have any callers requiring fast summation
 * (e.g. percpu_counter_read) so we can use a big batch value here.
 */
#define XFS_DELALLOC_BATCH	(4096)
void
xfs_mod_delalloc(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
			XFS_DELALLOC_BATCH);
}
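
/*
 * Illustrative numbers for the batch factor above: with XFS_DELALLOC_BATCH
 * at 4096, percpu_counter_add_batch() only folds a CPU's local count into
 * the global m_delalloc_blks counter once its magnitude reaches the batch
 * size, so a buffered writer reserving a handful of blocks per page touches
 * the shared counter roughly once per thousand updates rather than on each
 * one.
 */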