fs/gfs2/util.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5  */
   6
   7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9 #include <linux/spinlock.h>
  10 #include <linux/completion.h>
  11 #include <linux/buffer_head.h>
  12 #include <linux/crc32.h>
  13 #include <linux/gfs2_ondisk.h>
  14 #include <linux/delay.h>
  15 #include <linux/uaccess.h>
  16
  17 #include "gfs2.h"
  18 #include "incore.h"
  19 #include "glock.h"
  20 #include "glops.h"
  21 #include "log.h"
  22 #include "lops.h"
  23 #include "recovery.h"
  24 #include "rgrp.h"
  25 #include "super.h"
  26 #include "util.h"
  27
/*
 * Global kmem_cache pointers and one mempool pointer for gfs2 objects.
 * They are only defined here; nothing in this part of the file initialises
 * or dereferences them (presumably the module init code sets them up -
 * TODO confirm).
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
  37
  38 void gfs2_assert_i(struct gfs2_sbd *sdp)
  39 {
  40         fs_emerg(sdp, "fatal assertion failed\n");
  41 }
  42
  43 /**
  44  * check_journal_clean - Make sure a journal is clean for a spectator mount
  45  * @sdp: The GFS2 superblock
  46  * @jd: The journal descriptor
  47  * @verbose: Show more prints in the log
  48  *
  49  * Returns: 0 if the journal is clean or locked, else an error
  50  */
  51 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
  52                         bool verbose)
  53 {
  54         int error;
  55         struct gfs2_holder j_gh;
  56         struct gfs2_log_header_host head;
  57         struct gfs2_inode *ip;
  58
  59         ip = GFS2_I(jd->jd_inode);
  60         error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
  61                                    GL_EXACT | GL_NOCACHE, &j_gh);
  62         if (error) {
  63                 if (verbose)
  64                         fs_err(sdp, "Error %d locking journal for spectator "
  65                                "mount.\n", error);
  66                 return -EPERM;
  67         }
  68         error = gfs2_jdesc_check(jd);
  69         if (error) {
  70                 if (verbose)
  71                         fs_err(sdp, "Error checking journal for spectator "
  72                                "mount.\n");
  73                 goto out_unlock;
  74         }
  75         error = gfs2_find_jhead(jd, &head, false);
  76         if (error) {
  77                 if (verbose)
  78                         fs_err(sdp, "Error parsing journal for spectator "
  79                                "mount.\n");
  80                 goto out_unlock;
  81         }
  82         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
  83                 error = -EPERM;
  84                 if (verbose)
  85                         fs_err(sdp, "jid=%u: Journal is dirty, so the first "
  86                                "mounter must not be a spectator.\n",
  87                                jd->jd_jid);
  88         }
  89
  90 out_unlock:
  91         gfs2_glock_dq_uninit(&j_gh);
  92         return error;
  93 }
  94
  95 /**
  96  * gfs2_freeze_lock - hold the freeze glock
  97  * @sdp: the superblock
  98  * @freeze_gh: pointer to the requested holder
  99  * @caller_flags: any additional flags needed by the caller
 100  */
 101 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
 102                      int caller_flags)
 103 {
 104         int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
 105         int error;
 106
 107         error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
 108                                    freeze_gh);
 109         if (error && error != GLR_TRYFAILED)
 110                 fs_err(sdp, "can't lock the freeze lock: %d\n", error);
 111         return error;
 112 }
 113
 114 void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
 115 {
 116         if (gfs2_holder_initialized(freeze_gh))
 117                 gfs2_glock_dq_uninit(freeze_gh);
 118 }
 119
 120 static void signal_our_withdraw(struct gfs2_sbd *sdp)
 121 {
 122         struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
 123         struct inode *inode;
 124         struct gfs2_inode *ip;
 125         struct gfs2_glock *i_gl;
 126         u64 no_formal_ino;
 127         int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 128         int ret = 0;
 129         int tries;
 130
 131         if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
 132                 return;
 133
 134         inode = sdp->sd_jdesc->jd_inode;
 135         ip = GFS2_I(inode);
 136         i_gl = ip->i_gl;
 137         no_formal_ino = ip->i_no_formal_ino;
 138
 139         /* Prevent any glock dq until withdraw recovery is complete */
 140         set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 141         /*
 142          * Don't tell dlm we're bailing until we have no more buffers in the
 143          * wind. If journal had an IO error, the log code should just purge
 144          * the outstanding buffers rather than submitting new IO. Making the
 145          * file system read-only will flush the journal, etc.
 146          *
 147          * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
 148          * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
 149          * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
 150          * therefore we need to clear SDF_JOURNAL_LIVE manually.
 151          */
 152         clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 153         if (!sb_rdonly(sdp->sd_vfs)) {
 154                 struct gfs2_holder freeze_gh;
 155
 156                 gfs2_holder_mark_uninitialized(&freeze_gh);
 157                 if (sdp->sd_freeze_gl &&
 158                     !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
 159                         ret = gfs2_freeze_lock(sdp, &freeze_gh,
 160                                        log_write_allowed ? 0 : LM_FLAG_TRY);
 161                         if (ret == GLR_TRYFAILED)
 162                                 ret = 0;
 163                 }
 164                 if (!ret)
 165                         gfs2_make_fs_ro(sdp);
 166                 gfs2_freeze_unlock(&freeze_gh);
 167         }
 168
 169         if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
 170                 if (!ret)
 171                         ret = -EIO;
 172                 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 173                 goto skip_recovery;
 174         }
 175         /*
 176          * Drop the glock for our journal so another node can recover it.
 177          */
 178         if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
 179                 gfs2_glock_dq_wait(&sdp->sd_journal_gh);
 180                 gfs2_holder_uninit(&sdp->sd_journal_gh);
 181         }
 182         sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
 183         gfs2_glock_dq(&sdp->sd_jinode_gh);
 184         if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
 185                 /* Make sure gfs2_unfreeze works if partially-frozen */
 186                 flush_work(&sdp->sd_freeze_work);
 187                 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 188                 thaw_super(sdp->sd_vfs);
 189         } else {
 190                 wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
 191                             TASK_UNINTERRUPTIBLE);
 192         }
 193
 194         /*
 195          * holder_uninit to force glock_put, to force dlm to let go
 196          */
 197         gfs2_holder_uninit(&sdp->sd_jinode_gh);
 198
 199         /*
 200          * Note: We need to be careful here:
 201          * Our iput of jd_inode will evict it. The evict will dequeue its
 202          * glock, but the glock dq will wait for the withdraw unless we have
 203          * exception code in glock_dq.
 204          */
 205         iput(inode);
 206         /*
 207          * Wait until the journal inode's glock is freed. This allows try locks
 208          * on other nodes to be successful, otherwise we remain the owner of
 209          * the glock as far as dlm is concerned.
 210          */
 211         if (i_gl->gl_ops->go_free) {
 212                 set_bit(GLF_FREEING, &i_gl->gl_flags);
 213                 wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
 214         }
 215
 216         /*
 217          * Dequeue the "live" glock, but keep a reference so it's never freed.
 218          */
 219         gfs2_glock_hold(live_gl);
 220         gfs2_glock_dq_wait(&sdp->sd_live_gh);
 221         /*
 222          * We enqueue the "live" glock in EX so that all other nodes
 223          * get a demote request and act on it. We don't really want the
 224          * lock in EX, so we send a "try" lock with 1CB to produce a callback.
 225          */
 226         fs_warn(sdp, "Requesting recovery of jid %d.\n",
 227                 sdp->sd_lockstruct.ls_jid);
 228         gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
 229                            &sdp->sd_live_gh);
 230         msleep(GL_GLOCK_MAX_HOLD);
 231         /*
 232          * This will likely fail in a cluster, but succeed standalone:
 233          */
 234         ret = gfs2_glock_nq(&sdp->sd_live_gh);
 235
 236         /*
 237          * If we actually got the "live" lock in EX mode, there are no other
 238          * nodes available to replay our journal. So we try to replay it
 239          * ourselves. We hold the "live" glock to prevent other mounters
 240          * during recovery, then just dequeue it and reacquire it in our
 241          * normal SH mode. Just in case the problem that caused us to
 242          * withdraw prevents us from recovering our journal (e.g. io errors
 243          * and such) we still check if the journal is clean before proceeding
 244          * but we may wait forever until another mounter does the recovery.
 245          */
 246         if (ret == 0) {
 247                 fs_warn(sdp, "No other mounters found. Trying to recover our "
 248                         "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
 249                 if (gfs2_recover_journal(sdp->sd_jdesc, 1))
 250                         fs_warn(sdp, "Unable to recover our journal jid %d.\n",
 251                                 sdp->sd_lockstruct.ls_jid);
 252                 gfs2_glock_dq_wait(&sdp->sd_live_gh);
 253                 gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
 254                                    &sdp->sd_live_gh);
 255                 gfs2_glock_nq(&sdp->sd_live_gh);
 256         }
 257
 258         gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
 259         clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 260
 261         /*
 262          * At this point our journal is evicted, so we need to get a new inode
 263          * for it. Once done, we need to call gfs2_find_jhead which
 264          * calls gfs2_map_journal_extents to map it for us again.
 265          *
 266          * Note that we don't really want it to look up a FREE block. The
 267          * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
 268          * which would otherwise fail because it requires grabbing an rgrp
 269          * glock, which would fail with -EIO because we're withdrawing.
 270          */
 271         inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
 272                                   sdp->sd_jdesc->jd_no_addr, no_formal_ino,
 273                                   GFS2_BLKST_FREE);
 274         if (IS_ERR(inode)) {
 275                 fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
 276                         sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
 277                 goto skip_recovery;
 278         }
 279         sdp->sd_jdesc->jd_inode = inode;
 280
 281         /*
 282          * Now wait until recovery is complete.
 283          */
 284         for (tries = 0; tries < 10; tries++) {
 285                 ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
 286                 if (!ret)
 287                         break;
 288                 msleep(HZ);
 289                 fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
 290                         sdp->sd_lockstruct.ls_jid);
 291         }
 292 skip_recovery:
 293         if (!ret)
 294                 fs_warn(sdp, "Journal recovery complete for jid %d.\n",
 295                         sdp->sd_lockstruct.ls_jid);
 296         else
 297                 fs_warn(sdp, "Journal recovery skipped for %d until next "
 298                         "mount.\n", sdp->sd_lockstruct.ls_jid);
 299         fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
 300         sdp->sd_glock_dqs_held = 0;
 301         wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
 302 }
 303
 304 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
 305 {
 306         struct va_format vaf;
 307         va_list args;
 308
 309         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 310             test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
 311                 return;
 312
 313         va_start(args, fmt);
 314         vaf.fmt = fmt;
 315         vaf.va = &args;
 316         fs_err(sdp, "%pV", &vaf);
 317         va_end(args);
 318 }
 319
 320 int gfs2_withdraw(struct gfs2_sbd *sdp)
 321 {
 322         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 323         const struct lm_lockops *lm = ls->ls_ops;
 324
 325         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 326             test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
 327                 if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
 328                         return -1;
 329
 330                 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
 331                             TASK_UNINTERRUPTIBLE);
 332                 return -1;
 333         }
 334
 335         set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 336
 337         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 338                 fs_err(sdp, "about to withdraw this file system\n");
 339                 BUG_ON(sdp->sd_args.ar_debug);
 340
 341                 signal_our_withdraw(sdp);
 342
 343                 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 344
 345                 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
 346                         wait_for_completion(&sdp->sd_wdack);
 347
 348                 if (lm->lm_unmount) {
 349                         fs_err(sdp, "telling LM to unmount\n");
 350                         lm->lm_unmount(sdp);
 351                 }
 352                 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 353                 fs_err(sdp, "File system withdrawn\n");
 354                 dump_stack();
 355                 clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 356                 smp_mb__after_atomic();
 357                 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
 358         }
 359
 360         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 361                 panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
 362
 363         return -1;
 364 }
 365
 366 /*
 367  * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 368  */
 369
 370 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
 371                             const char *function, char *file, unsigned int line,
 372                             bool delayed)
 373 {
 374         if (gfs2_withdrawn(sdp))
 375                 return;
 376
 377         fs_err(sdp,
 378                "fatal: assertion \"%s\" failed\n"
 379                "   function = %s, file = %s, line = %u\n",
 380                assertion, function, file, line);
 381
 382         /*
 383          * If errors=panic was specified on mount, it won't help to delay the
 384          * withdraw.
 385          */
 386         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 387                 delayed = false;
 388
 389         if (delayed)
 390                 gfs2_withdraw_delayed(sdp);
 391         else
 392                 gfs2_withdraw(sdp);
 393         dump_stack();
 394 }
 395
 396 /*
 397  * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 398  */
 399
 400 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
 401                         const char *function, char *file, unsigned int line)
 402 {
 403         if (time_before(jiffies,
 404                         sdp->sd_last_warning +
 405                         gfs2_tune_get(sdp, gt_complain_secs) * HZ))
 406                 return;
 407
 408         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
 409                 fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
 410                         assertion, function, file, line);
 411
 412         if (sdp->sd_args.ar_debug)
 413                 BUG();
 414         else
 415                 dump_stack();
 416
 417         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 418                 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
 419                       "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 420                       sdp->sd_fsname, assertion,
 421                       sdp->sd_fsname, function, file, line);
 422
 423         sdp->sd_last_warning = jiffies;
 424 }
 425
 426 /*
 427  * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 428  */
 429
 430 void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
 431                     char *file, unsigned int line)
 432 {
 433         gfs2_lm(sdp,
 434                 "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
 435                 function, file, line);
 436         gfs2_withdraw(sdp);
 437 }
 438
 439 /*
 440  * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 441  */
 442
 443 void gfs2_consist_inode_i(struct gfs2_inode *ip,
 444                           const char *function, char *file, unsigned int line)
 445 {
 446         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 447
 448         gfs2_lm(sdp,
 449                 "fatal: filesystem consistency error\n"
 450                 "  inode = %llu %llu\n"
 451                 "  function = %s, file = %s, line = %u\n",
 452                 (unsigned long long)ip->i_no_formal_ino,
 453                 (unsigned long long)ip->i_no_addr,
 454                 function, file, line);
 455         gfs2_withdraw(sdp);
 456 }
 457
 458 /*
 459  * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 460  */
 461
 462 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
 463                           const char *function, char *file, unsigned int line)
 464 {
 465         struct gfs2_sbd *sdp = rgd->rd_sbd;
 466         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 467
 468         sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
 469         gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
 470         gfs2_lm(sdp,
 471                 "fatal: filesystem consistency error\n"
 472                 "  RG = %llu\n"
 473                 "  function = %s, file = %s, line = %u\n",
 474                 (unsigned long long)rgd->rd_addr,
 475                 function, file, line);
 476         gfs2_withdraw(sdp);
 477 }
 478
 479 /*
 480  * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 481  * Returns: -1 if this call withdrew the machine,
 482  *          -2 if it was already withdrawn
 483  */
 484
 485 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 486                        const char *type, const char *function, char *file,
 487                        unsigned int line)
 488 {
 489         int me;
 490
 491         gfs2_lm(sdp,
 492                 "fatal: invalid metadata block\n"
 493                 "  bh = %llu (%s)\n"
 494                 "  function = %s, file = %s, line = %u\n",
 495                 (unsigned long long)bh->b_blocknr, type,
 496                 function, file, line);
 497         me = gfs2_withdraw(sdp);
 498         return (me) ? -1 : -2;
 499 }
 500
 501 /*
 502  * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 503  * Returns: -1 if this call withdrew the machine,
 504  *          -2 if it was already withdrawn
 505  */
 506
 507 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 508                            u16 type, u16 t, const char *function,
 509                            char *file, unsigned int line)
 510 {
 511         int me;
 512
 513         gfs2_lm(sdp,
 514                 "fatal: invalid metadata block\n"
 515                 "  bh = %llu (type: exp=%u, found=%u)\n"
 516                 "  function = %s, file = %s, line = %u\n",
 517                 (unsigned long long)bh->b_blocknr, type, t,
 518                 function, file, line);
 519         me = gfs2_withdraw(sdp);
 520         return (me) ? -1 : -2;
 521 }
 522
 523 /*
 524  * gfs2_io_error_i - Flag an I/O error and withdraw
 525  * Returns: -1 if this call withdrew the machine,
 526  *          0 if it was already withdrawn
 527  */
 528
 529 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 530                     unsigned int line)
 531 {
 532         gfs2_lm(sdp,
 533                 "fatal: I/O error\n"
 534                 "  function = %s, file = %s, line = %u\n",
 535                 function, file, line);
 536         return gfs2_withdraw(sdp);
 537 }
 538
 539 /*
 540  * gfs2_io_error_bh_i - Flag a buffer I/O error
 541  * @withdraw: withdraw the filesystem
 542  */
 543
 544 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 545                         const char *function, char *file, unsigned int line,
 546                         bool withdraw)
 547 {
 548         if (gfs2_withdrawn(sdp))
 549                 return;
 550
 551         fs_err(sdp, "fatal: I/O error\n"
 552                "  block = %llu\n"
 553                "  function = %s, file = %s, line = %u\n",
 554                (unsigned long long)bh->b_blocknr, function, file, line);
 555         if (withdraw)
 556                 gfs2_withdraw(sdp);
 557 }
 558