fs/xfs/libxfs/xfs_inode_buf.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_mount.h"
  13 #include "xfs_inode.h"
  14 #include "xfs_errortag.h"
  15 #include "xfs_error.h"
  16 #include "xfs_icache.h"
  17 #include "xfs_trans.h"
  18 #include "xfs_ialloc.h"
  19 #include "xfs_dir2.h"
  20
  21 #include <linux/iversion.h>
  22
  23 /*
  24  * If we are doing readahead on an inode buffer, we might be in log recovery
  25  * reading an inode allocation buffer that hasn't yet been replayed, and hence
  26  * has not had the inode cores stamped into it. Hence for readahead, the buffer
  27  * may be potentially invalid.
  28  *
  29  * If the readahead buffer is invalid, we need to mark it with an error and
  30  * clear the DONE status of the buffer so that a followup read will re-read it
  31  * from disk. We don't report the error otherwise to avoid warnings during log
  32  * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
  33  * because all we want to do is say readahead failed; there is no-one to report
  34  * the error to, so this will distinguish it from a non-ra verifier failure.
  35  * Changes to this readahead error behaviour also need to be reflected in
  36  * xfs_dquot_buf_readahead_verify().
  37  */
  38 static void
  39 xfs_inode_buf_verify(
  40         struct xfs_buf  *bp,
  41         bool            readahead)
  42 {
  43         struct xfs_mount *mp = bp->b_mount;
  44         xfs_agnumber_t  agno;
  45         int             i;
  46         int             ni;
  47
  48         /*
  49          * Validate the magic number and version of every inode in the buffer
  50          */
  51         agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  52         ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  53         for (i = 0; i < ni; i++) {
  54                 int             di_ok;
  55                 xfs_dinode_t    *dip;
  56                 xfs_agino_t     unlinked_ino;
  57
  58                 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  59                 unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  60                 di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
  61                         xfs_dinode_good_version(&mp->m_sb, dip->di_version) &&
  62                         xfs_verify_agino_or_null(mp, agno, unlinked_ino);
  63                 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  64                                                 XFS_ERRTAG_ITOBP_INOTOBP))) {
  65                         if (readahead) {
  66                                 bp->b_flags &= ~XBF_DONE;
  67                                 xfs_buf_ioerror(bp, -EIO);
  68                                 return;
  69                         }
  70
  71 #ifdef DEBUG
  72                         xfs_alert(mp,
  73                                 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
  74                                 (unsigned long long)bp->b_bn, i,
  75                                 be16_to_cpu(dip->di_magic));
  76 #endif
  77                         xfs_buf_verifier_error(bp, -EFSCORRUPTED,
  78                                         __func__, dip, sizeof(*dip),
  79                                         NULL);
  80                         return;
  81                 }
  82         }
  83 }
  84
  85
  86 static void
  87 xfs_inode_buf_read_verify(
  88         struct xfs_buf  *bp)
  89 {
  90         xfs_inode_buf_verify(bp, false);
  91 }
  92
  93 static void
  94 xfs_inode_buf_readahead_verify(
  95         struct xfs_buf  *bp)
  96 {
  97         xfs_inode_buf_verify(bp, true);
  98 }
  99
 100 static void
 101 xfs_inode_buf_write_verify(
 102         struct xfs_buf  *bp)
 103 {
 104         xfs_inode_buf_verify(bp, false);
 105 }
 106
 107 const struct xfs_buf_ops xfs_inode_buf_ops = {
 108         .name = "xfs_inode",
 109         .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 110                      cpu_to_be16(XFS_DINODE_MAGIC) },
 111         .verify_read = xfs_inode_buf_read_verify,
 112         .verify_write = xfs_inode_buf_write_verify,
 113 };
 114
 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 116         .name = "xfs_inode_ra",
 117         .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 118                      cpu_to_be16(XFS_DINODE_MAGIC) },
 119         .verify_read = xfs_inode_buf_readahead_verify,
 120         .verify_write = xfs_inode_buf_write_verify,
 121 };
 122
 123
 124 /*
 125  * This routine is called to map an inode to the buffer containing the on-disk
 126  * version of the inode.  It returns a pointer to the buffer containing the
 127  * on-disk inode in the bpp parameter.
 128  */
 129 int
 130 xfs_imap_to_bp(
 131         struct xfs_mount        *mp,
 132         struct xfs_trans        *tp,
 133         struct xfs_imap         *imap,
 134         struct xfs_buf          **bpp)
 135 {
 136         return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 137                                    imap->im_len, XBF_UNMAPPED, bpp,
 138                                    &xfs_inode_buf_ops);
 139 }
 140
 141 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
 142 {
 143         struct timespec64       tv;
 144         uint32_t                n;
 145
 146         tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
 147         tv.tv_nsec = n;
 148
 149         return tv;
 150 }
 151
 152 /* Convert an ondisk timestamp to an incore timestamp. */
 153 struct timespec64
 154 xfs_inode_from_disk_ts(
 155         struct xfs_dinode               *dip,
 156         const xfs_timestamp_t           ts)
 157 {
 158         struct timespec64               tv;
 159         struct xfs_legacy_timestamp     *lts;
 160
 161         if (xfs_dinode_has_bigtime(dip))
 162                 return xfs_inode_decode_bigtime(be64_to_cpu(ts));
 163
 164         lts = (struct xfs_legacy_timestamp *)&ts;
 165         tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
 166         tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
 167
 168         return tv;
 169 }
 170
 171 int
 172 xfs_inode_from_disk(
 173         struct xfs_inode        *ip,
 174         struct xfs_dinode       *from)
 175 {
 176         struct inode            *inode = VFS_I(ip);
 177         int                     error;
 178         xfs_failaddr_t          fa;
 179
 180         ASSERT(ip->i_cowfp == NULL);
 181         ASSERT(ip->i_afp == NULL);
 182
 183         fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
 184         if (fa) {
 185                 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
 186                                 sizeof(*from), fa);
 187                 return -EFSCORRUPTED;
 188         }
 189
 190         /*
 191          * First get the permanent information that is needed to allocate an
 192          * inode. If the inode is unused, mode is zero and we shouldn't mess
 193          * with the uninitialized part of it.
 194          */
 195         if (!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb))
 196                 ip->i_flushiter = be16_to_cpu(from->di_flushiter);
 197         inode->i_generation = be32_to_cpu(from->di_gen);
 198         inode->i_mode = be16_to_cpu(from->di_mode);
 199         if (!inode->i_mode)
 200                 return 0;
 201
 202         /*
 203          * Convert v1 inodes immediately to v2 inode format as this is the
 204          * minimum inode version format we support in the rest of the code.
 205          * They will also be unconditionally written back to disk as v2 inodes.
 206          */
 207         if (unlikely(from->di_version == 1)) {
 208                 set_nlink(inode, be16_to_cpu(from->di_onlink));
 209                 ip->i_projid = 0;
 210         } else {
 211                 set_nlink(inode, be32_to_cpu(from->di_nlink));
 212                 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
 213                                         be16_to_cpu(from->di_projid_lo);
 214         }
 215
 216         i_uid_write(inode, be32_to_cpu(from->di_uid));
 217         i_gid_write(inode, be32_to_cpu(from->di_gid));
 218
 219         /*
 220          * Time is signed, so need to convert to signed 32 bit before
 221          * storing in inode timestamp which may be 64 bit. Otherwise
 222          * a time before epoch is converted to a time long after epoch
 223          * on 64 bit systems.
 224          */
 225         inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
 226         inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
 227         inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
 228
 229         ip->i_disk_size = be64_to_cpu(from->di_size);
 230         ip->i_nblocks = be64_to_cpu(from->di_nblocks);
 231         ip->i_extsize = be32_to_cpu(from->di_extsize);
 232         ip->i_forkoff = from->di_forkoff;
 233         ip->i_diflags   = be16_to_cpu(from->di_flags);
 234
 235         if (from->di_dmevmask || from->di_dmstate)
 236                 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
 237
 238         if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 239                 inode_set_iversion_queried(inode,
 240                                            be64_to_cpu(from->di_changecount));
 241                 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
 242                 ip->i_diflags2 = be64_to_cpu(from->di_flags2);
 243                 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
 244         }
 245
 246         error = xfs_iformat_data_fork(ip, from);
 247         if (error)
 248                 return error;
 249         if (from->di_forkoff) {
 250                 error = xfs_iformat_attr_fork(ip, from);
 251                 if (error)
 252                         goto out_destroy_data_fork;
 253         }
 254         if (xfs_is_reflink_inode(ip))
 255                 xfs_ifork_init_cow(ip);
 256         return 0;
 257
 258 out_destroy_data_fork:
 259         xfs_idestroy_fork(&ip->i_df);
 260         return error;
 261 }
 262
 263 /* Convert an incore timestamp to an ondisk timestamp. */
 264 static inline xfs_timestamp_t
 265 xfs_inode_to_disk_ts(
 266         struct xfs_inode                *ip,
 267         const struct timespec64         tv)
 268 {
 269         struct xfs_legacy_timestamp     *lts;
 270         xfs_timestamp_t                 ts;
 271
 272         if (xfs_inode_has_bigtime(ip))
 273                 return cpu_to_be64(xfs_inode_encode_bigtime(tv));
 274
 275         lts = (struct xfs_legacy_timestamp *)&ts;
 276         lts->t_sec = cpu_to_be32(tv.tv_sec);
 277         lts->t_nsec = cpu_to_be32(tv.tv_nsec);
 278
 279         return ts;
 280 }
 281
 282 void
 283 xfs_inode_to_disk(
 284         struct xfs_inode        *ip,
 285         struct xfs_dinode       *to,
 286         xfs_lsn_t               lsn)
 287 {
 288         struct inode            *inode = VFS_I(ip);
 289
 290         to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 291         to->di_onlink = 0;
 292
 293         to->di_format = xfs_ifork_format(&ip->i_df);
 294         to->di_uid = cpu_to_be32(i_uid_read(inode));
 295         to->di_gid = cpu_to_be32(i_gid_read(inode));
 296         to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
 297         to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
 298
 299         memset(to->di_pad, 0, sizeof(to->di_pad));
 300         to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
 301         to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
 302         to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
 303         to->di_nlink = cpu_to_be32(inode->i_nlink);
 304         to->di_gen = cpu_to_be32(inode->i_generation);
 305         to->di_mode = cpu_to_be16(inode->i_mode);
 306
 307         to->di_size = cpu_to_be64(ip->i_disk_size);
 308         to->di_nblocks = cpu_to_be64(ip->i_nblocks);
 309         to->di_extsize = cpu_to_be32(ip->i_extsize);
 310         to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
 311         to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
 312         to->di_forkoff = ip->i_forkoff;
 313         to->di_aformat = xfs_ifork_format(ip->i_afp);
 314         to->di_flags = cpu_to_be16(ip->i_diflags);
 315
 316         if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 317                 to->di_version = 3;
 318                 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 319                 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
 320                 to->di_flags2 = cpu_to_be64(ip->i_diflags2);
 321                 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
 322                 to->di_ino = cpu_to_be64(ip->i_ino);
 323                 to->di_lsn = cpu_to_be64(lsn);
 324                 memset(to->di_pad2, 0, sizeof(to->di_pad2));
 325                 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 326                 to->di_flushiter = 0;
 327         } else {
 328                 to->di_version = 2;
 329                 to->di_flushiter = cpu_to_be16(ip->i_flushiter);
 330         }
 331 }
 332
 333 static xfs_failaddr_t
 334 xfs_dinode_verify_fork(
 335         struct xfs_dinode       *dip,
 336         struct xfs_mount        *mp,
 337         int                     whichfork)
 338 {
 339         uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 340
 341         switch (XFS_DFORK_FORMAT(dip, whichfork)) {
 342         case XFS_DINODE_FMT_LOCAL:
 343                 /*
 344                  * no local regular files yet
 345                  */
 346                 if (whichfork == XFS_DATA_FORK) {
 347                         if (S_ISREG(be16_to_cpu(dip->di_mode)))
 348                                 return __this_address;
 349                         if (be64_to_cpu(dip->di_size) >
 350                                         XFS_DFORK_SIZE(dip, mp, whichfork))
 351                                 return __this_address;
 352                 }
 353                 if (di_nextents)
 354                         return __this_address;
 355                 break;
 356         case XFS_DINODE_FMT_EXTENTS:
 357                 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 358                         return __this_address;
 359                 break;
 360         case XFS_DINODE_FMT_BTREE:
 361                 if (whichfork == XFS_ATTR_FORK) {
 362                         if (di_nextents > MAXAEXTNUM)
 363                                 return __this_address;
 364                 } else if (di_nextents > MAXEXTNUM) {
 365                         return __this_address;
 366                 }
 367                 break;
 368         default:
 369                 return __this_address;
 370         }
 371         return NULL;
 372 }
 373
 374 static xfs_failaddr_t
 375 xfs_dinode_verify_forkoff(
 376         struct xfs_dinode       *dip,
 377         struct xfs_mount        *mp)
 378 {
 379         if (!dip->di_forkoff)
 380                 return NULL;
 381
 382         switch (dip->di_format)  {
 383         case XFS_DINODE_FMT_DEV:
 384                 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 385                         return __this_address;
 386                 break;
 387         case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 388         case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 389         case XFS_DINODE_FMT_BTREE:
 390                 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
 391                         return __this_address;
 392                 break;
 393         default:
 394                 return __this_address;
 395         }
 396         return NULL;
 397 }
 398
 399 xfs_failaddr_t
 400 xfs_dinode_verify(
 401         struct xfs_mount        *mp,
 402         xfs_ino_t               ino,
 403         struct xfs_dinode       *dip)
 404 {
 405         xfs_failaddr_t          fa;
 406         uint16_t                mode;
 407         uint16_t                flags;
 408         uint64_t                flags2;
 409         uint64_t                di_size;
 410
 411         if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 412                 return __this_address;
 413
 414         /* Verify v3 integrity information first */
 415         if (dip->di_version >= 3) {
 416                 if (!xfs_sb_version_has_v3inode(&mp->m_sb))
 417                         return __this_address;
 418                 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 419                                       XFS_DINODE_CRC_OFF))
 420                         return __this_address;
 421                 if (be64_to_cpu(dip->di_ino) != ino)
 422                         return __this_address;
 423                 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 424                         return __this_address;
 425         }
 426
 427         /* don't allow invalid i_size */
 428         di_size = be64_to_cpu(dip->di_size);
 429         if (di_size & (1ULL << 63))
 430                 return __this_address;
 431
 432         mode = be16_to_cpu(dip->di_mode);
 433         if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 434                 return __this_address;
 435
 436         /* No zero-length symlinks/dirs. */
 437         if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 438                 return __this_address;
 439
 440         /* Fork checks carried over from xfs_iformat_fork */
 441         if (mode &&
 442             be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 443                         be64_to_cpu(dip->di_nblocks))
 444                 return __this_address;
 445
 446         if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 447                 return __this_address;
 448
 449         flags = be16_to_cpu(dip->di_flags);
 450
 451         if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 452                 return __this_address;
 453
 454         /* check for illegal values of forkoff */
 455         fa = xfs_dinode_verify_forkoff(dip, mp);
 456         if (fa)
 457                 return fa;
 458
 459         /* Do we have appropriate data fork formats for the mode? */
 460         switch (mode & S_IFMT) {
 461         case S_IFIFO:
 462         case S_IFCHR:
 463         case S_IFBLK:
 464         case S_IFSOCK:
 465                 if (dip->di_format != XFS_DINODE_FMT_DEV)
 466                         return __this_address;
 467                 break;
 468         case S_IFREG:
 469         case S_IFLNK:
 470         case S_IFDIR:
 471                 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 472                 if (fa)
 473                         return fa;
 474                 break;
 475         case 0:
 476                 /* Uninitialized inode ok. */
 477                 break;
 478         default:
 479                 return __this_address;
 480         }
 481
 482         if (dip->di_forkoff) {
 483                 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 484                 if (fa)
 485                         return fa;
 486         } else {
 487                 /*
 488                  * If there is no fork offset, this may be a freshly-made inode
 489                  * in a new disk cluster, in which case di_aformat is zeroed.
 490                  * Otherwise, such an inode must be in EXTENTS format; this goes
 491                  * for freed inodes as well.
 492                  */
 493                 switch (dip->di_aformat) {
 494                 case 0:
 495                 case XFS_DINODE_FMT_EXTENTS:
 496                         break;
 497                 default:
 498                         return __this_address;
 499                 }
 500                 if (dip->di_anextents)
 501                         return __this_address;
 502         }
 503
 504         /* extent size hint validation */
 505         fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 506                         mode, flags);
 507         if (fa)
 508                 return fa;
 509
 510         /* only version 3 or greater inodes are extensively verified here */
 511         if (dip->di_version < 3)
 512                 return NULL;
 513
 514         flags2 = be64_to_cpu(dip->di_flags2);
 515
 516         /* don't allow reflink/cowextsize if we don't have reflink */
 517         if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 518              !xfs_sb_version_hasreflink(&mp->m_sb))
 519                 return __this_address;
 520
 521         /* only regular files get reflink */
 522         if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 523                 return __this_address;
 524
 525         /* don't let reflink and realtime mix */
 526         if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 527                 return __this_address;
 528
 529         /* COW extent size hint validation */
 530         fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 531                         mode, flags, flags2);
 532         if (fa)
 533                 return fa;
 534
 535         /* bigtime iflag can only happen on bigtime filesystems */
 536         if (xfs_dinode_has_bigtime(dip) &&
 537             !xfs_sb_version_hasbigtime(&mp->m_sb))
 538                 return __this_address;
 539
 540         return NULL;
 541 }
 542
 543 void
 544 xfs_dinode_calc_crc(
 545         struct xfs_mount        *mp,
 546         struct xfs_dinode       *dip)
 547 {
 548         uint32_t                crc;
 549
 550         if (dip->di_version < 3)
 551                 return;
 552
 553         ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
 554         crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 555                               XFS_DINODE_CRC_OFF);
 556         dip->di_crc = xfs_end_cksum(crc);
 557 }
 558
 559 /*
 560  * Validate di_extsize hint.
 561  *
 562  * 1. Extent size hint is only valid for directories and regular files.
 563  * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 564  * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 565  * 4. Hint cannot be larger than MAXTEXTLEN.
 566  * 5. Can be changed on directories at any time.
 567  * 6. Hint value of 0 turns off hints, clears inode flags.
 568  * 7. Extent size must be a multiple of the appropriate block size.
 569  *    For realtime files, this is the rt extent size.
 570  * 8. For non-realtime files, the extent size hint must be limited
 571  *    to half the AG size to avoid alignment extending the extent beyond the
 572  *    limits of the AG.
 573  */
 574 xfs_failaddr_t
 575 xfs_inode_validate_extsize(
 576         struct xfs_mount                *mp,
 577         uint32_t                        extsize,
 578         uint16_t                        mode,
 579         uint16_t                        flags)
 580 {
 581         bool                            rt_flag;
 582         bool                            hint_flag;
 583         bool                            inherit_flag;
 584         uint32_t                        extsize_bytes;
 585         uint32_t                        blocksize_bytes;
 586
 587         rt_flag = (flags & XFS_DIFLAG_REALTIME);
 588         hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 589         inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 590         extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 591
 592         /*
 593          * This comment describes a historic gap in this verifier function.
 594          *
 595          * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
 596          * function has never checked that the extent size hint is an integer
 597          * multiple of the realtime extent size.  Since we allow users to set
 598          * this combination  on non-rt filesystems /and/ to change the rt
 599          * extent size when adding a rt device to a filesystem, the net effect
 600          * is that users can configure a filesystem anticipating one rt
 601          * geometry and change their minds later.  Directories do not use the
 602          * extent size hint, so this is harmless for them.
 603          *
 604          * If a directory with a misaligned extent size hint is allowed to
 605          * propagate that hint into a new regular realtime file, the result
 606          * is that the inode cluster buffer verifier will trigger a corruption
 607          * shutdown the next time it is run, because the verifier has always
 608          * enforced the alignment rule for regular files.
 609          *
 610          * Because we allow administrators to set a new rt extent size when
 611          * adding a rt section, we cannot add a check to this verifier because
 612          * that will result a new source of directory corruption errors when
 613          * reading an existing filesystem.  Instead, we rely on callers to
 614          * decide when alignment checks are appropriate, and fix things up as
 615          * needed.
 616          */
 617
 618         if (rt_flag)
 619                 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
 620         else
 621                 blocksize_bytes = mp->m_sb.sb_blocksize;
 622
 623         if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 624                 return __this_address;
 625
 626         if (hint_flag && !S_ISREG(mode))
 627                 return __this_address;
 628
 629         if (inherit_flag && !S_ISDIR(mode))
 630                 return __this_address;
 631
 632         if ((hint_flag || inherit_flag) && extsize == 0)
 633                 return __this_address;
 634
 635         /* free inodes get flags set to zero but extsize remains */
 636         if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 637                 return __this_address;
 638
 639         if (extsize_bytes % blocksize_bytes)
 640                 return __this_address;
 641
 642         if (extsize > MAXEXTLEN)
 643                 return __this_address;
 644
 645         if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 646                 return __this_address;
 647
 648         return NULL;
 649 }
 650
 651 /*
 652  * Validate di_cowextsize hint.
 653  *
 654  * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
 655  *    The inode does not have to have any shared blocks, but it must be a v3.
 656  * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
 657  *    for a directory, the hint is propagated to new files.
 658  * 3. Can be changed on files & directories at any time.
 659  * 4. Hint value of 0 turns off hints, clears inode flags.
 660  * 5. Extent size must be a multiple of the appropriate block size.
 661  * 6. The extent size hint must be limited to half the AG size to avoid
 662  *    alignment extending the extent beyond the limits of the AG.
 663  */
 664 xfs_failaddr_t
 665 xfs_inode_validate_cowextsize(
 666         struct xfs_mount                *mp,
 667         uint32_t                        cowextsize,
 668         uint16_t                        mode,
 669         uint16_t                        flags,
 670         uint64_t                        flags2)
 671 {
 672         bool                            rt_flag;
 673         bool                            hint_flag;
 674         uint32_t                        cowextsize_bytes;
 675
 676         rt_flag = (flags & XFS_DIFLAG_REALTIME);
 677         hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 678         cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 679
 680         if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
 681                 return __this_address;
 682
 683         if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 684                 return __this_address;
 685
 686         if (hint_flag && cowextsize == 0)
 687                 return __this_address;
 688
 689         /* free inodes get flags set to zero but cowextsize remains */
 690         if (mode && !hint_flag && cowextsize != 0)
 691                 return __this_address;
 692
 693         if (hint_flag && rt_flag)
 694                 return __this_address;
 695
 696         if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 697                 return __this_address;
 698
 699         if (cowextsize > MAXEXTLEN)
 700                 return __this_address;
 701
 702         if (cowextsize > mp->m_sb.sb_agblocks / 2)
 703                 return __this_address;
 704
 705         return NULL;
 706 }