Merge tag 'vfs-5.8-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)
diff --combined fs/xfs/xfs_icache.c

index 0a5ac6f,d7deab6..5daef65
--- 1/fs/xfs/xfs_icache.c
--- 2/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@@ -22,7 -22,6 +22,7 @@@
   #include "xfs_dquot_item.h"
   #include "xfs_dquot.h"
   #include "xfs_reflink.h"
+ +#include "xfs_ialloc.h"
   
   #include <linux/iversion.h>
   
@@@ -63,6 -62,8 +63,6 @@@ xfs_inode_alloc
         memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
         ip->i_afp = NULL;
         ip->i_cowfp = NULL;
- -      ip->i_cnextents = 0;
- -      ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
         memset(&ip->i_df, 0, sizeof(ip->i_df));
         ip->i_flags = 0;
         ip->i_delayed_blks = 0;
@@@ -87,18 -88,15 +87,18 @@@ xfs_inode_free_callback
         case S_IFREG:
         case S_IFDIR:
         case S_IFLNK:
- -              xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ +              xfs_idestroy_fork(&ip->i_df);
                 break;
         }
   
- -      if (ip->i_afp)
- -              xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- -      if (ip->i_cowfp)
- -              xfs_idestroy_fork(ip, XFS_COW_FORK);
- -
+ +      if (ip->i_afp) {
+ +              xfs_idestroy_fork(ip->i_afp);
+ +              kmem_cache_free(xfs_ifork_zone, ip->i_afp);
+ +      }
+ +      if (ip->i_cowfp) {
+ +              xfs_idestroy_fork(ip->i_cowfp);
+ +              kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
+ +      }
         if (ip->i_itemp) {
                 ASSERT(!test_bit(XFS_LI_IN_AIL,
                                  &ip->i_itemp->ili_item.li_flags));
@@@ -425,7 -423,6 +425,7 @@@ xfs_iget_cache_hit
                 spin_unlock(&ip->i_flags_lock);
                 rcu_read_unlock();
   
+ +              ASSERT(!rwsem_is_locked(&inode->i_rwsem));
                 error = xfs_reinit_inode(mp, inode);
                 if (error) {
                         bool wake;
@@@ -459,6 -456,9 +459,6 @@@
                 ip->i_sick = 0;
                 ip->i_checked = 0;
   
- -              ASSERT(!rwsem_is_locked(&inode->i_rwsem));
- -              init_rwsem(&inode->i_rwsem);
- -
                 spin_unlock(&ip->i_flags_lock);
                 spin_unlock(&pag->pag_ici_lock);
         } else {
@@@ -479,7 -479,7 +479,7 @@@
                 xfs_ilock(ip, lock_flags);
   
         if (!(flags & XFS_IGET_INCORE))
-               xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+               xfs_iflags_clear(ip, XFS_ISTALE);
         XFS_STATS_INC(mp, xs_ig_found);
   
         return 0;
@@@ -510,42 -510,18 +510,42 @@@ xfs_iget_cache_miss
         if (!ip)
                 return -ENOMEM;
   
- -      error = xfs_iread(mp, tp, ip, flags);
+ +      error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags);
         if (error)
                 goto out_destroy;
   
- -      if (!xfs_inode_verify_forks(ip)) {
- -              error = -EFSCORRUPTED;
- -              goto out_destroy;
+ +      /*
+ +       * For version 5 superblocks, if we are initialising a new inode and we
+ +       * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
+ +       * simply build the new inode core with a random generation number.
+ +       *
+ +       * For version 4 (and older) superblocks, log recovery is dependent on
+ +       * the di_flushiter field being initialised from the current on-disk
+ +       * value and hence we must also read the inode off disk even when
+ +       * initializing new inodes.
+ +       */
+ +      if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
+ +          (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+ +              VFS_I(ip)->i_generation = prandom_u32();
+ +      } else {
+ +              struct xfs_dinode       *dip;
+ +              struct xfs_buf          *bp;
+ +
+ +              error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0);
+ +              if (error)
+ +                      goto out_destroy;
+ +
+ +              error = xfs_inode_from_disk(ip, dip);
+ +              if (!error)
+ +                      xfs_buf_set_ref(bp, XFS_INO_REF);
+ +              xfs_trans_brelse(tp, bp);
+ +
+ +              if (error)
+ +                      goto out_destroy;
         }
   
         trace_xfs_iget_miss(ip);
   
- -
         /*
          * Check the inode free state is valid. This also detects lookup
          * racing with unlinks.
@@@ -585,7 -561,7 +585,7 @@@
          */
         iflags = XFS_INEW;
         if (flags & XFS_IGET_DONTCACHE)
-               iflags |= XFS_IDONTCACHE;
+               d_mark_dontcache(VFS_I(ip));
         ip->i_udquot = NULL;
         ip->i_gdquot = NULL;
         ip->i_pdquot = NULL;
@@@ -761,18 -737,13 +761,18 @@@ xfs_icache_inode_is_allocated
    */
   #define XFS_LOOKUP_BATCH      32
   
- -STATIC int
- -xfs_inode_ag_walk_grab(
+ +/*
+ + * Decide if the given @ip is eligible to be a part of the inode walk, and
+ + * grab it if so.  Returns true if it's ready to go or false if we should just
+ + * ignore it.
+ + */
+ +STATIC bool
+ +xfs_inode_walk_ag_grab(
         struct xfs_inode        *ip,
         int                     flags)
   {
         struct inode            *inode = VFS_I(ip);
- -      bool                    newinos = !!(flags & XFS_AGITER_INEW_WAIT);
+ +      bool                    newinos = !!(flags & XFS_INODE_WALK_INEW_WAIT);
   
         ASSERT(rcu_read_lock_held());
   
@@@ -797,41 -768,39 +797,41 @@@
   
         /* nothing to sync during shutdown */
         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- -              return -EFSCORRUPTED;
+ +              return false;
   
        /* If we can't grab the inode, it must be on its way to reclaim. */
         if (!igrab(inode))
- -              return -ENOENT;
+ +              return false;
   
         /* inode is valid */
- -      return 0;
+ +      return true;
   
   out_unlock_noent:
         spin_unlock(&ip->i_flags_lock);
- -      return -ENOENT;
+ +      return false;
   }
   
+ +/*
+ + * For a given per-AG structure @pag, grab, @execute, and rele all incore
+ + * inodes with the given radix tree @tag.
+ + */
   STATIC int
- -xfs_inode_ag_walk(
- -      struct xfs_mount        *mp,
+ +xfs_inode_walk_ag(
         struct xfs_perag        *pag,
- -      int                     (*execute)(struct xfs_inode *ip, int flags,
- -                                         void *args),
- -      int                     flags,
+ +      int                     iter_flags,
+ +      int                     (*execute)(struct xfs_inode *ip, void *args),
         void                    *args,
- -      int                     tag,
- -      int                     iter_flags)
+ +      int                     tag)
   {
+ +      struct xfs_mount        *mp = pag->pag_mount;
         uint32_t                first_index;
         int                     last_error = 0;
         int                     skipped;
- -      int                     done;
+ +      bool                    done;
         int                     nr_found;
   
   restart:
- -      done = 0;
+ +      done = false;
         skipped = 0;
         first_index = 0;
         nr_found = 0;
@@@ -842,7 -811,7 +842,7 @@@
   
                 rcu_read_lock();
   
- -              if (tag == -1)
+ +              if (tag == XFS_ICI_NO_TAG)
                         nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
                                         (void **)batch, first_index,
                                         XFS_LOOKUP_BATCH);
@@@ -864,7 -833,7 +864,7 @@@
                 for (i = 0; i < nr_found; i++) {
                         struct xfs_inode *ip = batch[i];
   
- -                      if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
+ +                      if (done || !xfs_inode_walk_ag_grab(ip, iter_flags))
                                 batch[i] = NULL;
   
                         /*
@@@ -883,7 -852,7 +883,7 @@@
                                 continue;
                         first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
                         if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- -                              done = 1;
+ +                              done = true;
                 }
   
                 /* unlock now we've grabbed the inodes. */
@@@ -892,10 -861,10 +892,10 @@@
                 for (i = 0; i < nr_found; i++) {
                         if (!batch[i])
                                 continue;
- -                      if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
+ +                      if ((iter_flags & XFS_INODE_WALK_INEW_WAIT) &&
                             xfs_iflags_test(batch[i], XFS_INEW))
                                 xfs_inew_wait(batch[i]);
- -                      error = execute(batch[i], flags, args);
+ +                      error = execute(batch[i], args);
                         xfs_irele(batch[i]);
                         if (error == -EAGAIN) {
                                 skipped++;
@@@ -920,49 -889,6 +920,49 @@@
         return last_error;
   }
   
+ +/* Fetch the next (possibly tagged) per-AG structure. */
+ +static inline struct xfs_perag *
+ +xfs_inode_walk_get_perag(
+ +      struct xfs_mount        *mp,
+ +      xfs_agnumber_t          agno,
+ +      int                     tag)
+ +{
+ +      if (tag == XFS_ICI_NO_TAG)
+ +              return xfs_perag_get(mp, agno);
+ +      return xfs_perag_get_tag(mp, agno, tag);
+ +}
+ +
+ +/*
+ + * Call the @execute function on all incore inodes matching the radix tree
+ + * @tag.
+ + */
+ +int
+ +xfs_inode_walk(
+ +      struct xfs_mount        *mp,
+ +      int                     iter_flags,
+ +      int                     (*execute)(struct xfs_inode *ip, void *args),
+ +      void                    *args,
+ +      int                     tag)
+ +{
+ +      struct xfs_perag        *pag;
+ +      int                     error = 0;
+ +      int                     last_error = 0;
+ +      xfs_agnumber_t          ag;
+ +
+ +      ag = 0;
+ +      while ((pag = xfs_inode_walk_get_perag(mp, ag, tag))) {
+ +              ag = pag->pag_agno + 1;
+ +              error = xfs_inode_walk_ag(pag, iter_flags, execute, args, tag);
+ +              xfs_perag_put(pag);
+ +              if (error) {
+ +                      last_error = error;
+ +                      if (error == -EFSCORRUPTED)
+ +                              break;
+ +              }
+ +      }
+ +      return last_error;
+ +}
+ +
   /*
    * Background scanning to trim post-EOF preallocated space. This is queued
    * based on the 'speculative_prealloc_lifetime' tunable (5m by default).
@@@ -1026,6 -952,75 +1026,6 @@@ xfs_cowblocks_worker
         xfs_queue_cowblocks(mp);
   }
   
- -int
- -xfs_inode_ag_iterator_flags(
- -      struct xfs_mount        *mp,
- -      int                     (*execute)(struct xfs_inode *ip, int flags,
- -                                         void *args),
- -      int                     flags,
- -      void                    *args,
- -      int                     iter_flags)
- -{
- -      struct xfs_perag        *pag;
- -      int                     error = 0;
- -      int                     last_error = 0;
- -      xfs_agnumber_t          ag;
- -
- -      ag = 0;
- -      while ((pag = xfs_perag_get(mp, ag))) {
- -              ag = pag->pag_agno + 1;
- -              error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
- -                                        iter_flags);
- -              xfs_perag_put(pag);
- -              if (error) {
- -                      last_error = error;
- -                      if (error == -EFSCORRUPTED)
- -                              break;
- -              }
- -      }
- -      return last_error;
- -}
- -
- -int
- -xfs_inode_ag_iterator(
- -      struct xfs_mount        *mp,
- -      int                     (*execute)(struct xfs_inode *ip, int flags,
- -                                         void *args),
- -      int                     flags,
- -      void                    *args)
- -{
- -      return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
- -}
- -
- -int
- -xfs_inode_ag_iterator_tag(
- -      struct xfs_mount        *mp,
- -      int                     (*execute)(struct xfs_inode *ip, int flags,
- -                                         void *args),
- -      int                     flags,
- -      void                    *args,
- -      int                     tag)
- -{
- -      struct xfs_perag        *pag;
- -      int                     error = 0;
- -      int                     last_error = 0;
- -      xfs_agnumber_t          ag;
- -
- -      ag = 0;
- -      while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
- -              ag = pag->pag_agno + 1;
- -              error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
- -                                        0);
- -              xfs_perag_put(pag);
- -              if (error) {
- -                      last_error = error;
- -                      if (error == -EFSCORRUPTED)
- -                              break;
- -              }
- -      }
- -      return last_error;
- -}
- -
   /*
    * Grab the inode for reclaim exclusively.
    * Return 0 if we grabbed it, non-zero otherwise.
@@@ -1133,7 -1128,7 +1133,7 @@@ restart
         if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                 xfs_iunpin_wait(ip);
                 /* xfs_iflush_abort() drops the flush lock */
- -              xfs_iflush_abort(ip, false);
+ +              xfs_iflush_abort(ip);
                 goto reclaim;
         }
         if (xfs_ipincount(ip)) {
@@@ -1424,90 -1419,59 +1424,90 @@@ xfs_reclaim_inodes_count
         return reclaimable;
   }
   
- -STATIC int
+ +STATIC bool
   xfs_inode_match_id(
         struct xfs_inode        *ip,
         struct xfs_eofblocks    *eofb)
   {
         if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
             !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
- -              return 0;
+ +              return false;
   
         if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
             !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
- -              return 0;
+ +              return false;
   
         if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
             ip->i_d.di_projid != eofb->eof_prid)
- -              return 0;
+ +              return false;
   
- -      return 1;
+ +      return true;
   }
   
   /*
    * A union-based inode filtering algorithm. Process the inode if any of the
    * criteria match. This is for global/internal scans only.
    */
- -STATIC int
+ +STATIC bool
   xfs_inode_match_id_union(
         struct xfs_inode        *ip,
         struct xfs_eofblocks    *eofb)
   {
         if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
             uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
- -              return 1;
+ +              return true;
   
         if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
             gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
- -              return 1;
+ +              return true;
   
         if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
             ip->i_d.di_projid == eofb->eof_prid)
- -              return 1;
+ +              return true;
   
- -      return 0;
+ +      return false;
+ +}
+ +
+ +/*
+ + * Is this inode @ip eligible for eof/cow block reclamation, given some
+ + * filtering parameters @eofb?  The inode is eligible if @eofb is null or
+ + * if the predicate functions match.
+ + */
+ +static bool
+ +xfs_inode_matches_eofb(
+ +      struct xfs_inode        *ip,
+ +      struct xfs_eofblocks    *eofb)
+ +{
+ +      bool                    match;
+ +
+ +      if (!eofb)
+ +              return true;
+ +
+ +      if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+ +              match = xfs_inode_match_id_union(ip, eofb);
+ +      else
+ +              match = xfs_inode_match_id(ip, eofb);
+ +      if (!match)
+ +              return false;
+ +
+ +      /* skip the inode if the file size is too small */
+ +      if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
+ +          XFS_ISIZE(ip) < eofb->eof_min_file_size)
+ +              return false;
+ +
+ +      return true;
   }
   
   STATIC int
   xfs_inode_free_eofblocks(
         struct xfs_inode        *ip,
- -      int                     flags,
         void                    *args)
   {
- -      int ret = 0;
- -      struct xfs_eofblocks *eofb = args;
- -      int match;
+ +      struct xfs_eofblocks    *eofb = args;
+ +      bool                    wait;
+ +      int                     ret;
+ +
+ +      wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);
   
         if (!xfs_can_free_eofblocks(ip, false)) {
                 /* inode could be preallocated or append-only */
@@@ -1520,34 -1484,62 +1520,34 @@@
          * If the mapping is dirty the operation can block and wait for some
          * time. Unless we are waiting, skip it.
          */
- -      if (!(flags & SYNC_WAIT) &&
- -          mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
+ +      if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
                 return 0;
   
- -      if (eofb) {
- -              if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
- -                      match = xfs_inode_match_id_union(ip, eofb);
- -              else
- -                      match = xfs_inode_match_id(ip, eofb);
- -              if (!match)
- -                      return 0;
- -
- -              /* skip the inode if the file size is too small */
- -              if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
- -                  XFS_ISIZE(ip) < eofb->eof_min_file_size)
- -                      return 0;
- -      }
+ +      if (!xfs_inode_matches_eofb(ip, eofb))
+ +              return 0;
   
         /*
          * If the caller is waiting, return -EAGAIN to keep the background
          * scanner moving and revisit the inode in a subsequent pass.
          */
         if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
- -              if (flags & SYNC_WAIT)
- -                      ret = -EAGAIN;
- -              return ret;
+ +              if (wait)
+ +                      return -EAGAIN;
+ +              return 0;
         }
+ +
         ret = xfs_free_eofblocks(ip);
         xfs_iunlock(ip, XFS_IOLOCK_EXCL);
   
         return ret;
   }
   
- -static int
- -__xfs_icache_free_eofblocks(
- -      struct xfs_mount        *mp,
- -      struct xfs_eofblocks    *eofb,
- -      int                     (*execute)(struct xfs_inode *ip, int flags,
- -                                         void *args),
- -      int                     tag)
- -{
- -      int flags = SYNC_TRYLOCK;
- -
- -      if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
- -              flags = SYNC_WAIT;
- -
- -      return xfs_inode_ag_iterator_tag(mp, execute, flags,
- -                                       eofb, tag);
- -}
- -
   int
   xfs_icache_free_eofblocks(
         struct xfs_mount        *mp,
         struct xfs_eofblocks    *eofb)
   {
- -      return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks,
+ +      return xfs_inode_walk(mp, 0, xfs_inode_free_eofblocks, eofb,
                         XFS_ICI_EOFBLOCKS_TAG);
   }
   
@@@ -1764,16 -1756,29 +1764,16 @@@ xfs_prep_free_cowblocks
   STATIC int
   xfs_inode_free_cowblocks(
         struct xfs_inode        *ip,
- -      int                     flags,
         void                    *args)
   {
         struct xfs_eofblocks    *eofb = args;
- -      int                     match;
         int                     ret = 0;
   
         if (!xfs_prep_free_cowblocks(ip))
                 return 0;
   
- -      if (eofb) {
- -              if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
- -                      match = xfs_inode_match_id_union(ip, eofb);
- -              else
- -                      match = xfs_inode_match_id(ip, eofb);
- -              if (!match)
- -                      return 0;
- -
- -              /* skip the inode if the file size is too small */
- -              if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
- -                  XFS_ISIZE(ip) < eofb->eof_min_file_size)
- -                      return 0;
- -      }
+ +      if (!xfs_inode_matches_eofb(ip, eofb))
+ +              return 0;
   
         /* Free the CoW blocks */
         xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@@ -1797,7 -1802,7 +1797,7 @@@ xfs_icache_free_cowblocks
         struct xfs_mount        *mp,
         struct xfs_eofblocks    *eofb)
   {
- -      return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks,
+ +      return xfs_inode_walk(mp, 0, xfs_inode_free_cowblocks, eofb,
                         XFS_ICI_COWBLOCKS_TAG);
   }
   
diff --combined fs/xfs/xfs_inode.h

index dadcf19,95209e3..47d3b39
--- 1/fs/xfs/xfs_inode.h
--- 2/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@@ -57,6 -57,9 +57,6 @@@ typedef struct xfs_inode 
   
         struct xfs_icdinode     i_d;            /* most of ondisk inode */
   
- -      xfs_extnum_t            i_cnextents;    /* # of extents in cow fork */
- -      unsigned int            i_cformat;      /* format of cow fork */
- -
         /* VFS inode */
         struct inode            i_vnode;        /* embedded VFS inode */
   
@@@ -215,8 -218,7 +215,7 @@@ static inline bool xfs_inode_has_cow_da
   #define XFS_IFLOCK            (1 << __XFS_IFLOCK_BIT)
   #define __XFS_IPINNED_BIT     8        /* wakeup key for zero pin count */
   #define XFS_IPINNED           (1 << __XFS_IPINNED_BIT)
- #define XFS_IDONTCACHE                (1 << 9) /* don't cache the inode long term */
- #define XFS_IEOFBLOCKS                (1 << 10)/* has the preallocblocks tag set */
+ #define XFS_IEOFBLOCKS                (1 << 9) /* has the preallocblocks tag set */
   /*
    * If this unlinked inode is in the middle of recovery, don't let drop_inode
    * truncate and free the inode.  This can happen if we iget the inode during
@@@ -464,7 -466,6 +463,7 @@@ int        xfs_break_layouts(struct inode *ino
   /* from xfs_iops.c */
   extern void xfs_setup_inode(struct xfs_inode *ip);
   extern void xfs_setup_iops(struct xfs_inode *ip);
+ +extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
   
   /*
    * When setting up a newly allocated inode, we need to call
@@@ -495,6 -496,8 +494,6 @@@ extern struct kmem_zone    *xfs_inode_zone
   /* The default CoW extent size hint. */
   #define XFS_DEFAULT_COWEXTSZ_HINT 32
   
- -bool xfs_inode_verify_forks(struct xfs_inode *ip);
- -
   int xfs_iunlink_init(struct xfs_perag *pag);
   void xfs_iunlink_destroy(struct xfs_perag *pag);
   
diff --combined fs/xfs/xfs_super.c

index 23a517a,a7f8e38..379cbff
--- 1/fs/xfs/xfs_super.c
--- 2/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@@ -47,39 -47,6 +47,39 @@@ static struct kset *xfs_kset;               /* top-l
   static struct xfs_kobj xfs_dbg_kobj;  /* global debug sysfs attrs */
   #endif
   
+ +enum xfs_dax_mode {
+ +      XFS_DAX_INODE = 0,
+ +      XFS_DAX_ALWAYS = 1,
+ +      XFS_DAX_NEVER = 2,
+ +};
+ +
+ +static void
+ +xfs_mount_set_dax_mode(
+ +      struct xfs_mount        *mp,
+ +      enum xfs_dax_mode       mode)
+ +{
+ +      switch (mode) {
+ +      case XFS_DAX_INODE:
+ +              mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+ +              break;
+ +      case XFS_DAX_ALWAYS:
+ +              mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
+ +              mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+ +              break;
+ +      case XFS_DAX_NEVER:
+ +              mp->m_flags |= XFS_MOUNT_DAX_NEVER;
+ +              mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+ +              break;
+ +      }
+ +}
+ +
+ +static const struct constant_table dax_param_enums[] = {
+ +      {"inode",       XFS_DAX_INODE },
+ +      {"always",      XFS_DAX_ALWAYS },
+ +      {"never",       XFS_DAX_NEVER },
+ +      {}
+ +};
+ +
   /*
    * Table driven mount option parser.
    */
@@@ -92,7 -59,7 +92,7 @@@ enum 
         Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
         Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
         Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
- -      Opt_discard, Opt_nodiscard, Opt_dax,
+ +      Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
   };
   
   static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@@ -136,7 -103,6 +136,7 @@@
         fsparam_flag("discard",         Opt_discard),
         fsparam_flag("nodiscard",       Opt_nodiscard),
         fsparam_flag("dax",             Opt_dax),
+ +      fsparam_enum("dax",             Opt_dax_enum, dax_param_enums),
         {}
   };
   
@@@ -163,8 -129,7 +163,8 @@@ xfs_fs_show_options
                 { XFS_MOUNT_GRPID,              ",grpid" },
                 { XFS_MOUNT_DISCARD,            ",discard" },
                 { XFS_MOUNT_LARGEIO,            ",largeio" },
- -              { XFS_MOUNT_DAX,                ",dax" },
+ +              { XFS_MOUNT_DAX_ALWAYS,         ",dax=always" },
+ +              { XFS_MOUNT_DAX_NEVER,          ",dax=never" },
                 { 0, NULL }
         };
         struct xfs_mount        *mp = XFS_M(root->d_sb);
@@@ -340,7 -305,7 +340,7 @@@ voi
   xfs_blkdev_issue_flush(
         xfs_buftarg_t           *buftarg)
   {
- -      blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
+ +      blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS);
   }
   
   STATIC void
@@@ -737,7 -702,7 +737,7 @@@ xfs_fs_drop_inode
                 return 0;
         }
   
-       return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
+       return generic_drop_inode(inode);
   }
   
   static void
@@@ -807,8 -772,7 +807,8 @@@ xfs_fs_statfs
         statp->f_blocks = sbp->sb_dblocks - lsize;
         spin_unlock(&mp->m_sb_lock);
   
- -      statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
+ +      /* make sure statp->f_bfree does not underflow */
+ +      statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
         statp->f_bavail = statp->f_bfree;
   
         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
@@@ -874,10 -838,8 +874,10 @@@ xfs_restore_resvblks(struct xfs_mount *
    * there is no log replay required to write the inodes to disk - this is the
    * primary difference between a sync and a quiesce.
    *
- - * Note: xfs_log_quiesce() stops background log work - the callers must ensure
- - * it is started again when appropriate.
+ + * We cancel log work early here to ensure all transactions the log worker may
+ + * run have finished before we clean up and log the superblock and write an
+ + * unmount record. The unfreeze process is responsible for restarting the log
+ + * worker correctly.
    */
   void
   xfs_quiesce_attr(
@@@ -885,7 -847,9 +885,7 @@@
   {
         int     error = 0;
   
- -      /* wait for all modifications to complete */
- -      while (atomic_read(&mp->m_active_trans) > 0)
- -              delay(100);
+ +      cancel_delayed_work_sync(&mp->m_log->l_work);
   
         /* force the log to unpin objects from the now complete transactions */
         xfs_log_force(mp, XFS_LOG_SYNC);
@@@ -899,6 -863,12 +899,6 @@@
         if (error)
                 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
                                 "Frozen image may not be consistent.");
- -      /*
- -       * Just warn here till VFS can correctly support
- -       * read-only remount without racing.
- -       */
- -      WARN_ON(atomic_read(&mp->m_active_trans) != 0);
- -
         xfs_log_quiesce(mp);
   }
   
@@@ -1291,10 -1261,7 +1291,10 @@@ xfs_fc_parse_param
                 return 0;
   #ifdef CONFIG_FS_DAX
         case Opt_dax:
- -              mp->m_flags |= XFS_MOUNT_DAX;
+ +              xfs_mount_set_dax_mode(mp, XFS_DAX_ALWAYS);
+ +              return 0;
+ +      case Opt_dax_enum:
+ +              xfs_mount_set_dax_mode(mp, result.uint_32);
                 return 0;
   #endif
         default:
@@@ -1487,7 -1454,7 +1487,7 @@@ xfs_fc_fill_super
         if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
                 sb->s_flags |= SB_I_VERSION;
   
- -      if (mp->m_flags & XFS_MOUNT_DAX) {
+ +      if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
                 bool rtdev_is_dax = false, datadev_is_dax;
   
                 xfs_warn(mp,
@@@ -1501,7 -1468,7 +1501,7 @@@
                 if (!rtdev_is_dax && !datadev_is_dax) {
                         xfs_alert(mp,
                         "DAX unsupported by block device. Turning off DAX.");
- -                      mp->m_flags &= ~XFS_MOUNT_DAX;
+ +                      xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
                 }
                 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
                         xfs_alert(mp,
@@@ -1787,6 -1754,7 +1787,6 @@@ static int xfs_init_fs_context
         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
         spin_lock_init(&mp->m_perag_lock);
         mutex_init(&mp->m_growlock);
- -      atomic_set(&mp->m_active_trans, 0);
         INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
diff --combined include/linux/fs.h

index 1ae50ae,7c3e8c0..f3e167f
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -292,7 -292,6 +292,7 @@@ enum positive_aop_returns 
   struct page;
   struct address_space;
   struct writeback_control;
+ +struct readahead_control;
   
   /*
    * Write life time hint values.
@@@ -376,7 -375,6 +376,7 @@@ struct address_space_operations 
          */
         int (*readpages)(struct file *filp, struct address_space *mapping,
                         struct list_head *pages, unsigned nr_pages);
+ +      void (*readahead)(struct readahead_control *);
   
         int (*write_begin)(struct file *, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
@@@ -978,7 -976,6 +978,7 @@@ struct file 
   #endif /* #ifdef CONFIG_EPOLL */
         struct address_space    *f_mapping;
         errseq_t                f_wb_err;
+ +      errseq_t                f_sb_err; /* for syncfs */
   } __randomize_layout
     __attribute__((aligned(4)));        /* lest something weird decides that 2 is OK */
   
@@@ -986,7 -983,7 +986,7 @@@ struct file_handle 
         __u32 handle_bytes;
         int handle_type;
         /* file identifier */
- -      unsigned char f_handle[0];
+ +      unsigned char f_handle[];
   };
   
   static inline struct file *get_file(struct file *f)
@@@ -1523,9 -1520,6 +1523,9 @@@ struct super_block 
         /* Being remounted read-only */
         int s_readonly_remount;
   
+ +      /* per-sb errseq_t for reporting writeback errors via syncfs */
+ +      errseq_t s_wb_err;
+ +
         /* AIO completions deferred from interrupt context */
         struct workqueue_struct *s_dio_done_wq;
         struct hlist_head s_pins;
@@@ -1727,11 -1721,7 +1727,11 @@@ extern int vfs_link(struct dentry *, st
   extern int vfs_rmdir(struct inode *, struct dentry *);
   extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
   extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
- -extern int vfs_whiteout(struct inode *, struct dentry *);
+ +
+ +static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+ +{
+ +      return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+ +}
   
   extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
                                   int open_flag);
@@@ -2166,6 -2156,8 +2166,8 @@@ static inline void kiocb_clone(struct k
    *
    * I_CREATING         New object's inode in the middle of setting up.
    *
+  * I_DONTCACHE                Evict inode as soon as it is not used anymore.
+  *
    * Q: What is the difference between I_WILL_FREE and I_FREEING?
    */
   #define I_DIRTY_SYNC          (1 << 0)
@@@ -2188,6 -2180,7 +2190,7 @@@
   #define I_WB_SWITCH           (1 << 13)
   #define I_OVL_INUSE           (1 << 14)
   #define I_CREATING            (1 << 15)
+ #define I_DONTCACHE           (1 << 16)
   
   #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
   #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@@ -2591,6 -2584,7 +2594,6 @@@ extern struct kmem_cache *names_cachep
   #ifdef CONFIG_BLOCK
   extern int register_blkdev(unsigned int, const char *);
   extern void unregister_blkdev(unsigned int, const char *);
- -extern void bdev_unhash_inode(dev_t dev);
   extern struct block_device *bdget(dev_t);
   extern struct block_device *bdgrab(struct block_device *bdev);
   extern void bd_set_size(struct block_device *, loff_t size);
@@@ -2646,6 -2640,7 +2649,6 @@@ extern int sync_filesystem(struct super
   extern const struct file_operations def_blk_fops;
   extern const struct file_operations def_chr_fops;
   #ifdef CONFIG_BLOCK
- -extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
   extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
   extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
   extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
@@@ -2731,6 -2726,7 +2734,6 @@@ extern bool is_bad_inode(struct inode *
   extern int revalidate_disk(struct gendisk *);
   extern int check_disk_change(struct block_device *);
   extern int __invalidate_device(struct block_device *, bool);
- -extern int invalidate_partition(struct gendisk *, int);
   #endif
   unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                         pgoff_t start, pgoff_t end);
@@@ -2834,18 -2830,6 +2837,18 @@@ static inline errseq_t filemap_sample_w
         return errseq_sample(&mapping->wb_err);
   }
   
+ +/**
+ + * file_sample_sb_err - sample the current errseq_t to test for later errors
+ + * @file: file pointer to be sampled
+ + *
+ + * Grab the most current superblock-level errseq_t value for the given
+ + * struct file.
+ + */
+ +static inline errseq_t file_sample_sb_err(struct file *file)
+ +{
+ +      return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+ +}
+ +
   static inline int filemap_nr_thps(struct address_space *mapping)
   {
   #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@@ -3068,8 -3052,10 +3071,10 @@@ extern int inode_needs_sync(struct inod
   extern int generic_delete_inode(struct inode *inode);
   static inline int generic_drop_inode(struct inode *inode)
   {
-       return !inode->i_nlink || inode_unhashed(inode);
+       return !inode->i_nlink || inode_unhashed(inode) ||
+               (inode->i_state & I_DONTCACHE);
   }
+ extern void d_mark_dontcache(struct inode *inode);
   
   extern struct inode *ilookup5_nowait(struct super_block *sb,
                 unsigned long hashval, int (*test)(struct inode *, void *),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jun 2020 02:48:41 +0000 (19:48 -0700)
		1	2
fs/xfs/xfs_icache.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_inode.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_super.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history