Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 5 Aug 2022 03:13:46 +0000 (20:13 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 5 Aug 2022 03:13:46 +0000 (20:13 -0700)
Pull ext4 updates from Ted Ts'o:
 "Add new ioctls to set and get the file system UUID in the ext4
  superblock and improved the performance of the online resizing of file
  systems with bigalloc enabled.

  Fixed a lot of bugs, in particular for the inline data feature,
  potential races when creating and deleting inodes with shared extended
  attribute blocks, and the handling of directory blocks which are
  corrupted"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (37 commits)
  ext4: add ioctls to get/set the ext4 superblock uuid
  ext4: avoid resizing to a partial cluster size
  ext4: reduce computation of overhead during resize
  jbd2: fix assertion 'jh->b_frozen_data == NULL' failure when journal aborted
  ext4: block range must be validated before use in ext4_mb_clear_bb()
  mbcache: automatically delete entries from cache on freeing
  mbcache: Remove mb_cache_entry_delete()
  ext2: avoid deleting xattr block that is being reused
  ext2: unindent codeblock in ext2_xattr_set()
  ext2: factor out freeing of xattr block reference
  ext4: fix race when reusing xattr blocks
  ext4: unindent codeblock in ext4_xattr_block_set()
  ext4: remove EA inode entry from mbcache on inode eviction
  mbcache: add functions to delete entry if unused
  mbcache: don't reclaim used entries
  ext4: make sure ext4_append() always allocates new block
  ext4: check if directory block is within i_size
  ext4: reflect mb_optimize_scan value in options file
  ext4: avoid removing directory when directory is corrupted
  ext4: align '*' in comments
  ...

28 files changed:
Documentation/filesystems/ext4/blockmap.rst
fs/ext2/xattr.c
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/fast_commit.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/orphan.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
fs/mbcache.c
include/linux/jbd2.h
include/linux/mbcache.h

diff --git a/Documentation/filesystems/ext4/blockmap.rst b/Documentation/filesystems/ext4/blockmap.rst
index 2bd9904..cc59654 100644
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0
 
 +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| i.i_block Offset   | Where It Points                                                                                                                                                                                                              |
+| i.i_block Offset    | Where It Points                                                                                                                                                                                                              |
 +=====================+==============================================================================================================================================================================================================================+
 | 0 to 11             | Direct map to file blocks 0 to 11.                                                                                                                                                                                           |
 +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 841fa6d..641abfa 100644
@@ -517,36 +517,36 @@ bad_block:
        /* Here we know that we can set the new attribute. */
 
        if (header) {
-               /* assert(header == HDR(bh)); */
+               int offset;
+
                lock_buffer(bh);
                if (header->h_refcount == cpu_to_le32(1)) {
                        __u32 hash = le32_to_cpu(header->h_hash);
+                       struct mb_cache_entry *oe;
 
-                       ea_bdebug(bh, "modifying in-place");
+                       oe = mb_cache_entry_delete_or_get(EA_BLOCK_CACHE(inode),
+                                       hash, bh->b_blocknr);
+                       if (!oe) {
+                               ea_bdebug(bh, "modifying in-place");
+                               goto update_block;
+                       }
                        /*
-                        * This must happen under buffer lock for
-                        * ext2_xattr_set2() to reliably detect modified block
+                        * Someone is trying to reuse the block, leave it alone
                         */
-                       mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
-                                             bh->b_blocknr);
-
-                       /* keep the buffer locked while modifying it. */
-               } else {
-                       int offset;
-
-                       unlock_buffer(bh);
-                       ea_bdebug(bh, "cloning");
-                       header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
-                       error = -ENOMEM;
-                       if (header == NULL)
-                               goto cleanup;
-                       header->h_refcount = cpu_to_le32(1);
-
-                       offset = (char *)here - bh->b_data;
-                       here = ENTRY((char *)header + offset);
-                       offset = (char *)last - bh->b_data;
-                       last = ENTRY((char *)header + offset);
+                       mb_cache_entry_put(EA_BLOCK_CACHE(inode), oe);
                }
+               unlock_buffer(bh);
+               ea_bdebug(bh, "cloning");
+               header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
+               error = -ENOMEM;
+               if (header == NULL)
+                       goto cleanup;
+               header->h_refcount = cpu_to_le32(1);
+
+               offset = (char *)here - bh->b_data;
+               here = ENTRY((char *)header + offset);
+               offset = (char *)last - bh->b_data;
+               last = ENTRY((char *)header + offset);
        } else {
                /* Allocate a buffer where we construct the new block. */
                header = kzalloc(sb->s_blocksize, GFP_KERNEL);
@@ -559,6 +559,7 @@ bad_block:
                last = here = ENTRY(header+1);
        }
 
+update_block:
        /* Iff we are modifying the block in-place, bh is locked here. */
 
        if (not_found) {
@@ -651,6 +652,55 @@ cleanup:
        return error;
 }
 
+static void ext2_xattr_release_block(struct inode *inode,
+                                    struct buffer_head *bh)
+{
+       struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+
+retry_ref:
+       lock_buffer(bh);
+       if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+               __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+               struct mb_cache_entry *oe;
+
+               /*
+                * This must happen under buffer lock to properly
+                * serialize with ext2_xattr_set() reusing the block.
+                */
+               oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+                                                 bh->b_blocknr);
+               if (oe) {
+                       /*
+                        * Someone is trying to reuse the block. Wait
+                        * and retry.
+                        */
+                       unlock_buffer(bh);
+                       mb_cache_entry_wait_unused(oe);
+                       mb_cache_entry_put(ea_block_cache, oe);
+                       goto retry_ref;
+               }
+
+               /* Free the old block. */
+               ea_bdebug(bh, "freeing");
+               ext2_free_blocks(inode, bh->b_blocknr, 1);
+               /* We let our caller release bh, so we
+                * need to duplicate the buffer before. */
+               get_bh(bh);
+               bforget(bh);
+               unlock_buffer(bh);
+       } else {
+               /* Decrement the refcount only. */
+               le32_add_cpu(&HDR(bh)->h_refcount, -1);
+               dquot_free_block(inode, 1);
+               mark_buffer_dirty(bh);
+               unlock_buffer(bh);
+               ea_bdebug(bh, "refcount now=%d",
+                       le32_to_cpu(HDR(bh)->h_refcount));
+               if (IS_SYNC(inode))
+                       sync_dirty_buffer(bh);
+       }
+}
+
 /*
  * Second half of ext2_xattr_set(): Update the file system.
  */
@@ -747,34 +797,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
                 * If there was an old block and we are no longer using it,
                 * release the old block.
                 */
-               lock_buffer(old_bh);
-               if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
-                       __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
-
-                       /*
-                        * This must happen under buffer lock for
-                        * ext2_xattr_set2() to reliably detect freed block
-                        */
-                       mb_cache_entry_delete(ea_block_cache, hash,
-                                             old_bh->b_blocknr);
-                       /* Free the old block. */
-                       ea_bdebug(old_bh, "freeing");
-                       ext2_free_blocks(inode, old_bh->b_blocknr, 1);
-                       mark_inode_dirty(inode);
-                       /* We let our caller release old_bh, so we
-                        * need to duplicate the buffer before. */
-                       get_bh(old_bh);
-                       bforget(old_bh);
-               } else {
-                       /* Decrement the refcount only. */
-                       le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
-                       dquot_free_block_nodirty(inode, 1);
-                       mark_inode_dirty(inode);
-                       mark_buffer_dirty(old_bh);
-                       ea_bdebug(old_bh, "refcount now=%d",
-                               le32_to_cpu(HDR(old_bh)->h_refcount));
-               }
-               unlock_buffer(old_bh);
+               ext2_xattr_release_block(inode, old_bh);
        }
 
 cleanup:
@@ -828,30 +851,7 @@ ext2_xattr_delete_inode(struct inode *inode)
                        EXT2_I(inode)->i_file_acl);
                goto cleanup;
        }
-       lock_buffer(bh);
-       if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-               __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-
-               /*
-                * This must happen under buffer lock for ext2_xattr_set2() to
-                * reliably detect freed block
-                */
-               mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
-                                     bh->b_blocknr);
-               ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
-               get_bh(bh);
-               bforget(bh);
-               unlock_buffer(bh);
-       } else {
-               le32_add_cpu(&HDR(bh)->h_refcount, -1);
-               ea_bdebug(bh, "refcount now=%d",
-                       le32_to_cpu(HDR(bh)->h_refcount));
-               unlock_buffer(bh);
-               mark_buffer_dirty(bh);
-               if (IS_SYNC(inode))
-                       sync_dirty_buffer(bh);
-               dquot_free_block_nodirty(inode, 1);
-       }
+       ext2_xattr_release_block(inode, bh);
        EXT2_I(inode)->i_file_acl = 0;
 
 cleanup:
@@ -943,7 +943,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
        if (!header->h_hash)
                return NULL;  /* never share */
        ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
+
        ce = mb_cache_entry_find_first(ea_block_cache, hash);
        while (ce) {
                struct buffer_head *bh;
@@ -955,22 +955,8 @@ again:
                                inode->i_ino, (unsigned long) ce->e_value);
                } else {
                        lock_buffer(bh);
-                       /*
-                        * We have to be careful about races with freeing or
-                        * rehashing of xattr block. Once we hold buffer lock
-                        * xattr block's state is stable so we can check
-                        * whether the block got freed / rehashed or not.
-                        * Since we unhash mbcache entry under buffer lock when
-                        * freeing / rehashing xattr block, checking whether
-                        * entry is still hashed is reliable.
-                        */
-                       if (hlist_bl_unhashed(&ce->e_hash_list)) {
-                               mb_cache_entry_put(ea_block_cache, ce);
-                               unlock_buffer(bh);
-                               brelse(bh);
-                               goto again;
-                       } else if (le32_to_cpu(HDR(bh)->h_refcount) >
-                                  EXT2_XATTR_REFCOUNT_MAX) {
+                       if (le32_to_cpu(HDR(bh)->h_refcount) >
+                           EXT2_XATTR_REFCOUNT_MAX) {
                                ea_idebug(inode, "block %ld refcount %d>%d",
                                          (unsigned long) ce->e_value,
                                          le32_to_cpu(HDR(bh)->h_refcount),
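
The ext2 changes above (and the matching ext4 changes further down) swap the old, unconditional mb_cache_entry_delete() for mb_cache_entry_delete_or_get(), which removes an entry only when nobody else holds a reference; a busy entry is handed back with an extra reference so the caller can wait on it via mb_cache_entry_wait_unused() and retry. A toy userspace analogue of those semantics follows; it is a sketch only: the names and refcount thresholds are assumptions of this illustration, and the kernel sleeps on a waitqueue rather than spinning.

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_entry {
	atomic_int refcnt;	/* one reference is always owned by the cache */
	bool hashed;
};

/*
 * Analogue of mb_cache_entry_delete_or_get(): if only the cache's own
 * reference remains, unhash the entry and return NULL; otherwise take
 * an extra reference and return the entry so the caller can wait.
 */
static struct toy_entry *delete_or_get(struct toy_entry *e)
{
	int cache_only = 1;

	if (atomic_compare_exchange_strong(&e->refcnt, &cache_only, 0)) {
		e->hashed = false;
		return NULL;
	}
	atomic_fetch_add(&e->refcnt, 1);
	return e;
}

/* Analogue of mb_cache_entry_wait_unused(); the kernel sleeps on a
 * waitqueue instead of busy-waiting like this. */
static void wait_unused(struct toy_entry *e)
{
	while (atomic_load(&e->refcnt) > 2)	/* cache ref + our ref */
		sched_yield();
}

int main(void)
{
	struct toy_entry e = { .refcnt = 2, .hashed = true }; /* cache + one user */
	struct toy_entry *oe = delete_or_get(&e);

	if (oe) {				/* entry was busy */
		atomic_fetch_sub(&e.refcnt, 1);	/* the user drops its ref... */
		wait_unused(oe);		/* ...so this returns at once */
		atomic_fetch_sub(&oe->refcnt, 1);	/* put our extra ref */
		oe = delete_or_get(&e);		/* retry now succeeds */
	}
	printf("entry is %s\n", e.hashed ? "still hashed" : "deleted");
	return 0;
}
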
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 78ee3ef..8ff4b91 100644
@@ -666,7 +666,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
         * it's possible we've just missed a transaction commit here,
         * so ignore the returned status
         */
-       jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+       ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
        (void) jbd2_journal_force_commit_nested(sbi->s_journal);
        return 1;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 29fc575..9bca556 100644
@@ -724,6 +724,8 @@ enum {
 #define EXT4_IOC_GETSTATE              _IOW('f', 41, __u32)
 #define EXT4_IOC_GET_ES_CACHE          _IOWR('f', 42, struct fiemap)
 #define EXT4_IOC_CHECKPOINT            _IOW('f', 43, __u32)
+#define EXT4_IOC_GETFSUUID             _IOR('f', 44, struct fsuuid)
+#define EXT4_IOC_SETFSUUID             _IOW('f', 44, struct fsuuid)
 
 #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
 
@@ -753,6 +755,15 @@ enum {
                                                EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \
                                                EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
 
+/*
+ * Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID
+ */
+struct fsuuid {
+       __u32       fsu_len;
+       __u32       fsu_flags;
+       __u8        fsu_uuid[];
+};
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -3016,7 +3027,7 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
                      struct dentry *dentry, struct fileattr *fa);
 int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa);
 extern void ext4_reset_inode_seed(struct inode *inode);
-int ext4_update_overhead(struct super_block *sb);
+int ext4_update_overhead(struct super_block *sb, bool force);
 
 /* migrate.c */
 extern int ext4_ext_migrate(struct inode *);
@@ -3583,6 +3594,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
 extern int ext4_inline_data_fiemap(struct inode *inode,
                                   struct fiemap_extent_info *fieinfo,
                                   int *has_inline, __u64 start, __u64 len);
+extern void *ext4_read_inline_link(struct inode *inode);
 
 struct iomap;
 extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
@@ -3799,7 +3811,7 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
 
 extern int ext4_resize_begin(struct super_block *sb);
-extern void ext4_resize_end(struct super_block *sb);
+extern int ext4_resize_end(struct super_block *sb, bool update_backups);
 
 static inline void ext4_set_io_unwritten_flag(struct inode *inode,
                                              struct ext4_io_end *io_end)
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3477a16..8e1fb18 100644
@@ -267,8 +267,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
        trace_ext4_forget(inode, is_metadata, blocknr);
        BUFFER_TRACE(bh, "enter");
 
-       jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-                 "data mode %x\n",
+       ext4_debug("forgetting bh %p: is_metadata=%d, mode %o, data mode %x\n",
                  bh, is_metadata, inode->i_mode,
                  test_opt(inode->i_sb, DATA_FLAGS));
 
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index eb4c8ad..2af962c 100644
@@ -917,8 +917,8 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
        mutex_unlock(&ei->i_fc_lock);
 
        cur_lblk_off = old_blk_size;
-       jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
-                 __func__, cur_lblk_off, new_blk_size, inode->i_ino);
+       ext4_debug("will try writing %d to %d for inode %ld\n",
+                  cur_lblk_off, new_blk_size, inode->i_ino);
 
        while (cur_lblk_off <= new_blk_size) {
                map.m_lblk = cur_lblk_off;
@@ -1168,7 +1168,7 @@ static void ext4_fc_update_stats(struct super_block *sb, int status,
 {
        struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
 
-       jbd_debug(1, "Fast commit ended with status = %d for tid %u",
+       ext4_debug("Fast commit ended with status = %d for tid %u",
                        status, commit_tid);
        if (status == EXT4_FC_STATUS_OK) {
                stats->fc_num_commits++;
@@ -1375,14 +1375,14 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
        inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
 
        if (IS_ERR(inode)) {
-               jbd_debug(1, "Inode %d not found", darg.ino);
+               ext4_debug("Inode %d not found", darg.ino);
                return 0;
        }
 
        old_parent = ext4_iget(sb, darg.parent_ino,
                                EXT4_IGET_NORMAL);
        if (IS_ERR(old_parent)) {
-               jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
+               ext4_debug("Dir with inode %d not found", darg.parent_ino);
                iput(inode);
                return 0;
        }
@@ -1407,21 +1407,21 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
 
        dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
        if (IS_ERR(dir)) {
-               jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
+               ext4_debug("Dir with inode %d not found.", darg->parent_ino);
                dir = NULL;
                goto out;
        }
 
        dentry_dir = d_obtain_alias(dir);
        if (IS_ERR(dentry_dir)) {
-               jbd_debug(1, "Failed to obtain dentry");
+               ext4_debug("Failed to obtain dentry");
                dentry_dir = NULL;
                goto out;
        }
 
        dentry_inode = d_alloc(dentry_dir, &qstr_dname);
        if (!dentry_inode) {
-               jbd_debug(1, "Inode dentry not created.");
+               ext4_debug("Inode dentry not created.");
                ret = -ENOMEM;
                goto out;
        }
@@ -1434,7 +1434,7 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
         * could complete.
         */
        if (ret && ret != -EEXIST) {
-               jbd_debug(1, "Failed to link\n");
+               ext4_debug("Failed to link\n");
                goto out;
        }
 
@@ -1468,7 +1468,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
 
        inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
-               jbd_debug(1, "Inode not found.");
+               ext4_debug("Inode not found.");
                return 0;
        }
 
@@ -1576,7 +1576,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
        /* Given that we just wrote the inode on disk, this SHOULD succeed. */
        inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
-               jbd_debug(1, "Inode not found.");
+               ext4_debug("Inode not found.");
                return -EFSCORRUPTED;
        }
 
@@ -1630,7 +1630,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
 
        inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
-               jbd_debug(1, "inode %d not found.", darg.ino);
+               ext4_debug("inode %d not found.", darg.ino);
                inode = NULL;
                ret = -EINVAL;
                goto out;
@@ -1643,7 +1643,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
                 */
                dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
                if (IS_ERR(dir)) {
-                       jbd_debug(1, "Dir %d not found.", darg.ino);
+                       ext4_debug("Dir %d not found.", darg.ino);
                        goto out;
                }
                ret = ext4_init_new_dir(NULL, dir, inode);
@@ -1727,7 +1727,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 
        inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
-               jbd_debug(1, "Inode not found.");
+               ext4_debug("Inode not found.");
                return 0;
        }
 
@@ -1741,7 +1741,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 
        cur = start;
        remaining = len;
-       jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
+       ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
                  start, start_pblk, len, ext4_ext_is_unwritten(ex),
                  inode->i_ino);
 
@@ -1802,7 +1802,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
                }
 
                /* Range is mapped and needs a state change */
-               jbd_debug(1, "Converting from %ld to %d %lld",
+               ext4_debug("Converting from %ld to %d %lld",
                                map.m_flags & EXT4_MAP_UNWRITTEN,
                        ext4_ext_is_unwritten(ex), map.m_pblk);
                ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
@@ -1845,7 +1845,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
 
        inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
-               jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
+               ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
                return 0;
        }
 
@@ -1853,7 +1853,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
        if (ret)
                goto out;
 
-       jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
+       ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
                        inode->i_ino, le32_to_cpu(lrange.fc_lblk),
                        le32_to_cpu(lrange.fc_len));
        while (remaining > 0) {
@@ -1902,7 +1902,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
                inode = ext4_iget(sb, state->fc_modified_inodes[i],
                        EXT4_IGET_NORMAL);
                if (IS_ERR(inode)) {
-                       jbd_debug(1, "Inode %d not found.",
+                       ext4_debug("Inode %d not found.",
                                state->fc_modified_inodes[i]);
                        continue;
                }
@@ -2031,7 +2031,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
        for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
                memcpy(&tl, cur, sizeof(tl));
                val = cur + sizeof(tl);
-               jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
+               ext4_debug("Scan phase, tag:%s, blk %lld\n",
                          tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
                switch (le16_to_cpu(tl.fc_tag)) {
                case EXT4_FC_TAG_ADD_RANGE:
@@ -2126,7 +2126,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
                sbi->s_mount_state |= EXT4_FC_REPLAY;
        }
        if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
-               jbd_debug(1, "Replay stops\n");
+               ext4_debug("Replay stops\n");
                ext4_fc_set_bitmaps_and_counters(sb);
                return 0;
        }
@@ -2150,7 +2150,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
                        ext4_fc_set_bitmaps_and_counters(sb);
                        break;
                }
-               jbd_debug(3, "Replay phase, tag:%s\n",
+               ext4_debug("Replay phase, tag:%s\n",
                                tag2str(le16_to_cpu(tl.fc_tag)));
                state->fc_replay_num_tags--;
                switch (le16_to_cpu(tl.fc_tag)) {
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 07a8c75..860fc51 100644
@@ -460,7 +460,7 @@ static int ext4_splice_branch(handle_t *handle,
                 * the new i_size.  But that is not done here - it is done in
                 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
                 */
-               jbd_debug(5, "splicing indirect only\n");
+               ext4_debug("splicing indirect only\n");
                BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
                err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
                if (err)
@@ -472,7 +472,7 @@ static int ext4_splice_branch(handle_t *handle,
                err = ext4_mark_inode_dirty(handle, ar->inode);
                if (unlikely(err))
                        goto err_out;
-               jbd_debug(5, "splicing direct\n");
+               ext4_debug("splicing direct\n");
        }
        return err;
 
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index cff52ff..a4fbe82 100644
@@ -6,6 +6,7 @@
 
 #include <linux/iomap.h>
 #include <linux/fiemap.h>
+#include <linux/namei.h>
 #include <linux/iversion.h>
 #include <linux/sched/mm.h>
 
@@ -35,6 +36,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
        struct ext4_inode *raw_inode;
        int free, min_offs;
 
+       if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return 0;
+
        min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
                        EXT4_GOOD_OLD_INODE_SIZE -
                        EXT4_I(inode)->i_extra_isize -
@@ -1588,6 +1592,35 @@ out:
        return ret;
 }
 
+void *ext4_read_inline_link(struct inode *inode)
+{
+       struct ext4_iloc iloc;
+       int ret, inline_size;
+       void *link;
+
+       ret = ext4_get_inode_loc(inode, &iloc);
+       if (ret)
+               return ERR_PTR(ret);
+
+       ret = -ENOMEM;
+       inline_size = ext4_get_inline_size(inode);
+       link = kmalloc(inline_size + 1, GFP_NOFS);
+       if (!link)
+               goto out;
+
+       ret = ext4_read_inline_data(inode, link, inline_size, &iloc);
+       if (ret < 0) {
+               kfree(link);
+               goto out;
+       }
+       nd_terminate_link(link, inode->i_size, ret);
+out:
+       if (ret < 0)
+               link = ERR_PTR(ret);
+       brelse(iloc.bh);
+       return link;
+}
+
 struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
                                        struct ext4_dir_entry_2 **parent_de,
                                        int *retval)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9fd60fc..6012144 100644
@@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
 
        trace_ext4_evict_inode(inode);
 
+       if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+               ext4_evict_ea_inode(inode);
        if (inode->i_nlink) {
                /*
                 * When journalling data dirty buffers are tracked only in the
@@ -1571,7 +1573,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
                ext4_lblk_t start, last;
                start = index << (PAGE_SHIFT - inode->i_blkbits);
                last = end << (PAGE_SHIFT - inode->i_blkbits);
+
+               /*
+                * avoid racing with extent status tree scans made by
+                * ext4_insert_delayed_block()
+                */
+               down_write(&EXT4_I(inode)->i_data_sem);
                ext4_es_remove_extent(inode, start, last - start + 1);
+               up_write(&EXT4_I(inode)->i_data_sem);
        }
 
        folio_batch_init(&fbatch);
@@ -3142,13 +3151,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 {
        struct inode *inode = mapping->host;
        journal_t *journal;
+       sector_t ret = 0;
        int err;
 
+       inode_lock_shared(inode);
        /*
         * We can get here for an inline file via the FIBMAP ioctl
         */
        if (ext4_has_inline_data(inode))
-               return 0;
+               goto out;
 
        if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
                        test_opt(inode->i_sb, DELALLOC)) {
@@ -3187,10 +3198,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
                jbd2_journal_unlock_updates(journal);
 
                if (err)
-                       return 0;
+                       goto out;
        }
 
-       return iomap_bmap(mapping, block, &ext4_iomap_ops);
+       ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
+
+out:
+       inode_unlock_shared(inode);
+       return ret;
 }
 
 static int ext4_read_folio(struct file *file, struct folio *folio)
@@ -4687,8 +4702,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
        __le32 *magic = (void *)raw_inode +
                        EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
 
-       if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
-           EXT4_INODE_SIZE(inode->i_sb) &&
+       if (EXT4_INODE_HAS_XATTR_SPACE(inode)  &&
            *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                return ext4_find_inline_data_nolock(inode);
@@ -5215,7 +5229,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 
        if (EXT4_SB(inode->i_sb)->s_journal) {
                if (ext4_journal_current_handle()) {
-                       jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
+                       ext4_debug("called recursively, non-PF_MEMALLOC!\n");
                        dump_stack();
                        return -EIO;
                }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb01c1d..3cf3ec4 100644
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/iversion.h>
 #include <linux/fileattr.h>
+#include <linux/uuid.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include <linux/fsmap.h>
@@ -41,6 +42,15 @@ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
        memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
 }
 
+/*
+ * Superblock modification callback function for changing file system
+ * UUID.
+ */
+static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg)
+{
+       memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE);
+}
+
 static
 int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
                           ext4_update_sb_callback func,
@@ -944,7 +954,9 @@ static long ext4_ioctl_group_add(struct file *file,
            test_opt(sb, INIT_INODE_TABLE))
                err = ext4_register_li_request(sb, input->group);
 group_add_out:
-       ext4_resize_end(sb);
+       err2 = ext4_resize_end(sb, false);
+       if (err == 0)
+               err = err2;
        return err;
 }
 
@@ -1131,6 +1143,73 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
        return 0;
 }
 
+static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi,
+                       struct fsuuid __user *ufsuuid)
+{
+       struct fsuuid fsuuid;
+       __u8 uuid[UUID_SIZE];
+
+       if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid)))
+               return -EFAULT;
+
+       if (fsuuid.fsu_len == 0) {
+               fsuuid.fsu_len = UUID_SIZE;
+               if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid.fsu_len)))
+                       return -EFAULT;
+               return -EINVAL;
+       }
+
+       if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0)
+               return -EINVAL;
+
+       lock_buffer(sbi->s_sbh);
+       memcpy(uuid, sbi->s_es->s_uuid, UUID_SIZE);
+       unlock_buffer(sbi->s_sbh);
+
+       if (copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE))
+               return -EFAULT;
+       return 0;
+}
+
+static int ext4_ioctl_setuuid(struct file *filp,
+                       const struct fsuuid __user *ufsuuid)
+{
+       int ret = 0;
+       struct super_block *sb = file_inode(filp)->i_sb;
+       struct fsuuid fsuuid;
+       __u8 uuid[UUID_SIZE];
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /*
+        * If any checksums (group descriptors or metadata) are being used
+        * then the checksum seed feature is required to change the UUID.
+        */
+       if (((ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb))
+                       && !ext4_has_feature_csum_seed(sb))
+               || ext4_has_feature_stable_inodes(sb))
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid)))
+               return -EFAULT;
+
+       if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0)
+               return -EINVAL;
+
+       if (copy_from_user(uuid, &ufsuuid->fsu_uuid[0], UUID_SIZE))
+               return -EFAULT;
+
+       ret = mnt_want_write_file(filp);
+       if (ret)
+               return ret;
+
+       ret = ext4_update_superblocks_fn(sb, ext4_sb_setuuid, &uuid);
+       mnt_drop_write_file(filp);
+
+       return ret;
+}
+
 static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -1223,7 +1302,9 @@ setversion_out:
                        err = err2;
                mnt_drop_write_file(filp);
 group_extend_out:
-               ext4_resize_end(sb);
+               err2 = ext4_resize_end(sb, false);
+               if (err == 0)
+                       err = err2;
                return err;
        }
 
@@ -1371,7 +1452,9 @@ mext_out:
                        err = ext4_register_li_request(sb, o_group);
 
 resizefs_out:
-               ext4_resize_end(sb);
+               err2 = ext4_resize_end(sb, true);
+               if (err == 0)
+                       err = err2;
                return err;
        }
 
@@ -1509,6 +1592,10 @@ resizefs_out:
                return ext4_ioctl_setlabel(filp,
                                           (const void __user *)arg);
 
+       case EXT4_IOC_GETFSUUID:
+               return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg);
+       case EXT4_IOC_SETFSUUID:
+               return ext4_ioctl_setuuid(filp, (const void __user *)arg);
        default:
                return -ENOTTY;
        }
@@ -1586,6 +1673,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case EXT4_IOC_CHECKPOINT:
        case FS_IOC_GETFSLABEL:
        case FS_IOC_SETFSLABEL:
+       case EXT4_IOC_GETFSUUID:
+       case EXT4_IOC_SETFSUUID:
                break;
        default:
                return -ENOIOCTLCMD;
@@ -1599,13 +1688,15 @@ static void set_overhead(struct ext4_super_block *es, const void *arg)
        es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
 }
 
-int ext4_update_overhead(struct super_block *sb)
+int ext4_update_overhead(struct super_block *sb, bool force)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (sb_rdonly(sb) || sbi->s_overhead == 0 ||
-           sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))
+       if (sb_rdonly(sb))
+               return 0;
+       if (!force &&
+           (sbi->s_overhead == 0 ||
+            sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters)))
                return 0;
-
        return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead);
 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9e06334..bd8f8b5 100644
@@ -1933,6 +1933,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
        unsigned ret = 0;
        int len0 = len;
        void *buddy;
+       bool split = false;
 
        BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
        BUG_ON(e4b->bd_group != ex->fe_group);
@@ -1957,12 +1958,16 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 
        /* let's maintain buddy itself */
        while (len) {
-               ord = mb_find_order_for_block(e4b, start);
+               if (!split)
+                       ord = mb_find_order_for_block(e4b, start);
 
                if (((start >> ord) << ord) == start && len >= (1 << ord)) {
                        /* the whole chunk may be allocated at once! */
                        mlen = 1 << ord;
-                       buddy = mb_find_buddy(e4b, ord, &max);
+                       if (!split)
+                               buddy = mb_find_buddy(e4b, ord, &max);
+                       else
+                               split = false;
                        BUG_ON((start >> ord) >= max);
                        mb_set_bit(start >> ord, buddy);
                        e4b->bd_info->bb_counters[ord]--;
@@ -1989,6 +1994,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
                mb_clear_bit(cur + 1, buddy);
                e4b->bd_info->bb_counters[ord]++;
                e4b->bd_info->bb_counters[ord]++;
+               split = true;
        }
        mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
@@ -5928,6 +5934,15 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
 
        sbi = EXT4_SB(sb);
 
+       if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+           !ext4_inode_block_valid(inode, block, count)) {
+               ext4_error(sb, "Freeing blocks in system zone - "
+                          "Block = %llu, count = %lu", block, count);
+               /* err = 0. ext4_std_error should be a no op */
+               goto error_return;
+       }
+       flags |= EXT4_FREE_BLOCKS_VALIDATED;
+
 do_more:
        overflow = 0;
        ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -5944,6 +5959,8 @@ do_more:
                overflow = EXT4_C2B(sbi, bit) + count -
                        EXT4_BLOCKS_PER_GROUP(sb);
                count -= overflow;
+               /* The range changed so it's no longer validated */
+               flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
        }
        count_clusters = EXT4_NUM_B2C(sbi, count);
        bitmap_bh = ext4_read_block_bitmap(sb, block_group);
@@ -5958,7 +5975,8 @@ do_more:
                goto error_return;
        }
 
-       if (!ext4_inode_block_valid(inode, block, count)) {
+       if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+           !ext4_inode_block_valid(inode, block, count)) {
                ext4_error(sb, "Freeing blocks in system zone - "
                           "Block = %llu, count = %lu", block, count);
                /* err = 0. ext4_std_error should be a no op */
@@ -6081,6 +6099,8 @@ do_more:
                block += count;
                count = overflow;
                put_bh(bitmap_bh);
+               /* The range changed so it's no longer validated */
+               flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
                goto do_more;
        }
 error_return:
@@ -6127,6 +6147,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                           "block = %llu, count = %lu", block, count);
                return;
        }
+       flags |= EXT4_FREE_BLOCKS_VALIDATED;
 
        ext4_debug("freeing block %llu\n", block);
        trace_ext4_free_blocks(inode, block, count, flags);
@@ -6158,6 +6179,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        block -= overflow;
                        count += overflow;
                }
+               /* The range changed so it's no longer validated */
+               flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
        }
        overflow = EXT4_LBLK_COFF(sbi, count);
        if (overflow) {
@@ -6168,6 +6191,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                                return;
                } else
                        count += sbi->s_cluster_ratio - overflow;
+               /* The range changed so it's no longer validated */
+               flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
        }
 
        if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 42f5905..54e7d3c 100644
@@ -417,7 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
        struct inode *tmp_inode = NULL;
        struct migrate_struct lb;
        unsigned long max_entries;
-       __u32 goal;
+       __u32 goal, tmp_csum_seed;
        uid_t owner[2];
 
        /*
@@ -465,6 +465,7 @@ int ext4_ext_migrate(struct inode *inode)
         * the migration.
         */
        ei = EXT4_I(inode);
+       tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed;
        EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
        i_size_write(tmp_inode, i_size_read(inode));
        /*
@@ -575,6 +576,7 @@ err_out:
         * the inode is not visible to user space.
         */
        tmp_inode->i_blocks = 0;
+       EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed;
 
        /* Reset the extent details */
        ext4_ext_tree_init(handle, tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index db4ba99..3a31b66 100644
@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
                                        struct inode *inode,
                                        ext4_lblk_t *block)
 {
+       struct ext4_map_blocks map;
        struct buffer_head *bh;
        int err;
 
@@ -63,6 +64,21 @@ static struct buffer_head *ext4_append(handle_t *handle,
                return ERR_PTR(-ENOSPC);
 
        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+       map.m_lblk = *block;
+       map.m_len = 1;
+
+       /*
+        * We're appending new directory block. Make sure the block is not
+        * allocated yet, otherwise we will end up corrupting the
+        * directory.
+        */
+       err = ext4_map_blocks(NULL, inode, &map, 0);
+       if (err < 0)
+               return ERR_PTR(err);
+       if (err) {
+               EXT4_ERROR_INODE(inode, "Logical block already allocated");
+               return ERR_PTR(-EFSCORRUPTED);
+       }
 
        bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
        if (IS_ERR(bh))
@@ -110,6 +126,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
        struct ext4_dir_entry *dirent;
        int is_dx_block = 0;
 
+       if (block >= inode->i_size) {
+               ext4_error_inode(inode, func, line, block,
+                      "Attempting to read directory block (%u) that is past i_size (%llu)",
+                      block, inode->i_size);
+               return ERR_PTR(-EFSCORRUPTED);
+       }
+
        if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
                bh = ERR_PTR(-EIO);
        else
@@ -3067,11 +3090,8 @@ bool ext4_empty_dir(struct inode *inode)
                de = (struct ext4_dir_entry_2 *) (bh->b_data +
                                        (offset & (sb->s_blocksize - 1)));
                if (ext4_check_dir_entry(inode, NULL, de, bh,
-                                        bh->b_data, bh->b_size, offset)) {
-                       offset = (offset | (sb->s_blocksize - 1)) + 1;
-                       continue;
-               }
-               if (le32_to_cpu(de->inode)) {
+                                        bh->b_data, bh->b_size, offset) ||
+                   le32_to_cpu(de->inode)) {
                        brelse(bh);
                        return false;
                }
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 7de0612..69a9cf9 100644
@@ -181,8 +181,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        } else
                brelse(iloc.bh);
 
-       jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
-       jbd_debug(4, "orphan inode %lu will point to %d\n",
+       ext4_debug("superblock will point to %lu\n", inode->i_ino);
+       ext4_debug("orphan inode %lu will point to %d\n",
                        inode->i_ino, NEXT_ORPHAN(inode));
 out:
        ext4_std_error(sb, err);
@@ -251,7 +251,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
        }
 
        mutex_lock(&sbi->s_orphan_lock);
-       jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
+       ext4_debug("remove inode %lu from orphan list\n", inode->i_ino);
 
        prev = ei->i_orphan.prev;
        list_del_init(&ei->i_orphan);
@@ -267,7 +267,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 
        ino_next = NEXT_ORPHAN(inode);
        if (prev == &sbi->s_orphan) {
-               jbd_debug(4, "superblock will point to %u\n", ino_next);
+               ext4_debug("superblock will point to %u\n", ino_next);
                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
                err = ext4_journal_get_write_access(handle, inode->i_sb,
                                                    sbi->s_sbh, EXT4_JTR_NONE);
@@ -286,7 +286,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                struct inode *i_prev =
                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
 
-               jbd_debug(4, "orphan inode %lu will point to %u\n",
+               ext4_debug("orphan inode %lu will point to %u\n",
                          i_prev->i_ino, ino_next);
                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
                if (err) {
@@ -332,8 +332,8 @@ static void ext4_process_orphan(struct inode *inode,
                        ext4_msg(sb, KERN_DEBUG,
                                "%s: truncating inode %lu to %lld bytes",
                                __func__, inode->i_ino, inode->i_size);
-               jbd_debug(2, "truncating inode %lu to %lld bytes\n",
-                         inode->i_ino, inode->i_size);
+               ext4_debug("truncating inode %lu to %lld bytes\n",
+                          inode->i_ino, inode->i_size);
                inode_lock(inode);
                truncate_inode_pages(inode->i_mapping, inode->i_size);
                ret = ext4_truncate(inode);
@@ -353,8 +353,8 @@ static void ext4_process_orphan(struct inode *inode,
                        ext4_msg(sb, KERN_DEBUG,
                                "%s: deleting unreferenced inode %lu",
                                __func__, inode->i_ino);
-               jbd_debug(2, "deleting unreferenced inode %lu\n",
-                         inode->i_ino);
+               ext4_debug("deleting unreferenced inode %lu\n",
+                          inode->i_ino);
                (*nr_orphans)++;
        }
        iput(inode);  /* The delete magic happens here! */
@@ -391,7 +391,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
        int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
 
        if (!es->s_last_orphan && !oi->of_blocks) {
-               jbd_debug(4, "no orphan inodes to clean up\n");
+               ext4_debug("no orphan inodes to clean up\n");
                return;
        }
 
@@ -415,7 +415,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
                                  "clearing orphan list.\n");
                        es->s_last_orphan = 0;
                }
-               jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+               ext4_debug("Skipping orphan recovery on fs with errors.\n");
                return;
        }
 
@@ -459,7 +459,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
                 * so, skip the rest.
                 */
                if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
-                       jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+                       ext4_debug("Skipping orphan recovery on fs with errors.\n");
                        es->s_last_orphan = 0;
                        break;
                }
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8b70a47..fea2a68 100644
@@ -97,10 +97,13 @@ int ext4_resize_begin(struct super_block *sb)
        return ret;
 }
 
-void ext4_resize_end(struct super_block *sb)
+int ext4_resize_end(struct super_block *sb, bool update_backups)
 {
        clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
        smp_mb__after_atomic();
+       if (update_backups)
+               return ext4_update_overhead(sb, true);
+       return 0;
 }
 
 static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
@@ -1380,6 +1383,17 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
        return err;
 }
 
+static void ext4_add_overhead(struct super_block *sb,
+                              const ext4_fsblk_t overhead)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+
+       sbi->s_overhead += overhead;
+       es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+       smp_wmb();
+}
+
 /*
  * ext4_update_super() updates the super block so that the newly added
  * groups can be seen by the filesystem.
@@ -1481,9 +1495,18 @@ static void ext4_update_super(struct super_block *sb,
        }
 
        /*
-        * Update the fs overhead information
+        * Update the fs overhead information.
+        *
+        * For bigalloc, if the superblock already has a properly calculated
+        * overhead, update it with a value based on numbers already computed
+        * above for the newly allocated capacity.
         */
-       ext4_calculate_overhead(sb);
+       if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0))
+               ext4_add_overhead(sb,
+                       EXT4_NUM_B2C(sbi, blocks_count - free_blocks));
+       else
+               ext4_calculate_overhead(sb);
+       es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
 
        if (test_opt(sb, DEBUG))
                printk(KERN_DEBUG "EXT4-fs: added group %u:"
@@ -1988,6 +2011,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
        }
        brelse(bh);
 
+       /*
+        * For bigalloc, trim the requested size to the nearest cluster
+        * boundary to avoid creating an unusable filesystem. We do this
+        * silently, instead of returning an error, to avoid breaking
+        * callers that blindly resize the filesystem to the full size of
+        * the underlying block device.
+        */
+       if (ext4_has_feature_bigalloc(sb))
+               n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1);
+
 retry:
        o_blocks_count = ext4_blocks_count(es);
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c68dec..8f907e9 100644
@@ -3011,6 +3011,15 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
        } else if (test_opt2(sb, DAX_INODE)) {
                SEQ_OPTS_PUTS("dax=inode");
        }
+
+       if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+                       !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+               SEQ_OPTS_PUTS("mb_optimize_scan=0");
+       } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+                       test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+               SEQ_OPTS_PUTS("mb_optimize_scan=1");
+       }
+
        ext4_show_quota_options(seq, sb);
        return 0;
 }
@@ -5523,7 +5532,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
                         "Quota mode: %s.", descr, ext4_quota_mode(sb));
 
        /* Update the s_overhead_clusters if necessary */
-       ext4_update_overhead(sb);
+       ext4_update_overhead(sb, false);
        return 0;
 
 free_sbi:
@@ -5585,7 +5594,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
                return NULL;
        }
 
-       jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
+       ext4_debug("Journal inode found at %p: %lld bytes\n",
                  journal_inode, journal_inode->i_size);
        if (!S_ISREG(journal_inode->i_mode)) {
                ext4_msg(sb, KERN_ERR, "invalid journal inode");
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index d281f5b..3d3ed3c 100644
@@ -74,6 +74,21 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
                                 struct delayed_call *callback)
 {
        struct buffer_head *bh;
+       char *inline_link;
+
+       /*
+        * Create a new inlined symlink is not supported, just provide a
+        * method to read the leftovers.
+        */
+       if (ext4_has_inline_data(inode)) {
+               if (!dentry)
+                       return ERR_PTR(-ECHILD);
+
+               inline_link = ext4_read_inline_link(inode);
+               if (!IS_ERR(inline_link))
+                       set_delayed_call(callback, kfree_link, inline_link);
+               return inline_link;
+       }
 
        if (!dentry) {
                bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 564e28a..533216e 100644
@@ -436,6 +436,21 @@ error:
        return err;
 }
 
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+       struct mb_cache_entry *oe;
+
+       if (!EA_INODE_CACHE(inode))
+               return;
+       /* Wait for entry to get unused so that we can remove it */
+       while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+                       ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+               mb_cache_entry_wait_unused(oe);
+               mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+       }
+}
+
 static int
 ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
                               struct ext4_xattr_entry *entry, void *buffer,
@@ -976,10 +991,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
 static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
                                       int ref_change)
 {
-       struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
        struct ext4_iloc iloc;
        s64 ref_count;
-       u32 hash;
        int ret;
 
        inode_lock(ea_inode);
@@ -1002,14 +1015,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 
                        set_nlink(ea_inode, 1);
                        ext4_orphan_del(handle, ea_inode);
-
-                       if (ea_inode_cache) {
-                               hash = ext4_xattr_inode_get_hash(ea_inode);
-                               mb_cache_entry_create(ea_inode_cache,
-                                                     GFP_NOFS, hash,
-                                                     ea_inode->i_ino,
-                                                     true /* reusable */);
-                       }
                }
        } else {
                WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1022,12 +1027,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 
                        clear_nlink(ea_inode);
                        ext4_orphan_add(handle, ea_inode);
-
-                       if (ea_inode_cache) {
-                               hash = ext4_xattr_inode_get_hash(ea_inode);
-                               mb_cache_entry_delete(ea_inode_cache, hash,
-                                                     ea_inode->i_ino);
-                       }
                }
        }
 
@@ -1237,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
        if (error)
                goto out;
 
+retry_ref:
        lock_buffer(bh);
        hash = le32_to_cpu(BHDR(bh)->h_hash);
        ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1246,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                 * This must happen under buffer lock for
                 * ext4_xattr_block_set() to reliably detect freed block
                 */
-               if (ea_block_cache)
-                       mb_cache_entry_delete(ea_block_cache, hash,
-                                             bh->b_blocknr);
+               if (ea_block_cache) {
+                       struct mb_cache_entry *oe;
+
+                       oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+                                                         bh->b_blocknr);
+                       if (oe) {
+                               unlock_buffer(bh);
+                               mb_cache_entry_wait_unused(oe);
+                               mb_cache_entry_put(ea_block_cache, oe);
+                               goto retry_ref;
+                       }
+               }
                get_bh(bh);
                unlock_buffer(bh);
 
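The retry_ref loop above is the standard unlock-wait-retry idiom: never sleep waiting for another user while still holding the lock that user needs to make progress. A hedged sketch, with hypothetical helpers standing in for mb_cache_entry_delete_or_get() and mb_cache_entry_wait_unused():

retry:
	lock_buffer(bh);
	if (block_has_other_users(bh)) {	/* hypothetical predicate */
		unlock_buffer(bh);		/* drop lock before sleeping */
		wait_for_last_user(bh);		/* hypothetical, may sleep */
		goto retry;			/* state may have changed; re-check */
	}
	/* ...safe to free the block with bh locked... */
	unlock_buffer(bh);
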
@@ -1858,6 +1867,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 #define header(x) ((struct ext4_xattr_header *)(x))
 
        if (s->base) {
+               int offset = (char *)s->here - bs->bh->b_data;
+
                BUFFER_TRACE(bs->bh, "get_write_access");
                error = ext4_journal_get_write_access(handle, sb, bs->bh,
                                                      EXT4_JTR_NONE);
@@ -1873,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                         * ext4_xattr_block_set() to reliably detect modified
                         * block
                         */
-                       if (ea_block_cache)
-                               mb_cache_entry_delete(ea_block_cache, hash,
-                                                     bs->bh->b_blocknr);
+                       if (ea_block_cache) {
+                               struct mb_cache_entry *oe;
+
+                               oe = mb_cache_entry_delete_or_get(ea_block_cache,
+                                       hash, bs->bh->b_blocknr);
+                               if (oe) {
+                                       /*
+                                        * Xattr block is getting reused. Leave
+                                        * it alone.
+                                        */
+                                       mb_cache_entry_put(ea_block_cache, oe);
+                                       goto clone_block;
+                               }
+                       }
                        ea_bdebug(bs->bh, "modifying in-place");
                        error = ext4_xattr_set_entry(i, s, handle, inode,
                                                     true /* is_block */);
@@ -1890,49 +1912,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                        if (error)
                                goto cleanup;
                        goto inserted;
-               } else {
-                       int offset = (char *)s->here - bs->bh->b_data;
+               }
+clone_block:
+               unlock_buffer(bs->bh);
+               ea_bdebug(bs->bh, "cloning");
+               s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+               error = -ENOMEM;
+               if (s->base == NULL)
+                       goto cleanup;
+               s->first = ENTRY(header(s->base)+1);
+               header(s->base)->h_refcount = cpu_to_le32(1);
+               s->here = ENTRY(s->base + offset);
+               s->end = s->base + bs->bh->b_size;
 
-                       unlock_buffer(bs->bh);
-                       ea_bdebug(bs->bh, "cloning");
-                       s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
-                       error = -ENOMEM;
-                       if (s->base == NULL)
+               /*
+                * If existing entry points to an xattr inode, we need
+                * to prevent ext4_xattr_set_entry() from decrementing
+                * ref count on it because the reference belongs to the
+                * original block. In this case, make the entry look
+                * like it has an empty value.
+                */
+               if (!s->not_found && s->here->e_value_inum) {
+                       ea_ino = le32_to_cpu(s->here->e_value_inum);
+                       error = ext4_xattr_inode_iget(inode, ea_ino,
+                                     le32_to_cpu(s->here->e_hash),
+                                     &tmp_inode);
+                       if (error)
                                goto cleanup;
-                       s->first = ENTRY(header(s->base)+1);
-                       header(s->base)->h_refcount = cpu_to_le32(1);
-                       s->here = ENTRY(s->base + offset);
-                       s->end = s->base + bs->bh->b_size;
-
-                       /*
-                        * If existing entry points to an xattr inode, we need
-                        * to prevent ext4_xattr_set_entry() from decrementing
-                        * ref count on it because the reference belongs to the
-                        * original block. In this case, make the entry look
-                        * like it has an empty value.
-                        */
-                       if (!s->not_found && s->here->e_value_inum) {
-                               ea_ino = le32_to_cpu(s->here->e_value_inum);
-                               error = ext4_xattr_inode_iget(inode, ea_ino,
-                                             le32_to_cpu(s->here->e_hash),
-                                             &tmp_inode);
-                               if (error)
-                                       goto cleanup;
-
-                               if (!ext4_test_inode_state(tmp_inode,
-                                               EXT4_STATE_LUSTRE_EA_INODE)) {
-                                       /*
-                                        * Defer quota free call for previous
-                                        * inode until success is guaranteed.
-                                        */
-                                       old_ea_inode_quota = le32_to_cpu(
-                                                       s->here->e_value_size);
-                               }
-                               iput(tmp_inode);
 
-                               s->here->e_value_inum = 0;
-                               s->here->e_value_size = 0;
+                       if (!ext4_test_inode_state(tmp_inode,
+                                       EXT4_STATE_LUSTRE_EA_INODE)) {
+                               /*
+                                * Defer quota free call for previous
+                                * inode until success is guaranteed.
+                                */
+                               old_ea_inode_quota = le32_to_cpu(
+                                               s->here->e_value_size);
                        }
+                       iput(tmp_inode);
+
+                       s->here->e_value_inum = 0;
+                       s->here->e_value_size = 0;
                }
        } else {
                /* Allocate a buffer where we construct the new block. */
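
The clone_block path above is copy-on-write for a shared xattr block: rather than editing a block other inodes may still reference, build a private copy and start its refcount at one. The core step in isolation (a sketch; error handling trimmed, names as in the hunk):

	s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
	if (!s->base)
		return -ENOMEM;
	header(s->base)->h_refcount = cpu_to_le32(1);	/* sole owner: us */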
@@ -1999,18 +2019,13 @@ inserted:
                                lock_buffer(new_bh);
                                /*
                                 * We have to be careful about races with
-                                * freeing, rehashing or adding references to
-                                * xattr block. Once we hold buffer lock xattr
-                                * block's state is stable so we can check
-                                * whether the block got freed / rehashed or
-                                * not.  Since we unhash mbcache entry under
-                                * buffer lock when freeing / rehashing xattr
-                                * block, checking whether entry is still
-                                * hashed is reliable. Same rules hold for
-                                * e_reusable handling.
+                                * adding references to the xattr block. Once
+                                * we hold the buffer lock, the xattr block's
+                                * state is stable, so we can check whether the
+                                * additional reference fits.
                                 */
-                               if (hlist_bl_unhashed(&ce->e_hash_list) ||
-                                   !ce->e_reusable) {
+                               ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+                               if (ref > EXT4_XATTR_REFCOUNT_MAX) {
                                        /*
                                         * Undo everything and check mbcache
                                         * again.
@@ -2025,9 +2040,8 @@ inserted:
                                        new_bh = NULL;
                                        goto inserted;
                                }
-                               ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
                                BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
-                               if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+                               if (ref == EXT4_XATTR_REFCOUNT_MAX)
                                        ce->e_reusable = 0;
                                ea_bdebug(new_bh, "reusing; refcount now=%d",
                                          ref);
@@ -2175,8 +2189,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
        struct ext4_inode *raw_inode;
        int error;
 
-       if (EXT4_I(inode)->i_extra_isize == 0)
+       if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
                return 0;
+
        raw_inode = ext4_raw_inode(&is->iloc);
        header = IHDR(inode, raw_inode);
        is->s.base = is->s.first = IFIRST(header);
@@ -2204,8 +2219,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
        struct ext4_xattr_search *s = &is->s;
        int error;
 
-       if (EXT4_I(inode)->i_extra_isize == 0)
+       if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
                return -ENOSPC;
+
        error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
        if (error)
                return error;
index 77efb9a..824faf0 100644 (file)
@@ -84,7 +84,7 @@ struct ext4_xattr_entry {
 /*
  * The minimum size of EA value when you start storing it in an external inode
  * size of block - size of header - size of 1 entry - 4 null bytes
-*/
+ */
 #define EXT4_XATTR_MIN_LARGE_EA_SIZE(b)                                        \
        ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
 
@@ -95,6 +95,19 @@ struct ext4_xattr_entry {
 
 #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
 
+/*
+ * If we want to add an xattr to the inode, we should make sure that
+ * i_extra_isize is not 0 and that the inode size is not less than
+ * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + header + pad.
+ *   EXT4_GOOD_OLD_INODE_SIZE   extra_isize header   entry   pad  data
+ * |--------------------------|------------|------|---------|---|-------|
+ */
+#define EXT4_INODE_HAS_XATTR_SPACE(inode)                              \
+       ((EXT4_I(inode)->i_extra_isize != 0) &&                         \
+        (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize +     \
+         sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <=    \
+         EXT4_INODE_SIZE((inode)->i_sb)))
+
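
Worked example for the macro above, assuming a common 256-byte on-disk inode with i_extra_isize == 32 (the ibody header and EXT4_XATTR_PAD are 4 bytes each):

/*
 * 128 (EXT4_GOOD_OLD_INODE_SIZE) + 32 (i_extra_isize)
 *   + 4 (struct ext4_xattr_ibody_header) + 4 (EXT4_XATTR_PAD) = 168
 * 168 <= 256, so in-inode xattr space exists.  With a legacy 128-byte
 * inode, i_extra_isize is 0 and the macro evaluates to false.
 */
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
	return -ENOSPC;		/* no room in the inode body */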
 struct ext4_xattr_info {
        const char *name;
        const void *value;
@@ -178,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
 
 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
                            struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
 
 extern const struct xattr_handler *ext4_xattr_handlers[];
 
index 7461329..51bd38d 100644 (file)
@@ -203,7 +203,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
        tid_t                   this_tid;
        int                     result, batch_count = 0;
 
-       jbd_debug(1, "Start checkpoint\n");
+       jbd2_debug(1, "Start checkpoint\n");
 
        /*
         * First thing: if there are any transactions in the log which
@@ -212,7 +212,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
         */
        result = jbd2_cleanup_journal_tail(journal);
        trace_jbd2_checkpoint(journal, result);
-       jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
+       jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
        if (result <= 0)
                return result;
 
@@ -804,5 +804,5 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 
        trace_jbd2_drop_transaction(journal, transaction);
 
-       jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
+       jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
 }
index 890b554..b2b2bc9 100644 (file)
@@ -421,7 +421,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
        /* Do we need to erase the effects of a prior jbd2_journal_flush? */
        if (journal->j_flags & JBD2_FLUSHED) {
-               jbd_debug(3, "super block updated\n");
+               jbd2_debug(3, "super block updated\n");
                mutex_lock_io(&journal->j_checkpoint_mutex);
                /*
                 * We hold j_checkpoint_mutex so tail cannot change under us.
@@ -435,7 +435,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                                                REQ_SYNC);
                mutex_unlock(&journal->j_checkpoint_mutex);
        } else {
-               jbd_debug(3, "superblock not updated\n");
+               jbd2_debug(3, "superblock not updated\n");
        }
 
        J_ASSERT(journal->j_running_transaction != NULL);
@@ -467,7 +467,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        commit_transaction = journal->j_running_transaction;
 
        trace_jbd2_start_commit(journal, commit_transaction);
-       jbd_debug(1, "JBD2: starting commit of transaction %d\n",
+       jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
                        commit_transaction->t_tid);
 
        write_lock(&journal->j_state_lock);
@@ -540,7 +540,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        __jbd2_journal_clean_checkpoint_list(journal, false);
        spin_unlock(&journal->j_list_lock);
 
-       jbd_debug(3, "JBD2: commit phase 1\n");
+       jbd2_debug(3, "JBD2: commit phase 1\n");
 
        /*
         * Clear revoked flag to reflect there is no revoked buffers
@@ -553,13 +553,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         */
        jbd2_journal_switch_revoke_table(journal);
 
+       write_lock(&journal->j_state_lock);
        /*
         * Reserved credits cannot be claimed anymore, free them
         */
        atomic_sub(atomic_read(&journal->j_reserved_credits),
                   &commit_transaction->t_outstanding_credits);
 
-       write_lock(&journal->j_state_lock);
        trace_jbd2_commit_flushing(journal, commit_transaction);
        stats.run.rs_flushing = jiffies;
        stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
@@ -573,7 +573,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        wake_up(&journal->j_wait_transaction_locked);
        write_unlock(&journal->j_state_lock);
 
-       jbd_debug(3, "JBD2: commit phase 2a\n");
+       jbd2_debug(3, "JBD2: commit phase 2a\n");
 
        /*
         * Now start flushing things to disk, in the order they appear
@@ -586,7 +586,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        blk_start_plug(&plug);
        jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
 
-       jbd_debug(3, "JBD2: commit phase 2b\n");
+       jbd2_debug(3, "JBD2: commit phase 2b\n");
 
        /*
         * Way to go: we have now written out all of the data for a
@@ -642,7 +642,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                if (!descriptor) {
                        J_ASSERT (bufs == 0);
 
-                       jbd_debug(4, "JBD2: get descriptor\n");
+                       jbd2_debug(4, "JBD2: get descriptor\n");
 
                        descriptor = jbd2_journal_get_descriptor_buffer(
                                                        commit_transaction,
@@ -652,7 +652,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                                continue;
                        }
 
-                       jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
+                       jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
                                (unsigned long long)descriptor->b_blocknr,
                                descriptor->b_data);
                        tagp = &descriptor->b_data[sizeof(journal_header_t)];
@@ -737,7 +737,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                    commit_transaction->t_buffers == NULL ||
                    space_left < tag_bytes + 16 + csum_size) {
 
-                       jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
+                       jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
 
                        /* Write an end-of-descriptor marker before
                            submitting the IOs.  "tag" still points to
@@ -839,7 +839,7 @@ start_journal_io:
           so we incur less scheduling load.
        */
 
-       jbd_debug(3, "JBD2: commit phase 3\n");
+       jbd2_debug(3, "JBD2: commit phase 3\n");
 
        while (!list_empty(&io_bufs)) {
                struct buffer_head *bh = list_entry(io_bufs.prev,
@@ -882,7 +882,7 @@ start_journal_io:
 
        J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-       jbd_debug(3, "JBD2: commit phase 4\n");
+       jbd2_debug(3, "JBD2: commit phase 4\n");
 
        /* Here we wait for the revoke record and descriptor record buffers */
        while (!list_empty(&log_bufs)) {
@@ -906,7 +906,7 @@ start_journal_io:
        if (err)
                jbd2_journal_abort(journal, err);
 
-       jbd_debug(3, "JBD2: commit phase 5\n");
+       jbd2_debug(3, "JBD2: commit phase 5\n");
        write_lock(&journal->j_state_lock);
        J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
        commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -945,7 +945,7 @@ start_journal_io:
            transaction can be removed from any checkpoint list it was on
            before. */
 
-       jbd_debug(3, "JBD2: commit phase 6\n");
+       jbd2_debug(3, "JBD2: commit phase 6\n");
 
        J_ASSERT(list_empty(&commit_transaction->t_inode_list));
        J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -1122,7 +1122,7 @@ restart_loop:
 
        /* Done with this transaction! */
 
-       jbd_debug(3, "JBD2: commit phase 7\n");
+       jbd2_debug(3, "JBD2: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
 
@@ -1164,7 +1164,7 @@ restart_loop:
                journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
 
        trace_jbd2_end_commit(journal, commit_transaction);
-       jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+       jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
 
        write_lock(&journal->j_state_lock);
index 2a1b9da..b083961 100644 (file)
@@ -49,8 +49,7 @@
 #include <asm/page.h>
 
 #ifdef CONFIG_JBD2_DEBUG
-ushort jbd2_journal_enable_debug __read_mostly;
-EXPORT_SYMBOL(jbd2_journal_enable_debug);
+static ushort jbd2_journal_enable_debug __read_mostly;
 
 module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
 MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
@@ -81,7 +80,6 @@ EXPORT_SYMBOL(jbd2_journal_errno);
 EXPORT_SYMBOL(jbd2_journal_ack_err);
 EXPORT_SYMBOL(jbd2_journal_clear_err);
 EXPORT_SYMBOL(jbd2_log_wait_commit);
-EXPORT_SYMBOL(jbd2_log_start_commit);
 EXPORT_SYMBOL(jbd2_journal_start_commit);
 EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
 EXPORT_SYMBOL(jbd2_journal_wipe);
@@ -115,7 +113,6 @@ void __jbd2_debug(int level, const char *file, const char *func,
        printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
        va_end(args);
 }
-EXPORT_SYMBOL(__jbd2_debug);
 #endif
 
 /* Checksumming functions */
@@ -203,11 +200,11 @@ loop:
        if (journal->j_flags & JBD2_UNMOUNT)
                goto end_loop;
 
-       jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
+       jbd2_debug(1, "commit_sequence=%u, commit_request=%u\n",
                journal->j_commit_sequence, journal->j_commit_request);
 
        if (journal->j_commit_sequence != journal->j_commit_request) {
-               jbd_debug(1, "OK, requests differ\n");
+               jbd2_debug(1, "OK, requests differ\n");
                write_unlock(&journal->j_state_lock);
                del_timer_sync(&journal->j_commit_timer);
                jbd2_journal_commit_transaction(journal);
@@ -222,7 +219,7 @@ loop:
                 * good idea, because that depends on threads that may
                 * be already stopped.
                 */
-               jbd_debug(1, "Now suspending kjournald2\n");
+               jbd2_debug(1, "Now suspending kjournald2\n");
                write_unlock(&journal->j_state_lock);
                try_to_freeze();
                write_lock(&journal->j_state_lock);
@@ -252,7 +249,7 @@ loop:
                finish_wait(&journal->j_wait_commit, &wait);
        }
 
-       jbd_debug(1, "kjournald2 wakes\n");
+       jbd2_debug(1, "kjournald2 wakes\n");
 
        /*
         * Were we woken up by a commit wakeup event?
@@ -260,7 +257,7 @@ loop:
        transaction = journal->j_running_transaction;
        if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
                journal->j_commit_request = transaction->t_tid;
-               jbd_debug(1, "woke because of timeout\n");
+               jbd2_debug(1, "woke because of timeout\n");
        }
        goto loop;
 
@@ -268,7 +265,7 @@ end_loop:
        del_timer_sync(&journal->j_commit_timer);
        journal->j_task = NULL;
        wake_up(&journal->j_wait_done_commit);
-       jbd_debug(1, "Journal thread exiting.\n");
+       jbd2_debug(1, "Journal thread exiting.\n");
        write_unlock(&journal->j_state_lock);
        return 0;
 }
@@ -481,7 +478,7 @@ repeat:
  * Called with j_state_lock locked for writing.
  * Returns true if a transaction commit was started.
  */
-int __jbd2_log_start_commit(journal_t *journal, tid_t target)
+static int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
        /* Return if the txn has already requested to be committed */
        if (journal->j_commit_request == target)
@@ -500,7 +497,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
                 */
 
                journal->j_commit_request = target;
-               jbd_debug(1, "JBD2: requesting commit %u/%u\n",
+               jbd2_debug(1, "JBD2: requesting commit %u/%u\n",
                          journal->j_commit_request,
                          journal->j_commit_sequence);
                journal->j_running_transaction->t_requested = jiffies;
@@ -705,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
        }
 #endif
        while (tid_gt(tid, journal->j_commit_sequence)) {
-               jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
+               jbd2_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
                                  tid, journal->j_commit_sequence);
                read_unlock(&journal->j_state_lock);
                wake_up(&journal->j_wait_commit);
@@ -1117,7 +1114,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
                freed += journal->j_last - journal->j_first;
 
        trace_jbd2_update_log_tail(journal, tid, block, freed);
-       jbd_debug(1,
+       jbd2_debug(1,
                  "Cleaning journal tail from %u to %u (offset %lu), "
                  "freeing %lu\n",
                  journal->j_tail_sequence, tid, block, freed);
@@ -1497,7 +1494,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
                return NULL;
        }
 
-       jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
+       jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
                  inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
                  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
 
@@ -1577,7 +1574,7 @@ static int journal_reset(journal_t *journal)
         * attempting a write to a potential-readonly device.
         */
        if (sb->s_start == 0) {
-               jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
+               jbd2_debug(1, "JBD2: Skipping superblock update on recovered sb "
                        "(start %ld, seq %u, errno %d)\n",
                        journal->j_tail, journal->j_tail_sequence,
                        journal->j_errno);
@@ -1681,7 +1678,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
        }
 
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
-       jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+       jbd2_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
                  tail_block, tail_tid);
 
        lock_buffer(journal->j_sb_buffer);
@@ -1722,7 +1719,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
                return;
        }
 
-       jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
+       jbd2_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
                  journal->j_tail_sequence);
 
        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1865,7 +1862,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
        errcode = journal->j_errno;
        if (errcode == -ESHUTDOWN)
                errcode = 0;
-       jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
+       jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
        sb->s_errno    = cpu_to_be32(errcode);
 
        jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
@@ -2337,7 +2334,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
            compat & JBD2_FEATURE_COMPAT_CHECKSUM)
                compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
 
-       jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
+       jbd2_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
                  compat, ro, incompat);
 
        sb = journal->j_superblock;
@@ -2406,7 +2403,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
 {
        journal_superblock_t *sb;
 
-       jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
+       jbd2_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
                  compat, ro, incompat);
 
        sb = journal->j_superblock;
@@ -2863,7 +2860,7 @@ static struct journal_head *journal_alloc_journal_head(void)
 #endif
        ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
        if (!ret) {
-               jbd_debug(1, "out of memory for journal_head\n");
+               jbd2_debug(1, "out of memory for journal_head\n");
                pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
                ret = kmem_cache_zalloc(jbd2_journal_head_cache,
                                GFP_NOFS | __GFP_NOFAIL);
index e699d6a..f548479 100644 (file)
@@ -245,11 +245,11 @@ static int fc_do_one_pass(journal_t *journal,
                return 0;
 
        while (next_fc_block <= journal->j_fc_last) {
-               jbd_debug(3, "Fast commit replay: next block %ld\n",
+               jbd2_debug(3, "Fast commit replay: next block %ld\n",
                          next_fc_block);
                err = jread(&bh, journal, next_fc_block);
                if (err) {
-                       jbd_debug(3, "Fast commit replay: read error\n");
+                       jbd2_debug(3, "Fast commit replay: read error\n");
                        break;
                }
 
@@ -263,7 +263,7 @@ static int fc_do_one_pass(journal_t *journal,
        }
 
        if (err)
-               jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
+               jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);
 
        return err;
 }
@@ -297,7 +297,7 @@ int jbd2_journal_recover(journal_t *journal)
         */
 
        if (!sb->s_start) {
-               jbd_debug(1, "No recovery required, last transaction %d\n",
+               jbd2_debug(1, "No recovery required, last transaction %d\n",
                          be32_to_cpu(sb->s_sequence));
                journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
                return 0;
@@ -309,10 +309,10 @@ int jbd2_journal_recover(journal_t *journal)
        if (!err)
                err = do_one_pass(journal, &info, PASS_REPLAY);
 
-       jbd_debug(1, "JBD2: recovery, exit status %d, "
+       jbd2_debug(1, "JBD2: recovery, exit status %d, "
                  "recovered transactions %u to %u\n",
                  err, info.start_transaction, info.end_transaction);
-       jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
+       jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
                  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
        /* Restart the log at the next transaction ID, thus invalidating
@@ -362,7 +362,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD2_DEBUG
                int dropped = info.end_transaction - 
                        be32_to_cpu(journal->j_superblock->s_sequence);
-               jbd_debug(1,
+               jbd2_debug(1,
                          "JBD2: ignoring %d transaction%s from the journal.\n",
                          dropped, (dropped == 1) ? "" : "s");
 #endif
@@ -484,7 +484,7 @@ static int do_one_pass(journal_t *journal,
        if (pass == PASS_SCAN)
                info->start_transaction = first_commit_ID;
 
-       jbd_debug(1, "Starting recovery pass %d\n", pass);
+       jbd2_debug(1, "Starting recovery pass %d\n", pass);
 
        /*
         * Now we walk through the log, transaction by transaction,
@@ -510,7 +510,7 @@ static int do_one_pass(journal_t *journal,
                        if (tid_geq(next_commit_ID, info->end_transaction))
                                break;
 
-               jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+               jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
                          next_commit_ID, next_log_block,
                          jbd2_has_feature_fast_commit(journal) ?
                          journal->j_fc_last : journal->j_last);
@@ -519,7 +519,7 @@ static int do_one_pass(journal_t *journal,
                 * either the next descriptor block or the final commit
                 * record. */
 
-               jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
+               jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
                err = jread(&bh, journal, next_log_block);
                if (err)
                        goto failed;
@@ -542,7 +542,7 @@ static int do_one_pass(journal_t *journal,
 
                blocktype = be32_to_cpu(tmp->h_blocktype);
                sequence = be32_to_cpu(tmp->h_sequence);
-               jbd_debug(3, "Found magic %d, sequence %d\n",
+               jbd2_debug(3, "Found magic %d, sequence %d\n",
                          blocktype, sequence);
 
                if (sequence != next_commit_ID) {
@@ -575,7 +575,7 @@ static int do_one_pass(journal_t *journal,
                                        goto failed;
                                }
                                need_check_commit_time = true;
-                               jbd_debug(1,
+                               jbd2_debug(1,
                                        "invalid descriptor block found in %lu\n",
                                        next_log_block);
                        }
@@ -758,7 +758,7 @@ static int do_one_pass(journal_t *journal,
                                 * It likely does not belong to same journal,
                                 * just end this recovery with success.
                                 */
-                               jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
+                               jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
                                          next_commit_ID);
                                brelse(bh);
                                goto done;
@@ -826,7 +826,7 @@ static int do_one_pass(journal_t *journal,
                        if (pass == PASS_SCAN &&
                            !jbd2_descriptor_block_csum_verify(journal,
                                                               bh->b_data)) {
-                               jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
+                               jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
                                          next_log_block);
                                need_check_commit_time = true;
                        }
@@ -845,7 +845,7 @@ static int do_one_pass(journal_t *journal,
                        continue;
 
                default:
-                       jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
+                       jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
                                  blocktype);
                        brelse(bh);
                        goto done;
index fa60878..4556e46 100644 (file)
@@ -398,7 +398,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
        }
        handle->h_revoke_credits--;
 
-       jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
+       jbd2_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
        err = insert_revoke_hash(journal, blocknr,
                                handle->h_transaction->t_tid);
        BUFFER_TRACE(bh_in, "exit");
@@ -428,7 +428,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
        int did_revoke = 0;     /* akpm: debug */
        struct buffer_head *bh = jh2bh(jh);
 
-       jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
+       jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
 
        /* Is the existing Revoke bit valid?  If so, we trust it, and
         * only perform the full cancel if the revoke bit is set.  If
@@ -444,7 +444,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
        if (need_cancel) {
                record = find_revoke_record(journal, bh->b_blocknr);
                if (record) {
-                       jbd_debug(4, "cancelled existing revoke on "
+                       jbd2_debug(4, "cancelled existing revoke on "
                                  "blocknr %llu\n", (unsigned long long)bh->b_blocknr);
                        spin_lock(&journal->j_revoke_lock);
                        list_del(&record->hash);
@@ -560,7 +560,7 @@ void jbd2_journal_write_revoke_records(transaction_t *transaction,
        }
        if (descriptor)
                flush_descriptor(journal, descriptor, offset);
-       jbd_debug(1, "Wrote %d revoke records\n", count);
+       jbd2_debug(1, "Wrote %d revoke records\n", count);
 }
 
 /*
index e9c308a..e1be93c 100644 (file)
@@ -373,7 +373,7 @@ alloc_transaction:
                        return -ENOMEM;
        }
 
-       jbd_debug(3, "New handle %p going live.\n", handle);
+       jbd2_debug(3, "New handle %p going live.\n", handle);
 
        /*
         * We need to hold j_state_lock until t_updates has been incremented,
@@ -453,7 +453,7 @@ repeat:
        handle->h_start_jiffies = jiffies;
        atomic_inc(&transaction->t_updates);
        atomic_inc(&transaction->t_handle_count);
-       jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
+       jbd2_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
                  handle, blocks,
                  atomic_read(&transaction->t_outstanding_credits),
                  jbd2_log_space_left(journal));
@@ -674,7 +674,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 
        /* Don't extend a locked-down transaction! */
        if (transaction->t_state != T_RUNNING) {
-               jbd_debug(3, "denied handle %p %d blocks: "
+               jbd2_debug(3, "denied handle %p %d blocks: "
                          "transaction not running\n", handle, nblocks);
                goto error_out;
        }
@@ -689,7 +689,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
                                   &transaction->t_outstanding_credits);
 
        if (wanted > journal->j_max_transaction_buffers) {
-               jbd_debug(3, "denied handle %p %d blocks: "
+               jbd2_debug(3, "denied handle %p %d blocks: "
                          "transaction too large\n", handle, nblocks);
                atomic_sub(nblocks, &transaction->t_outstanding_credits);
                goto error_out;
@@ -707,7 +707,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
        handle->h_revoke_credits_requested += revoke_records;
        result = 0;
 
-       jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
+       jbd2_debug(3, "extended handle %p by %d\n", handle, nblocks);
 error_out:
        read_unlock(&journal->j_state_lock);
        return result;
@@ -795,7 +795,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
         * First unlink the handle from its current transaction, and start the
         * commit on that.
         */
-       jbd_debug(2, "restarting handle %p\n", handle);
+       jbd2_debug(2, "restarting handle %p\n", handle);
        stop_this_handle(handle);
        handle->h_transaction = NULL;
 
@@ -979,7 +979,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 
        journal = transaction->t_journal;
 
-       jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
+       jbd2_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
 
        JBUFFER_TRACE(jh, "entry");
 repeat:
@@ -1271,7 +1271,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
        struct journal_head *jh = jbd2_journal_add_journal_head(bh);
        int err;
 
-       jbd_debug(5, "journal_head %p\n", jh);
+       jbd2_debug(5, "journal_head %p\n", jh);
        err = -EROFS;
        if (is_handle_aborted(handle))
                goto out;
@@ -1486,8 +1486,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        struct journal_head *jh;
        int ret = 0;
 
-       if (is_handle_aborted(handle))
-               return -EROFS;
        if (!buffer_jbd(bh))
                return -EUCLEAN;
 
@@ -1496,7 +1494,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
         * of the running transaction.
         */
        jh = bh2jh(bh);
-       jbd_debug(5, "journal_head %p\n", jh);
+       jbd2_debug(5, "journal_head %p\n", jh);
        JBUFFER_TRACE(jh, "entry");
 
        /*
@@ -1534,6 +1532,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        journal = transaction->t_journal;
        spin_lock(&jh->b_state_lock);
 
+       if (is_handle_aborted(handle)) {
+               /*
+                * Check journal aborting with @jh->b_state_lock locked,
+                * since 'jh->b_transaction' could be replaced with
+                * 'jh->b_next_transaction' during old transaction
+                * committing if journal aborted, which may fail
+                * assertion on 'jh->b_frozen_data == NULL'.
+                */
+               ret = -EROFS;
+               goto out_unlock_bh;
+       }
+
        if (jh->b_modified == 0) {
                /*
                 * This buffer's got modified and becoming part
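
A hedged timeline of the race this relocated check closes, reconstructed from the comment above:

/*
 *   CPU0 (commit, journal aborted)    CPU1 (jbd2_journal_dirty_metadata)
 *   ------------------------------    ----------------------------------
 *                                     is_handle_aborted() -> false
 *   jh->b_transaction replaced by
 *   jh->b_next_transaction; stale
 *   jh->b_frozen_data left behind
 *                                     J_ASSERT(jh->b_frozen_data == NULL)
 *                                     fails
 *
 * Testing is_handle_aborted() only after taking jh->b_state_lock makes
 * the check and the subsequent buffer-state use atomic.
 */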
@@ -1818,7 +1828,7 @@ int jbd2_journal_stop(handle_t *handle)
        pid_t pid;
 
        if (--handle->h_ref > 0) {
-               jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+               jbd2_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
                                                 handle->h_ref);
                if (is_handle_aborted(handle))
                        return -EIO;
@@ -1838,7 +1848,7 @@ int jbd2_journal_stop(handle_t *handle)
        if (is_handle_aborted(handle))
                err = -EIO;
 
-       jbd_debug(4, "Handle %p going down\n", handle);
+       jbd2_debug(4, "Handle %p going down\n", handle);
        trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
                                tid, handle->h_type, handle->h_line_no,
                                jiffies - handle->h_start_jiffies,
@@ -1916,7 +1926,7 @@ int jbd2_journal_stop(handle_t *handle)
                 * completes the commit thread, it just doesn't write
                 * anything to disk. */
 
-               jbd_debug(2, "transaction too old, requesting commit for "
+               jbd2_debug(2, "transaction too old, requesting commit for "
                                        "handle %p\n", handle);
                /* This is non-blocking */
                jbd2_log_start_commit(journal, tid);
@@ -2662,7 +2672,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
                return -EROFS;
        journal = transaction->t_journal;
 
-       jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+       jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
                        transaction->t_tid);
 
        spin_lock(&journal->j_list_lock);
index 97c54d3..96f1d49 100644 (file)
@@ -11,7 +11,7 @@
 /*
 * Mbcache is a simple key-value store. Keys need not be unique; however,
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
                return -ENOMEM;
 
        INIT_LIST_HEAD(&entry->e_list);
-       /* One ref for hash, one ref returned */
+       /* Initial hash reference */
        atomic_set(&entry->e_refcnt, 1);
        entry->e_key = key;
        entry->e_value = value;
@@ -106,25 +106,45 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
                }
        }
        hlist_bl_add_head(&entry->e_hash_list, head);
-       hlist_bl_unlock(head);
-
+       /*
+        * Add entry to LRU list before it can be found by
+        * mb_cache_entry_delete_or_get() to avoid races
+        */
        spin_lock(&cache->c_list_lock);
        list_add_tail(&entry->e_list, &cache->c_list);
-       /* Grab ref for LRU list */
-       atomic_inc(&entry->e_refcnt);
        cache->c_entry_count++;
        spin_unlock(&cache->c_list_lock);
+       hlist_bl_unlock(head);
 
        return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);
 
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+       struct hlist_bl_head *head;
+
+       head = mb_cache_entry_head(cache, entry->e_key);
+       hlist_bl_lock(head);
+       hlist_bl_del(&entry->e_hash_list);
+       hlist_bl_unlock(head);
        kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);
 
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+       wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
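
This waiter pairs with the wake_up_var() in mb_cache_entry_put() (see the mbcache.h hunk later in this diff); a refcount of 2 means "hash reference plus the waiter's own reference". Schematically:

	/* waiter: sleep until only the hash ref and our ref remain */
	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);

	/* waker, inside mb_cache_entry_put(): */
	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
	if (cnt > 0 && cnt <= 2)
		wake_up_var(&entry->e_refcnt);	/* a waiter may now be last */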
+
 static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
                                           struct mb_cache_entry *entry,
                                           u32 key)
@@ -142,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
        while (node) {
                entry = hlist_bl_entry(node, struct mb_cache_entry,
                                       e_hash_list);
-               if (entry->e_key == key && entry->e_reusable) {
-                       atomic_inc(&entry->e_refcnt);
+               if (entry->e_key == key && entry->e_reusable &&
+                   atomic_inc_not_zero(&entry->e_refcnt))
                        goto out;
-               }
                node = node->next;
        }
        entry = NULL;
@@ -205,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
        head = mb_cache_entry_head(cache, key);
        hlist_bl_lock(head);
        hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-               if (entry->e_key == key && entry->e_value == value) {
-                       atomic_inc(&entry->e_refcnt);
+               if (entry->e_key == key && entry->e_value == value &&
+                   atomic_inc_not_zero(&entry->e_refcnt))
                        goto out;
-               }
        }
        entry = NULL;
 out:
@@ -217,42 +235,42 @@ out:
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
  * @cache - cache we work with
  * @key - key
  * @value - value
  *
- * Remove entry from cache @cache with key @key and value @value.
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL if the entry
+ * was successfully removed or if there is no such entry in the cache.
+ * Otherwise it grabs a reference to the entry that could not be deleted
+ * because it still has users, and returns that entry.
  */
-void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+                                                   u32 key, u64 value)
 {
-       struct hlist_bl_node *node;
-       struct hlist_bl_head *head;
        struct mb_cache_entry *entry;
 
-       head = mb_cache_entry_head(cache, key);
-       hlist_bl_lock(head);
-       hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-               if (entry->e_key == key && entry->e_value == value) {
-                       /* We keep hash list reference to keep entry alive */
-                       hlist_bl_del_init(&entry->e_hash_list);
-                       hlist_bl_unlock(head);
-                       spin_lock(&cache->c_list_lock);
-                       if (!list_empty(&entry->e_list)) {
-                               list_del_init(&entry->e_list);
-                               if (!WARN_ONCE(cache->c_entry_count == 0,
-               "mbcache: attempt to decrement c_entry_count past zero"))
-                                       cache->c_entry_count--;
-                               atomic_dec(&entry->e_refcnt);
-                       }
-                       spin_unlock(&cache->c_list_lock);
-                       mb_cache_entry_put(cache, entry);
-                       return;
-               }
-       }
-       hlist_bl_unlock(head);
+       entry = mb_cache_entry_get(cache, key, value);
+       if (!entry)
+               return NULL;
+
+       /*
+        * Drop the ref we got from mb_cache_entry_get() and the initial hash
+        * ref if we are the last user
+        */
+       if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+               return entry;
+
+       spin_lock(&cache->c_list_lock);
+       if (!list_empty(&entry->e_list))
+               list_del_init(&entry->e_list);
+       cache->c_entry_count--;
+       spin_unlock(&cache->c_list_lock);
+       __mb_cache_entry_free(cache, entry);
+       return NULL;
 }
-EXPORT_SYMBOL(mb_cache_entry_delete);
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
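
Typical caller pattern, mirroring ext4_evict_ea_inode() earlier in this diff: try to delete, and while the entry is busy, wait out the other users and retry:

	struct mb_cache_entry *oe;

	while ((oe = mb_cache_entry_delete_or_get(cache, key, value))) {
		mb_cache_entry_wait_unused(oe);	/* sleep until we are last */
		mb_cache_entry_put(cache, oe);	/* drop our ref and retry */
	}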
 
 /* mb_cache_entry_touch - cache entry got used
  * @cache - cache the entry belongs to
@@ -281,34 +299,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
                                     unsigned long nr_to_scan)
 {
        struct mb_cache_entry *entry;
-       struct hlist_bl_head *head;
        unsigned long shrunk = 0;
 
        spin_lock(&cache->c_list_lock);
        while (nr_to_scan-- && !list_empty(&cache->c_list)) {
                entry = list_first_entry(&cache->c_list,
                                         struct mb_cache_entry, e_list);
-               if (entry->e_referenced) {
+               /* Drop initial hash reference if there is no user */
+               if (entry->e_referenced ||
+                   atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
                        entry->e_referenced = 0;
                        list_move_tail(&entry->e_list, &cache->c_list);
                        continue;
                }
                list_del_init(&entry->e_list);
                cache->c_entry_count--;
-               /*
-                * We keep LRU list reference so that entry doesn't go away
-                * from under us.
-                */
                spin_unlock(&cache->c_list_lock);
-               head = mb_cache_entry_head(cache, entry->e_key);
-               hlist_bl_lock(head);
-               if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-                       hlist_bl_del_init(&entry->e_hash_list);
-                       atomic_dec(&entry->e_refcnt);
-               }
-               hlist_bl_unlock(head);
-               if (mb_cache_entry_put(cache, entry))
-                       shrunk++;
+               __mb_cache_entry_free(cache, entry);
+               shrunk++;
                cond_resched();
                spin_lock(&cache->c_list_lock);
        }
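
The atomic_cmpxchg() above is the "claim the last reference" idiom: the 1 -> 0 transition succeeds only when the hash reference is the sole reference, and since lookups elsewhere in this diff use atomic_inc_not_zero(), an entry whose count reached zero can never be resurrected. In isolation:

	if (atomic_cmpxchg(&entry->e_refcnt, 1, 0) == 1)
		__mb_cache_entry_free(cache, entry);	/* nobody can see it */
	/* otherwise the entry is in use; keep it on the LRU */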
@@ -400,11 +408,6 @@ void mb_cache_destroy(struct mb_cache *cache)
         * point.
         */
        list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-               if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-                       hlist_bl_del_init(&entry->e_hash_list);
-                       atomic_dec(&entry->e_refcnt);
-               } else
-                       WARN_ON(1);
                list_del(&entry->e_list);
                WARN_ON(atomic_read(&entry->e_refcnt) != 1);
                mb_cache_entry_put(cache, entry);
index dc17241..0b72423 100644 (file)
  * CONFIG_JBD2_DEBUG is on.
  */
 #define JBD2_EXPENSIVE_CHECKING
-extern ushort jbd2_journal_enable_debug;
 void __jbd2_debug(int level, const char *file, const char *func,
                  unsigned int line, const char *fmt, ...);
 
-#define jbd_debug(n, fmt, a...) \
+#define jbd2_debug(n, fmt, a...) \
        __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)
 #else
-#define jbd_debug(n, fmt, a...)  no_printk(fmt, ##a)
+#define jbd2_debug(n, fmt, a...)  no_printk(fmt, ##a)
 #endif
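
Call sites are unchanged apart from the name, e.g.:

	jbd2_debug(3, "JBD2: commit phase 2a\n");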
 
 extern void *jbd2_alloc(size_t size, gfp_t flags);
@@ -1647,7 +1646,6 @@ extern void       jbd2_clear_buffer_revoked_flags(journal_t *journal);
  */
 
 int jbd2_log_start_commit(journal_t *journal, tid_t tid);
-int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
 int jbd2_transaction_committed(journal_t *journal, tid_t tid);
index 20f1e3f..2da63fd 100644 (file)
@@ -13,8 +13,16 @@ struct mb_cache;
 struct mb_cache_entry {
        /* List of entries in cache - protected by cache->c_list_lock */
        struct list_head        e_list;
-       /* Hash table list - protected by hash chain bitlock */
+       /*
+        * Hash table list - protected by hash chain bitlock. The entry is
+        * guaranteed to be hashed while e_refcnt > 0.
+        */
        struct hlist_bl_node    e_hash_list;
+       /*
+        * Entry refcount. Once it reaches zero, entry is unhashed and freed.
+        * While refcount > 0, the entry is guaranteed to stay in the hash and
+        * e.g. mb_cache_entry_delete_or_get() will fail to remove it.
+        */
        atomic_t                e_refcnt;
        /* Key in hash - stable during lifetime of the entry */
        u32                     e_key;
@@ -29,17 +37,24 @@ void mb_cache_destroy(struct mb_cache *cache);
 
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
                          u64 value, bool reusable);
-void __mb_cache_entry_free(struct mb_cache_entry *entry);
-static inline int mb_cache_entry_put(struct mb_cache *cache,
-                                    struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache,
+                          struct mb_cache_entry *entry);
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+static inline void mb_cache_entry_put(struct mb_cache *cache,
+                                     struct mb_cache_entry *entry)
 {
-       if (!atomic_dec_and_test(&entry->e_refcnt))
-               return 0;
-       __mb_cache_entry_free(entry);
-       return 1;
+       unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+
+       if (cnt > 0) {
+               if (cnt <= 2)
+                       wake_up_var(&entry->e_refcnt);
+               return;
+       }
+       __mb_cache_entry_free(cache, entry);
 }
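
Putting the pieces together, the refcount lifecycle under the new scheme looks roughly like this (illustrative, one concurrent user assumed):

/*
 *   mb_cache_entry_create():    e_refcnt = 1  (hash reference only)
 *   mb_cache_entry_get():       e_refcnt = 2  (hash + one user)
 *   mb_cache_entry_put():       e_refcnt = 1  (wakes waiters at <= 2)
 *   shrinker cmpxchg(1 -> 0):   entry unhashed and freed
 */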
 
-void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+                                                   u32 key, u64 value);
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
                                          u64 value);
 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,