btrfs: subpage: fix a rare race between metadata endio and eb freeing

author Qu Wenruo <wqu@suse.com>

Mon, 7 Jun 2021 09:02:58 +0000 (17:02 +0800)

committer David Sterba <dsterba@suse.com>

Mon, 21 Jun 2021 13:19:10 +0000 (15:19 +0200)
author Qu Wenruo <wqu@suse.com>
Mon, 7 Jun 2021 09:02:58 +0000 (17:02 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 21 Jun 2021 13:19:10 +0000 (15:19 +0200)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 1acbb7f..9e81d25 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2687,21 +2687,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
         ASSERT(page_offset(page) <= start &&
                start + len <= page_offset(page) + PAGE_SIZE);
  
-       /*
-        * For subapge metadata case, all btrfs_page_* helpers need page to
-        * have page::private populated.
-        * But we can have rare case where the last eb in the page is only
-        * referred by the IO, and it gets released immedately after it's
-        * read and verified.
-        *
-        * This can detach the page private completely.
-        * In that case, we can just skip the page status update completely,
-        * as the page has no eb anymore.
-        */
-       if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) {
-               ASSERT(!is_data_inode(page->mapping->host));
-               return;
-       }
         if (uptodate) {
                 btrfs_page_set_uptodate(fs_info, page, start, len);
         } else {
@@ -2711,11 +2696,7 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
  
         if (fs_info->sectorsize == PAGE_SIZE)
                 unlock_page(page);
-       else if (is_data_inode(page->mapping->host))
-               /*
-                * For subpage data, unlock the page if we're the last reader.
-                * For subpage metadata, page lock is not utilized for read.
-                */
+       else
                 btrfs_subpage_end_reader(fs_info, page, start, len);
  }
  
@@ -5603,6 +5584,12 @@ static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
                 subpage = (struct btrfs_subpage *)page->private;
                 if (atomic_read(&subpage->eb_refs))
                         return true;
+               /*
+                * Even if there are no eb refs here, we may still have an
+                * end_page_read() call relying on page::private.
+                */
+               if (atomic_read(&subpage->readers))
+                       return true;
         }
         return false;
  }
@@ -5663,7 +5650,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
  
         /*
          * We can only detach the page private if there are no other ebs in the
-        * page range.
+        * page range and no unfinished IO.
          */
         if (!page_range_has_eb(fs_info, page))
                 btrfs_detach_subpage(fs_info, page);
@@ -6381,6 +6368,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
         check_buffer_tree_ref(eb);
         btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
  
+       btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
         ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
                                  page, eb->start, eb->len,
                                  eb->start - page_offset(page),
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c

index 7d72eaf..640bcd2 100644 (file)
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -3,6 +3,7 @@
  #include <linux/slab.h>
  #include "ctree.h"
  #include "subpage.h"
+#include "btrfs_inode.h"
  
  /*
   * Subpage (sectorsize < PAGE_SIZE) support overview:
@@ -185,12 +186,10 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
  {
         struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
         const int nbits = len >> fs_info->sectorsize_bits;
-       int ret;
  
         btrfs_subpage_assert(fs_info, page, start, len);
  
-       ret = atomic_add_return(nbits, &subpage->readers);
-       ASSERT(ret == nbits);
+       atomic_add(nbits, &subpage->readers);
  }
  
  void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
@@ -198,10 +197,22 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
  {
         struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
         const int nbits = len >> fs_info->sectorsize_bits;
+       bool is_data;
+       bool last;
  
         btrfs_subpage_assert(fs_info, page, start, len);
+       is_data = is_data_inode(page->mapping->host);
         ASSERT(atomic_read(&subpage->readers) >= nbits);
-       if (atomic_sub_and_test(nbits, &subpage->readers))
+       last = atomic_sub_and_test(nbits, &subpage->readers);
+
+       /*
+        * For data we need to unlock the page if the last read has finished.
+        *
+        * Do not fold the atomic_sub_and_test() call into the if () condition
+        * in place of @last: when @is_data is false, short-circuit evaluation
+        * would skip it, and the readers count must always be decremented.
+        */
+       if (is_data && last)
                 unlock_page(page);
  }
  
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h

index 65298a5..4d7aca8 100644 (file)
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -22,6 +22,14 @@ struct btrfs_subpage {
         u16 error_bitmap;
         u16 dirty_bitmap;
         u16 writeback_bitmap;
+       /*
+        * Both data and metadata need to track how many readers there are for
+        * the page.
+        * Data relies on @readers to unlock the page when the last reader has
+        * finished. While metadata doesn't need page unlock, it needs to keep
+        * page::private from being cleared before the last end_page_read().
+        */
+       atomic_t readers;
         union {
                 /*
                  * Structures only used by metadata
@@ -32,7 +40,6 @@ struct btrfs_subpage {
                 atomic_t eb_refs;
                 /* Structures only used by data */
                 struct {
-                       atomic_t readers;
                         atomic_t writers;
  
                         /* Tracke pending ordered extent in this sector */
author	Qu Wenruo <wqu@suse.com>
	Mon, 7 Jun 2021 09:02:58 +0000 (17:02 +0800)
committer	David Sterba <dsterba@suse.com>
	Mon, 21 Jun 2021 13:19:10 +0000 (15:19 +0200)
fs/btrfs/extent_io.c		patch \| blob \| history
fs/btrfs/subpage.c		patch \| blob \| history
fs/btrfs/subpage.h		patch \| blob \| history