Merge tag 'erofs-for-5.16-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 2741196..9a249bf 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -229,7 +229,7 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 static void preload_compressed_pages(struct z_erofs_collector *clt,
                                     struct address_space *mc,
                                     enum z_erofs_cache_alloctype type,
-                                    struct list_head *pagepool)
+                                    struct page **pagepool)
 {
        struct z_erofs_pcluster *pcl = clt->pcl;
        bool standalone = true;
@@ -276,12 +276,10 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
                if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
                        continue;
 
-               if (page) {
+               if (page)
                        put_page(page);
-               } else if (newpage) {
-                       set_page_private(newpage, 0);
-                       list_add(&newpage->lru, pagepool);
-               }
+               else if (newpage)
+                       erofs_pagepool_add(pagepool, newpage);
        }
 
        /*
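The hunk above (and several below) switches the spare-page pool from a list_head chained through page->lru and drained with put_pages_list() to a singly-linked chain threaded through page->private. As a rough sketch (not part of this patch; the actual fs/erofs helpers may differ in detail), erofs_pagepool_add() and erofs_release_pages() presumably amount to:

void erofs_pagepool_add(struct page **pagepool, struct page *page)
{
	/* push the spare page onto the pool, linked via ->private */
	set_page_private(page, (unsigned long)*pagepool);
	*pagepool = page;
}

void erofs_release_pages(struct page **pagepool)
{
	/* pop and free every spare page left over at the end of a request */
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		put_page(page);
	}
}

This keeps temporary pages off ->lru entirely and presumably still lets erofs_allocpage() recycle pool pages before falling back to the page allocator.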
@@ -465,6 +463,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
        struct erofs_workgroup *grp;
        int err;
 
+       if (!(map->m_flags & EROFS_MAP_ENCODED)) {
+               DBG_BUGON(1);
+               return -EFSCORRUPTED;
+       }
+
        /* no available pcluster, let's allocate one */
        pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
        if (IS_ERR(pcl))
@@ -472,16 +475,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 
        atomic_set(&pcl->obj.refcount, 1);
        pcl->obj.index = map->m_pa >> PAGE_SHIFT;
-
+       pcl->algorithmformat = map->m_algorithmformat;
        pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
                (map->m_flags & EROFS_MAP_FULL_MAPPED ?
                        Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
 
-       if (map->m_flags & EROFS_MAP_ZIPPED)
-               pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
-       else
-               pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
-
        /* new pclusters should be claimed as type 1, primary and followed */
        pcl->next = clt->owned_head;
        clt->mode = COLLECT_PRIMARY_FOLLOWED;
@@ -632,7 +630,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-                               struct page *page, struct list_head *pagepool)
+                               struct page *page, struct page **pagepool)
 {
        struct inode *const inode = fe->inode;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -684,7 +682,7 @@ restart_now:
                goto err_out;
 
        /* preload all compressed pages (maybe downgrade role if necessary) */
-       if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
+       if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
                cache_strategy = TRYALLOC;
        else
                cache_strategy = DONTALLOC;
@@ -786,7 +784,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
        /* Use workqueue and sync decompression for atomic contexts only */
        if (in_atomic() || irqs_disabled()) {
                queue_work(z_erofs_workqueue, &io->u.work);
-               sbi->ctx.readahead_sync_decompress = true;
+               sbi->opt.readahead_sync_decompress = true;
                return;
        }
        z_erofs_decompressqueue_work(&io->u.work);
@@ -826,7 +824,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
 
 static int z_erofs_decompress_pcluster(struct super_block *sb,
                                       struct z_erofs_pcluster *pcl,
-                                      struct list_head *pagepool)
+                                      struct page **pagepool)
 {
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct z_erofs_pagevec_ctor ctor;
@@ -1026,7 +1024,7 @@ out:
 }
 
 static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
-                                    struct list_head *pagepool)
+                                    struct page **pagepool)
 {
        z_erofs_next_pcluster_t owned = io->head;
 
@@ -1050,18 +1048,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
 {
        struct z_erofs_decompressqueue *bgq =
                container_of(work, struct z_erofs_decompressqueue, u.work);
-       LIST_HEAD(pagepool);
+       struct page *pagepool = NULL;
 
        DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
        z_erofs_decompress_queue(bgq, &pagepool);
 
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
        kvfree(bgq);
 }
 
 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
                                               unsigned int nr,
-                                              struct list_head *pagepool,
+                                              struct page **pagepool,
                                               struct address_space *mc,
                                               gfp_t gfp)
 {
@@ -1154,7 +1152,7 @@ repeat:
 out_allocpage:
        page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
        if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
-               list_add(&page->lru, pagepool);
+               erofs_pagepool_add(pagepool, page);
                cond_resched();
                goto repeat;
        }
@@ -1238,7 +1236,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
 
 static void z_erofs_submit_queue(struct super_block *sb,
                                 struct z_erofs_decompress_frontend *f,
-                                struct list_head *pagepool,
+                                struct page **pagepool,
                                 struct z_erofs_decompressqueue *fgq,
                                 bool *force_fg)
 {
@@ -1247,8 +1245,9 @@ static void z_erofs_submit_queue(struct super_block *sb,
        struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
        void *bi_private;
        z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
-       /* since bio will be NULL, no need to initialize last_index */
+       /* bio is NULL initially, so no need to initialize last_{index,bdev} */
        pgoff_t last_index;
+       struct block_device *last_bdev;
        unsigned int nr_bios = 0;
        struct bio *bio = NULL;
 
@@ -1260,6 +1259,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
        q[JQ_SUBMIT]->head = owned_head;
 
        do {
+               struct erofs_map_dev mdev;
                struct z_erofs_pcluster *pcl;
                pgoff_t cur, end;
                unsigned int i = 0;
@@ -1271,7 +1271,13 @@ static void z_erofs_submit_queue(struct super_block *sb,
 
                pcl = container_of(owned_head, struct z_erofs_pcluster, next);
 
-               cur = pcl->obj.index;
+               /* no device id here, thus it will always succeed */
+               mdev = (struct erofs_map_dev) {
+                       .m_pa = blknr_to_addr(pcl->obj.index),
+               };
+               (void)erofs_map_dev(sb, &mdev);
+
+               cur = erofs_blknr(mdev.m_pa);
                end = cur + pcl->pclusterpages;
 
                /* close the main owned chain at first */
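Here the physical cluster address is now routed through erofs_map_dev() so that multi-device images resolve to the correct block device before the bio is built. A hedged sketch of that helper (not part of this patch; structure and field names such as erofs_dev_context/erofs_device_info are assumptions, and the real helper additionally handles a non-zero map->m_deviceid, omitted here) shows why the call cannot fail when no device id is given:

int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	int id;

	map->m_bdev = sb->s_bdev;	/* primary device by default */
	if (!devs->extra_devices)
		return 0;		/* single-device image: done */

	down_read(&devs->rwsem);
	idr_for_each_entry(&devs->tree, dif, id) {
		erofs_off_t start = blknr_to_addr(dif->mapped_blkaddr);
		erofs_off_t len = blknr_to_addr(dif->blocks);

		if (map->m_pa >= start && map->m_pa < start + len) {
			/* rebase the address into the extra device */
			map->m_pa -= start;
			map->m_bdev = dif->bdev;
			break;
		}
	}
	up_read(&devs->rwsem);
	return 0;
}

With a single backing device the function simply falls through to sb->s_bdev, so the (void) cast on the return value above is safe.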
@@ -1287,7 +1293,8 @@ static void z_erofs_submit_queue(struct super_block *sb,
                        if (!page)
                                continue;
 
-                       if (bio && cur != last_index + 1) {
+                       if (bio && (cur != last_index + 1 ||
+                                   last_bdev != mdev.m_bdev)) {
 submit_bio_retry:
                                submit_bio(bio);
                                bio = NULL;
@@ -1295,9 +1302,10 @@ submit_bio_retry:
 
                        if (!bio) {
                                bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
-
                                bio->bi_end_io = z_erofs_decompressqueue_endio;
-                               bio_set_dev(bio, sb->s_bdev);
+
+                               bio_set_dev(bio, mdev.m_bdev);
+                               last_bdev = mdev.m_bdev;
                                bio->bi_iter.bi_sector = (sector_t)cur <<
                                        LOG_SECTORS_PER_BLOCK;
                                bio->bi_private = bi_private;
@@ -1336,7 +1344,7 @@ submit_bio_retry:
 
 static void z_erofs_runqueue(struct super_block *sb,
                             struct z_erofs_decompress_frontend *f,
-                            struct list_head *pagepool, bool force_fg)
+                            struct page **pagepool, bool force_fg)
 {
        struct z_erofs_decompressqueue io[NR_JOBQUEUES];
 
@@ -1358,18 +1366,87 @@ static void z_erofs_runqueue(struct super_block *sb,
        z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
 }
 
+/*
+ * Since partial uptodate is still unimplemented for now, we have to use
+ * approximate readmore strategies as a start.
+ */
+static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+                                     struct readahead_control *rac,
+                                     erofs_off_t end,
+                                     struct page **pagepool,
+                                     bool backmost)
+{
+       struct inode *inode = f->inode;
+       struct erofs_map_blocks *map = &f->map;
+       erofs_off_t cur;
+       int err;
+
+       if (backmost) {
+               map->m_la = end;
+               err = z_erofs_map_blocks_iter(inode, map,
+                                             EROFS_GET_BLOCKS_READMORE);
+               if (err)
+                       return;
+
+               /* expand ra for the trailing edge if readahead */
+               if (rac) {
+                       loff_t newstart = readahead_pos(rac);
+
+                       cur = round_up(map->m_la + map->m_llen, PAGE_SIZE);
+                       readahead_expand(rac, newstart, cur - newstart);
+                       return;
+               }
+               end = round_up(end, PAGE_SIZE);
+       } else {
+               end = round_up(map->m_la, PAGE_SIZE);
+
+               if (!map->m_llen)
+                       return;
+       }
+
+       cur = map->m_la + map->m_llen - 1;
+       while (cur >= end) {
+               pgoff_t index = cur >> PAGE_SHIFT;
+               struct page *page;
+
+               page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
+               if (!page)
+                       goto skip;
+
+               if (PageUptodate(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       goto skip;
+               }
+
+               err = z_erofs_do_read_page(f, page, pagepool);
+               if (err)
+                       erofs_err(inode->i_sb,
+                                 "readmore error at page %lu @ nid %llu",
+                                 index, EROFS_I(inode)->nid);
+               put_page(page);
+skip:
+               if (cur < PAGE_SIZE)
+                       break;
+               cur = (index << PAGE_SHIFT) - 1;
+       }
+}
+
 static int z_erofs_readpage(struct file *file, struct page *page)
 {
        struct inode *const inode = page->mapping->host;
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+       struct page *pagepool = NULL;
        int err;
-       LIST_HEAD(pagepool);
 
        trace_erofs_readpage(page, false);
-
        f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
+       z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1,
+                                 &pagepool, true);
        err = z_erofs_do_read_page(&f, page, &pagepool);
+       z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
+
        (void)z_erofs_collector_end(&f.clt);
 
        /* if some compressed cluster ready, need submit them anyway */
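The readmore pass added above matters because a physical cluster can logically extend well past the pages the VFS asked for; decompressing it once and filling the whole extent avoids re-reading and re-decompressing the same pcluster shortly afterwards. As a rough worked example (hypothetical offsets, 4 KiB pages, not part of this patch): a readahead request covers [0, 16 KiB) and the backmost lookup reports that the trailing pcluster spans m_la = 12288, m_llen = 28672, i.e. [12 KiB, 40 KiB):

	loff_t newstart = readahead_pos(rac);			/* still 0 here */
	erofs_off_t cur = round_up(12288 + 28672, PAGE_SIZE);	/* 40960 */

	/* grow the window from 16 KiB to 40 KiB so the whole trailing
	 * pcluster is decompressed in a single round */
	readahead_expand(rac, newstart, cur - newstart);

For the leading (non-backmost) edge there is no readahead window to grow, so the helper instead walks backwards page by page, grabbing not-yet-uptodate cached pages and feeding them through z_erofs_do_read_page().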
@@ -1381,8 +1458,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
        if (f.map.mpage)
                put_page(f.map.mpage);
 
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
        return err;
 }
 
@@ -1390,29 +1466,19 @@ static void z_erofs_readahead(struct readahead_control *rac)
 {
        struct inode *const inode = rac->mapping->host;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-       unsigned int nr_pages = readahead_count(rac);
-       bool sync = (sbi->ctx.readahead_sync_decompress &&
-                       nr_pages <= sbi->ctx.max_sync_decompress_pages);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-       struct page *page, *head = NULL;
-       LIST_HEAD(pagepool);
-
-       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+       struct page *pagepool = NULL, *head = NULL, *page;
+       unsigned int nr_pages;
 
        f.readahead = true;
        f.headoffset = readahead_pos(rac);
 
-       while ((page = readahead_page(rac))) {
-               prefetchw(&page->flags);
-
-               /*
-                * A pure asynchronous readahead is indicated if
-                * a PG_readahead marked page is hitted at first.
-                * Let's also do asynchronous decompression for this case.
-                */
-               sync &= !(PageReadahead(page) && !head);
+       z_erofs_pcluster_readmore(&f, rac, f.headoffset +
+                                 readahead_length(rac) - 1, &pagepool, true);
+       nr_pages = readahead_count(rac);
+       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
 
+       while ((page = readahead_page(rac))) {
                set_page_private(page, (unsigned long)head);
                head = page;
        }
@@ -1431,16 +1497,15 @@ static void z_erofs_readahead(struct readahead_control *rac)
                                  page->index, EROFS_I(inode)->nid);
                put_page(page);
        }
-
+       z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
        (void)z_erofs_collector_end(&f.clt);
 
-       z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
-
+       z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+                        sbi->opt.readahead_sync_decompress &&
+                        nr_pages <= sbi->opt.max_sync_decompress_pages);
        if (f.map.mpage)
                put_page(f.map.mpage);
-
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
 }
 
 const struct address_space_operations z_erofs_aops = {