Merge tag 'erofs-for-5.16-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 2741196..9a249bf 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -229,7 +229,7 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 static void preload_compressed_pages(struct z_erofs_collector *clt,
                                     struct address_space *mc,
                                     enum z_erofs_cache_alloctype type,
-                                    struct list_head *pagepool)
+                                    struct page **pagepool)
 {
        struct z_erofs_pcluster *pcl = clt->pcl;
        bool standalone = true;
@@ -276,12 +276,10 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
                if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
                        continue;
 
-               if (page) {
+               if (page)
                        put_page(page);
-               } else if (newpage) {
-                       set_page_private(newpage, 0);
-                       list_add(&newpage->lru, pagepool);
-               }
+               else if (newpage)
+                       erofs_pagepool_add(pagepool, newpage);
        }
 
        /*
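The hunk above (and several below) switches the spare-page pool from a list_head chained through page->lru and drained with put_pages_list() to a singly-linked chain threaded through page->private. As a rough sketch (not part of this patch; the actual fs/erofs helpers may differ in detail), erofs_pagepool_add() and erofs_release_pages() presumably amount to:

void erofs_pagepool_add(struct page **pagepool, struct page *page)
{
	/* push the spare page onto the pool, linked via ->private */
	set_page_private(page, (unsigned long)*pagepool);
	*pagepool = page;
}

void erofs_release_pages(struct page **pagepool)
{
	/* pop and free every spare page left over at the end of a request */
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		put_page(page);
	}
}

This keeps temporary pages off ->lru entirely and presumably still lets erofs_allocpage() recycle pool pages before falling back to the page allocator.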
@@ -465,6 +463,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
        struct erofs_workgroup *grp;
        int err;
 
+       if (!(map->m_flags & EROFS_MAP_ENCODED)) {
+               DBG_BUGON(1);
+               return -EFSCORRUPTED;
+       }
+
        /* no available pcluster, let's allocate one */
        pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
        if (IS_ERR(pcl))
@@ -472,16 +475,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 
        atomic_set(&pcl->obj.refcount, 1);
        pcl->obj.index = map->m_pa >> PAGE_SHIFT;
-
+       pcl->algorithmformat = map->m_algorithmformat;
        pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
                (map->m_flags & EROFS_MAP_FULL_MAPPED ?
                        Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
 
-       if (map->m_flags & EROFS_MAP_ZIPPED)
-               pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
-       else
-               pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
-
        /* new pclusters should be claimed as type 1, primary and followed */
        pcl->next = clt->owned_head;
        clt->mode = COLLECT_PRIMARY_FOLLOWED;
@@ -632,7 +630,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-                               struct page *page, struct list_head *pagepool)
+                               struct page *page, struct page **pagepool)
 {
        struct inode *const inode = fe->inode;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -684,7 +682,7 @@ restart_now:
                goto err_out;
 
        /* preload all compressed pages (maybe downgrade role if necessary) */
-       if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
+       if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
                cache_strategy = TRYALLOC;
        else
                cache_strategy = DONTALLOC;
@@ -786,7 +784,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
        /* Use workqueue and sync decompression for atomic contexts only */
        if (in_atomic() || irqs_disabled()) {
                queue_work(z_erofs_workqueue, &io->u.work);
-               sbi->ctx.readahead_sync_decompress = true;
+               sbi->opt.readahead_sync_decompress = true;
                return;
        }
        z_erofs_decompressqueue_work(&io->u.work);
@@ -826,7 +824,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
 
 static int z_erofs_decompress_pcluster(struct super_block *sb,
                                       struct z_erofs_pcluster *pcl,
-                                      struct list_head *pagepool)
+                                      struct page **pagepool)
 {
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct z_erofs_pagevec_ctor ctor;
@@ -1026,7 +1024,7 @@ out:
 }
 
 static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
-                                    struct list_head *pagepool)
+                                    struct page **pagepool)
 {
        z_erofs_next_pcluster_t owned = io->head;
 
@@ -1050,18 +1048,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
 {
        struct z_erofs_decompressqueue *bgq =
                container_of(work, struct z_erofs_decompressqueue, u.work);
-       LIST_HEAD(pagepool);
+       struct page *pagepool = NULL;
 
        DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
        z_erofs_decompress_queue(bgq, &pagepool);
 
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
        kvfree(bgq);
 }
 
 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
                                               unsigned int nr,
-                                              struct list_head *pagepool,
+                                              struct page **pagepool,
                                               struct address_space *mc,
                                               gfp_t gfp)
 {
@@ -1154,7 +1152,7 @@ repeat:
 out_allocpage:
        page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
        if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
-               list_add(&page->lru, pagepool);
+               erofs_pagepool_add(pagepool, page);
                cond_resched();
                goto repeat;
        }
@@ -1238,7 +1236,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
 
 static void z_erofs_submit_queue(struct super_block *sb,
                                 struct z_erofs_decompress_frontend *f,
-                                struct list_head *pagepool,
+                                struct page **pagepool,
                                 struct z_erofs_decompressqueue *fgq,
                                 bool *force_fg)
 {
@@ -1247,8 +1245,9 @@ static void z_erofs_submit_queue(struct super_block *sb,
        struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
        void *bi_private;
        z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
-       /* since bio will be NULL, no need to initialize last_index */
+       /* bio is NULL initially, so no need to initialize last_{index,bdev} */
        pgoff_t last_index;
+       struct block_device *last_bdev;
        unsigned int nr_bios = 0;
        struct bio *bio = NULL;
 
@@ -1260,6 +1259,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
        q[JQ_SUBMIT]->head = owned_head;
 
        do {
+               struct erofs_map_dev mdev;
                struct z_erofs_pcluster *pcl;
                pgoff_t cur, end;
                unsigned int i = 0;
@@ -1271,7 +1271,13 @@ static void z_erofs_submit_queue(struct super_block *sb,
 
                pcl = container_of(owned_head, struct z_erofs_pcluster, next);
 
-               cur = pcl->obj.index;
+               /* no device id here, thus it will always succeed */
+               mdev = (struct erofs_map_dev) {
+                       .m_pa = blknr_to_addr(pcl->obj.index),
+               };
+               (void)erofs_map_dev(sb, &mdev);
+
+               cur = erofs_blknr(mdev.m_pa);
                end = cur + pcl->pclusterpages;
 
                /* close the main owned chain at first */
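Here the physical cluster address is now routed through erofs_map_dev() so that multi-device images resolve to the correct block device before the bio is built. A hedged sketch of that helper (not part of this patch; structure and field names such as erofs_dev_context/erofs_device_info are assumptions, and the real helper additionally handles a non-zero map->m_deviceid, omitted here) shows why the call cannot fail when no device id is given:

int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	int id;

	map->m_bdev = sb->s_bdev;	/* primary device by default */
	if (!devs->extra_devices)
		return 0;		/* single-device image: done */

	down_read(&devs->rwsem);
	idr_for_each_entry(&devs->tree, dif, id) {
		erofs_off_t start = blknr_to_addr(dif->mapped_blkaddr);
		erofs_off_t len = blknr_to_addr(dif->blocks);

		if (map->m_pa >= start && map->m_pa < start + len) {
			/* rebase the address into the extra device */
			map->m_pa -= start;
			map->m_bdev = dif->bdev;
			break;
		}
	}
	up_read(&devs->rwsem);
	return 0;
}

With a single backing device the function simply falls through to sb->s_bdev, so the (void) cast on the return value above is safe.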
@@ -1287,7 +1293,8 @@ static void z_erofs_submit_queue(struct super_block *sb,
                        if (!page)
                                continue;
 
-                       if (bio && cur != last_index + 1) {
+                       if (bio && (cur != last_index + 1 ||
+                                   last_bdev != mdev.m_bdev)) {
 submit_bio_retry:
                                submit_bio(bio);
                                bio = NULL;
@@ -1295,9 +1302,10 @@ submit_bio_retry:
 
                        if (!bio) {
                                bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
-
                                bio->bi_end_io = z_erofs_decompressqueue_endio;
-                               bio_set_dev(bio, sb->s_bdev);
+
+                               bio_set_dev(bio, mdev.m_bdev);
+                               last_bdev = mdev.m_bdev;
                                bio->bi_iter.bi_sector = (sector_t)cur <<
                                        LOG_SECTORS_PER_BLOCK;
                                bio->bi_private = bi_private;
@@ -1336,7 +1344,7 @@ submit_bio_retry:
 
 static void z_erofs_runqueue(struct super_block *sb,
                             struct z_erofs_decompress_frontend *f,
-                            struct list_head *pagepool, bool force_fg)
+                            struct page **pagepool, bool force_fg)
 {
        struct z_erofs_decompressqueue io[NR_JOBQUEUES];
 
@@ -1358,18 +1366,87 @@ static void z_erofs_runqueue(struct super_block *sb,
        z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
 }
 
+/*
+ * Since partial uptodate is still unimplemented for now, we have to use
+ * approximate readmore strategies as a start.
+ */
+static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+                                     struct readahead_control *rac,
+                                     erofs_off_t end,
+                                     struct page **pagepool,
+                                     bool backmost)
+{
+       struct inode *inode = f->inode;
+       struct erofs_map_blocks *map = &f->map;
+       erofs_off_t cur;
+       int err;
+
+       if (backmost) {
+               map->m_la = end;
+               err = z_erofs_map_blocks_iter(inode, map,
+                                             EROFS_GET_BLOCKS_READMORE);
+               if (err)
+                       return;
+
+               /* expand ra for the trailing edge if readahead */
+               if (rac) {
+                       loff_t newstart = readahead_pos(rac);
+
+                       cur = round_up(map->m_la + map->m_llen, PAGE_SIZE);
+                       readahead_expand(rac, newstart, cur - newstart);
+                       return;
+               }
+               end = round_up(end, PAGE_SIZE);
+       } else {
+               end = round_up(map->m_la, PAGE_SIZE);
+
+               if (!map->m_llen)
+                       return;
+       }
+
+       cur = map->m_la + map->m_llen - 1;
+       while (cur >= end) {
+               pgoff_t index = cur >> PAGE_SHIFT;
+               struct page *page;
+
+               page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
+               if (!page)
+                       goto skip;
+
+               if (PageUptodate(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       goto skip;
+               }
+
+               err = z_erofs_do_read_page(f, page, pagepool);
+               if (err)
+                       erofs_err(inode->i_sb,
+                                 "readmore error at page %lu @ nid %llu",
+                                 index, EROFS_I(inode)->nid);
+               put_page(page);
+skip:
+               if (cur < PAGE_SIZE)
+                       break;
+               cur = (index << PAGE_SHIFT) - 1;
+       }
+}
+
 static int z_erofs_readpage(struct file *file, struct page *page)
 {
        struct inode *const inode = page->mapping->host;
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+       struct page *pagepool = NULL;
        int err;
-       LIST_HEAD(pagepool);
 
        trace_erofs_readpage(page, false);
-
        f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
+       z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1,
+                                 &pagepool, true);
        err = z_erofs_do_read_page(&f, page, &pagepool);
+       z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
+
        (void)z_erofs_collector_end(&f.clt);
 
        /* if some compressed cluster ready, need submit them anyway */
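The readmore pass added above matters because a physical cluster can logically extend well past the pages the VFS asked for; decompressing it once and filling the whole extent avoids re-reading and re-decompressing the same pcluster shortly afterwards. As a rough worked example (hypothetical offsets, 4 KiB pages, not part of this patch): a readahead request covers [0, 16 KiB) and the backmost lookup reports that the trailing pcluster spans m_la = 12288, m_llen = 28672, i.e. [12 KiB, 40 KiB):

	loff_t newstart = readahead_pos(rac);			/* still 0 here */
	erofs_off_t cur = round_up(12288 + 28672, PAGE_SIZE);	/* 40960 */

	/* grow the window from 16 KiB to 40 KiB so the whole trailing
	 * pcluster is decompressed in a single round */
	readahead_expand(rac, newstart, cur - newstart);

For the leading (non-backmost) edge there is no readahead window to grow, so the helper instead walks backwards page by page, grabbing not-yet-uptodate cached pages and feeding them through z_erofs_do_read_page().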
@@ -1381,8 +1458,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
        if (f.map.mpage)
                put_page(f.map.mpage);
 
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
        return err;
 }
 
@@ -1390,29 +1466,19 @@ static void z_erofs_readahead(struct readahead_control *rac)
 {
        struct inode *const inode = rac->mapping->host;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-       unsigned int nr_pages = readahead_count(rac);
-       bool sync = (sbi->ctx.readahead_sync_decompress &&
-                       nr_pages <= sbi->ctx.max_sync_decompress_pages);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-       struct page *page, *head = NULL;
-       LIST_HEAD(pagepool);
-
-       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+       struct page *pagepool = NULL, *head = NULL, *page;
+       unsigned int nr_pages;
 
        f.readahead = true;
        f.headoffset = readahead_pos(rac);
 
-       while ((page = readahead_page(rac))) {
-               prefetchw(&page->flags);
-
-               /*
-                * A pure asynchronous readahead is indicated if
-                * a PG_readahead marked page is hitted at first.
-                * Let's also do asynchronous decompression for this case.
-                */
-               sync &= !(PageReadahead(page) && !head);
+       z_erofs_pcluster_readmore(&f, rac, f.headoffset +
+                                 readahead_length(rac) - 1, &pagepool, true);
+       nr_pages = readahead_count(rac);
+       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
 
+       while ((page = readahead_page(rac))) {
                set_page_private(page, (unsigned long)head);
                head = page;
        }
@@ -1431,16 +1497,15 @@ static void z_erofs_readahead(struct readahead_control *rac)
                                  page->index, EROFS_I(inode)->nid);
                put_page(page);
        }
-
+       z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
        (void)z_erofs_collector_end(&f.clt);
 
-       z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
-
+       z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+                        sbi->opt.readahead_sync_decompress &&
+                        nr_pages <= sbi->opt.max_sync_decompress_pages);
        if (f.map.mpage)
                put_page(f.map.mpage);
-
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
+       erofs_release_pages(&pagepool);
 }
 
 const struct address_space_operations z_erofs_aops = {