erofs: introduce physical cluster slab pools
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 3851e1a..db296d3 100644
 
 #include <trace/events/erofs.h>
 
+/*
+ * since pclustersize is variable with the big pcluster feature, introduce
+ * slab pools for pclusters of different sizes.
+ */
+struct z_erofs_pcluster_slab {
+       struct kmem_cache *slab;
+       unsigned int maxpages;
+       char name[48];
+};
+
+#define _PCLP(n) { .maxpages = n }
+
+static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
+       _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
+       _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
+};
+
+static void z_erofs_destroy_pcluster_pool(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
+               if (!pcluster_pool[i].slab)
+                       continue;
+               kmem_cache_destroy(pcluster_pool[i].slab);
+               pcluster_pool[i].slab = NULL;
+       }
+}
+
+static int z_erofs_create_pcluster_pool(void)
+{
+       struct z_erofs_pcluster_slab *pcs;
+       struct z_erofs_pcluster *a;
+       unsigned int size;
+
+       for (pcs = pcluster_pool;
+            pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
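+               /* object size covers the flexible compressed_pages[] array */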
+               size = struct_size(a, compressed_pages, pcs->maxpages);
+
+               sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
+               pcs->slab = kmem_cache_create(pcs->name, size, 0,
+                                             SLAB_RECLAIM_ACCOUNT, NULL);
+               if (pcs->slab)
+                       continue;
+
+               z_erofs_destroy_pcluster_pool();
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
+{
+       int i;
+
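+       /* pick the smallest slab pool that can hold nrpages (first fit) */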
+       for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
+               struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
+               struct z_erofs_pcluster *pcl;
+
+               if (nrpages > pcs->maxpages)
+                       continue;
+
+               pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
+               if (!pcl)
+                       return ERR_PTR(-ENOMEM);
+               pcl->pclusterpages = nrpages;
+               return pcl;
+       }
+       return ERR_PTR(-EINVAL);
+}
+
+static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
+{
+       int i;
+
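+       /* return the pcluster to the slab pool it was allocated from */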
+       for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
+               struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
+
+               if (pcl->pclusterpages > pcs->maxpages)
+                       continue;
+
+               kmem_cache_free(pcs->slab, pcl);
+               return;
+       }
+       DBG_BUGON(1);
+}
+
 /*
  * a compressed_pages[] placeholder in order to avoid
  * being filled with file pages for in-place decompression.
@@ -37,12 +124,11 @@ typedef tagptr1_t compressed_page_t;
        tagptr_fold(compressed_page_t, page, 1)
 
 static struct workqueue_struct *z_erofs_workqueue __read_mostly;
-static struct kmem_cache *pcluster_cachep __read_mostly;
 
 void z_erofs_exit_zip_subsystem(void)
 {
        destroy_workqueue(z_erofs_workqueue);
-       kmem_cache_destroy(pcluster_cachep);
+       z_erofs_destroy_pcluster_pool();
 }
 
 static inline int z_erofs_init_workqueue(void)
@@ -59,32 +145,16 @@ static inline int z_erofs_init_workqueue(void)
        return z_erofs_workqueue ? 0 : -ENOMEM;
 }
 
-static void z_erofs_pcluster_init_once(void *ptr)
-{
-       struct z_erofs_pcluster *pcl = ptr;
-       struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-       unsigned int i;
-
-       mutex_init(&cl->lock);
-       cl->nr_pages = 0;
-       cl->vcnt = 0;
-       for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
-               pcl->compressed_pages[i] = NULL;
-}
-
 int __init z_erofs_init_zip_subsystem(void)
 {
-       pcluster_cachep = kmem_cache_create("erofs_compress",
-                                           Z_EROFS_WORKGROUP_SIZE, 0,
-                                           SLAB_RECLAIM_ACCOUNT,
-                                           z_erofs_pcluster_init_once);
-       if (pcluster_cachep) {
-               if (!z_erofs_init_workqueue())
-                       return 0;
-
-               kmem_cache_destroy(pcluster_cachep);
-       }
-       return -ENOMEM;
+       int err = z_erofs_create_pcluster_pool();
+
+       if (err)
+               return err;
+       err = z_erofs_init_workqueue();
+       if (err)
+               z_erofs_destroy_pcluster_pool();
+       return err;
 }
 
 enum z_erofs_collectmode {
@@ -104,6 +174,12 @@ enum z_erofs_collectmode {
         * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
         */
        COLLECT_PRIMARY_HOOKED,
+       /*
+        * a weak form of COLLECT_PRIMARY_FOLLOWED; the difference is that it
+        * could be dispatched to the bypass queue later since all of its
+        * managed pages are up-to-date. None of the related online pages can
+        * be reused for in-place I/O (or pagevec) since the pcluster can be
+        * decoded without submitting any I/O.
+        */
        COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
        /*
         * The current collection has been linked with the owned chain, and
@@ -163,7 +239,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
                                     struct list_head *pagepool)
 {
        const struct z_erofs_pcluster *pcl = clt->pcl;
-       const unsigned int clusterpages = BIT(pcl->clusterbits);
        struct page **pages = clt->compressedpages;
        pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
        bool standalone = true;
@@ -173,7 +248,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
        if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
                return;
 
-       for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
+       for (; pages < pcl->compressed_pages + pcl->pclusterpages; ++pages) {
                struct page *page;
                compressed_page_t t;
                struct page *newpage = NULL;
@@ -186,21 +261,25 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 
                if (page) {
                        t = tag_compressed_page_justfound(page);
-               } else if (type == DELAYEDALLOC) {
-                       t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
-               } else if (type == TRYALLOC) {
-                       newpage = erofs_allocpage(pagepool, gfp);
-                       if (!newpage)
-                               goto dontalloc;
-
-                       set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
-                       t = tag_compressed_page_justfound(newpage);
-               } else {        /* DONTALLOC */
-dontalloc:
-                       if (standalone)
-                               clt->compressedpages = pages;
+               } else {
+                       /* I/O is needed, not possible to decompress directly */
                        standalone = false;
-                       continue;
+                       switch (type) {
+                       case DELAYEDALLOC:
+                               t = tagptr_init(compressed_page_t,
+                                               PAGE_UNALLOCATED);
+                               break;
+                       case TRYALLOC:
+                               newpage = erofs_allocpage(pagepool, gfp);
+                               if (!newpage)
+                                       continue;
+                               set_page_private(newpage,
+                                                Z_EROFS_PREALLOCATED_PAGE);
+                               t = tag_compressed_page_justfound(newpage);
+                               break;
+                       default:        /* DONTALLOC */
+                               continue;
+                       }
                }
 
                if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
@@ -214,7 +293,11 @@ dontalloc:
                }
        }
 
-       if (standalone)         /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
+       /*
+        * don't do in-place I/O if all compressed pages are available in the
+        * managed cache, since the pcluster can be moved to the bypass queue
+        * instead.
+        */
+       if (standalone)
                clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
 }
 
@@ -225,14 +308,13 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
        struct z_erofs_pcluster *const pcl =
                container_of(grp, struct z_erofs_pcluster, obj);
        struct address_space *const mapping = MNGD_MAPPING(sbi);
-       const unsigned int clusterpages = BIT(pcl->clusterbits);
        int i;
 
        /*
         * refcount of workgroup is now frozen as 1,
         * therefore no need to worry about available decompression users.
         */
-       for (i = 0; i < clusterpages; ++i) {
+       for (i = 0; i < pcl->pclusterpages; ++i) {
                struct page *page = pcl->compressed_pages[i];
 
                if (!page)
@@ -257,13 +339,12 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
                                  struct page *page)
 {
        struct z_erofs_pcluster *const pcl = (void *)page_private(page);
-       const unsigned int clusterpages = BIT(pcl->clusterbits);
        int ret = 0;    /* 0 - busy */
 
        if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
                unsigned int i;
 
-               for (i = 0; i < clusterpages; ++i) {
+               for (i = 0; i < pcl->pclusterpages; ++i) {
                        if (pcl->compressed_pages[i] == page) {
                                WRITE_ONCE(pcl->compressed_pages[i], NULL);
                                ret = 1;
@@ -283,9 +364,9 @@ static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
                                          struct page *page)
 {
        struct z_erofs_pcluster *const pcl = clt->pcl;
-       const unsigned int clusterpages = BIT(pcl->clusterbits);
 
-       while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
+       while (clt->compressedpages <
+              pcl->compressed_pages + pcl->pclusterpages) {
                if (!cmpxchg(clt->compressedpages++, NULL, page))
                        return true;
        }
@@ -399,10 +480,10 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
        struct erofs_workgroup *grp;
        int err;
 
-       /* no available workgroup, let's allocate one */
-       pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
-       if (!pcl)
-               return -ENOMEM;
+       /* no available pcluster, let's allocate one */
+       pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
+       if (IS_ERR(pcl))
+               return PTR_ERR(pcl);
 
        atomic_set(&pcl->obj.refcount, 1);
        pcl->obj.index = map->m_pa >> PAGE_SHIFT;
@@ -416,25 +497,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
        else
                pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
 
-       pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0];
-       pcl->clusterbits -= PAGE_SHIFT;
-
        /* new pclusters should be claimed as type 1, primary and followed */
        pcl->next = clt->owned_head;
        clt->mode = COLLECT_PRIMARY_FOLLOWED;
 
        cl = z_erofs_primarycollection(pcl);
-
-       /* must be cleaned before freeing to slab */
-       DBG_BUGON(cl->nr_pages);
-       DBG_BUGON(cl->vcnt);
-
        cl->pageofs = map->m_la & ~PAGE_MASK;
 
        /*
         * lock all primary followed works before they are visible to others
         * and mutex_trylock *never* fails for a new pcluster.
         */
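+       /* initialize cl->lock here since the slab constructor has been removed */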
+       mutex_init(&cl->lock);
        DBG_BUGON(!mutex_trylock(&cl->lock));
 
        grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
@@ -458,7 +532,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 
 err_out:
        mutex_unlock(&cl->lock);
-       kmem_cache_free(pcluster_cachep, pcl);
+       z_erofs_free_pcluster(pcl);
        return err;
 }
 
@@ -504,7 +578,7 @@ out:
 
        clt->compressedpages = clt->pcl->compressed_pages;
        if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
-               clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
+               clt->compressedpages += clt->pcl->pclusterpages;
        return 0;
 }
 
@@ -517,9 +591,8 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
        struct z_erofs_collection *const cl =
                container_of(head, struct z_erofs_collection, rcu);
 
-       kmem_cache_free(pcluster_cachep,
-                       container_of(cl, struct z_erofs_pcluster,
-                                    primary_collection));
+       z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster,
+                                          primary_collection));
 }
 
 void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
@@ -706,9 +779,12 @@ err_out:
        goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
                                       bool sync, int bios)
 {
+       struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
        /* wake up the caller thread for sync decompression */
        if (sync) {
                unsigned long flags;
@@ -720,8 +796,15 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
                return;
        }
 
-       if (!atomic_add_return(bios, &io->pending_bios))
+       if (atomic_add_return(bios, &io->pending_bios))
+               return;
+       /* Use workqueue and sync decompression for atomic contexts only */
+       if (in_atomic() || irqs_disabled()) {
                queue_work(z_erofs_workqueue, &io->u.work);
+               sbi->ctx.readahead_sync_decompress = true;
+               return;
+       }
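+       /* otherwise, decompress directly in the current (non-atomic) context */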
+       z_erofs_decompressqueue_work(&io->u.work);
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -761,9 +844,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
                                       struct list_head *pagepool)
 {
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
-       const unsigned int clusterpages = BIT(pcl->clusterbits);
        struct z_erofs_pagevec_ctor ctor;
-       unsigned int i, outputsize, llen, nr_pages;
+       unsigned int i, inputsize, outputsize, llen, nr_pages;
        struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
        struct page **pages, **compressed_pages, *page;
 
@@ -843,7 +925,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
        overlapped = false;
        compressed_pages = pcl->compressed_pages;
 
-       for (i = 0; i < clusterpages; ++i) {
+       for (i = 0; i < pcl->pclusterpages; ++i) {
                unsigned int pagenr;
 
                page = compressed_pages[i];
@@ -896,12 +978,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
                partial = true;
        }
 
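+       /* the compressed input spans the whole (possibly multi-page) pcluster */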
+       inputsize = pcl->pclusterpages * PAGE_SIZE;
        err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
                                        .sb = sb,
                                        .in = compressed_pages,
                                        .out = pages,
                                        .pageofs_out = cl->pageofs,
-                                       .inputsize = PAGE_SIZE,
+                                       .inputsize = inputsize,
                                        .outputsize = outputsize,
                                        .alg = pcl->algorithmformat,
                                        .inplace_io = overlapped,
@@ -909,8 +992,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
                                 }, pagepool);
 
 out:
-       /* must handle all compressed pages before endding pages */
-       for (i = 0; i < clusterpages; ++i) {
+       /* must handle all compressed pages before ending pages */
+       for (i = 0; i < pcl->pclusterpages; ++i) {
                page = compressed_pages[i];
 
                if (erofs_page_is_managed(sbi, page))
@@ -1213,7 +1296,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
                pcl = container_of(owned_head, struct z_erofs_pcluster, next);
 
                cur = pcl->obj.index;
-               end = cur + BIT(pcl->clusterbits);
+               end = cur + pcl->pclusterpages;
 
                /* close the main owned chain at first */
                owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
@@ -1333,7 +1416,8 @@ static void z_erofs_readahead(struct readahead_control *rac)
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
        unsigned int nr_pages = readahead_count(rac);
-       bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+       bool sync = (sbi->ctx.readahead_sync_decompress &&
+                       nr_pages <= sbi->ctx.max_sync_decompress_pages);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
        struct page *page, *head = NULL;
        LIST_HEAD(pagepool);