static u64 zswap_reject_compress_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
+/* Load or writeback failed due to decompression failure */
+static u64 zswap_decompress_fail;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
return comp_ret == 0 && alloc_ret == 0;
}
-static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
{
struct zpool *zpool = entry->pool->zpool;
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
+ int decomp_ret, dlen;
u8 *src, *obj;
acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
sg_init_table(&output, 1);
sg_set_folio(&output, folio, PAGE_SIZE, 0);
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
- BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
- BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+ decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
+ dlen = acomp_ctx->req->dlen;
zpool_obj_read_end(zpool, entry->handle, obj);
acomp_ctx_put_unlock(acomp_ctx);
+
+ if (!decomp_ret && dlen == PAGE_SIZE)
+ return true;
+
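+ /*
+  * Either the compressor returned an error or it produced an unexpected
+  * output length; count the failure and let the caller decide how to
+  * handle it.
+  */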
+ zswap_decompress_fail++;
+ pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n",
+ swp_type(entry->swpentry),
+ swp_offset(entry->swpentry),
+ entry->pool->tfm_name, entry->length, dlen);
+ return false;
}
/*********************************
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
+ int ret = 0;
/* try to allocate swap cache folio */
si = get_swap_device(swpentry);
* and freed when invalidated by the concurrent shrinker anyway.
*/
if (!folio_was_allocated) {
- folio_put(folio);
- return -EEXIST;
+ ret = -EEXIST;
+ goto out;
}
/*
* be dereferenced.
*/
tree = swap_zswap_tree(swpentry);
- if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) {
- delete_from_swap_cache(folio);
- folio_unlock(folio);
- folio_put(folio);
- return -ENOMEM;
+ if (entry != xa_load(tree, offset)) {
+ ret = -ENOMEM;
+ goto out;
}
- zswap_decompress(entry, folio);
+ if (!zswap_decompress(entry, folio)) {
+ ret = -EIO;
+ goto out;
+ }
+
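+ /*
+  * Only drop the entry from the tree once decompression has succeeded,
+  * so a failed writeback does not lose the compressed copy.
+  */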
+ xa_erase(tree, offset);
count_vm_event(ZSWPWB);
if (entry->objcg)
/* start writeback */
__swap_writepage(folio, &wbc);
- folio_put(folio);
- return 0;
+out:
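+ /*
+  * On failures other than -EEXIST, back out the swapcache folio we
+  * allocated and locked above before dropping our reference.
+  */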
+ if (ret && ret != -EEXIST) {
+ delete_from_swap_cache(folio);
+ folio_unlock(folio);
+ }
+ folio_put(folio);
+ return ret;
}
/*********************************
return ret;
}
-bool zswap_load(struct folio *folio)
+/**
+ * zswap_load() - load a folio from zswap
+ * @folio: folio to load
+ *
+ * Return: 0 on success, with the folio unlocked and marked up-to-date, or one
+ * of the following error codes:
+ *
+ * -EIO: if the swapped out content was in zswap, but could not be loaded
+ * into the page due to a decompression failure. The folio is unlocked, but
+ * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
+ * will SIGBUS).
+ *
+ * -EINVAL: if the swapped out content was in zswap, but the page belongs
+ * to a large folio, which is not supported by zswap. The folio is unlocked,
+ * but NOT marked up-to-date, so that an IO error is emitted (e.g.
+ * do_swap_page() will SIGBUS).
+ *
+ * -ENOENT: if the swapped out content was not in zswap. The folio remains
+ * locked on return.
+ */
+int zswap_load(struct folio *folio)
{
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
VM_WARN_ON_ONCE(!folio_test_locked(folio));
if (zswap_never_enabled())
- return false;
+ return -ENOENT;
/*
* Large folios should not be swapped in while zswap is being used, as
* they are not properly handled. Zswap does not properly load large
* folios, and a large folio may only be partially in zswap.
- *
- * Return true without marking the folio uptodate so that an IO error is
- * emitted (e.g. do_swap_page() will sigbus).
*/
- if (WARN_ON_ONCE(folio_test_large(folio)))
- return true;
+ if (WARN_ON_ONCE(folio_test_large(folio))) {
+ folio_unlock(folio);
+ return -EINVAL;
+ }
+
+ entry = xa_load(tree, offset);
+ if (!entry)
+ return -ENOENT;
+
+ if (!zswap_decompress(entry, folio)) {
+ folio_unlock(folio);
+ return -EIO;
+ }
+
+ folio_mark_uptodate(folio);
+
+ count_vm_event(ZSWPIN);
+ if (entry->objcg)
+ count_objcg_events(entry->objcg, ZSWPIN, 1);
/*
* When reading into the swapcache, invalidate our entry. The
* files, which reads into a private page and may free it if
* the fault fails. We remain the primary owner of the entry.)
*/
- if (swapcache)
- entry = xa_erase(tree, offset);
- else
- entry = xa_load(tree, offset);
-
- if (!entry)
- return false;
-
- zswap_decompress(entry, folio);
-
- count_vm_event(ZSWPIN);
- if (entry->objcg)
- count_objcg_events(entry->objcg, ZSWPIN, 1);
-
if (swapcache) {
- zswap_entry_free(entry);
folio_mark_dirty(folio);
+ xa_erase(tree, offset);
+ zswap_entry_free(entry);
}
- folio_mark_uptodate(folio);
- return true;
+ folio_unlock(folio);
+ return 0;
}
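
As an illustration of the return contract documented above, here is a minimal, hypothetical caller-side sketch (not part of this patch); the helper name and its placement in the swap read path are assumptions:

/*
 * Hypothetical helper (illustration only): translate zswap_load()'s
 * return value into "did zswap satisfy this read?" for a swap read path.
 */
static bool swap_read_folio_in_zswap(struct folio *folio)
{
	int ret = zswap_load(folio);

	/* Not in zswap: the folio is still locked, fall back to swap IO. */
	if (ret == -ENOENT)
		return false;

	/*
	 * 0, -EIO and -EINVAL all mean zswap owned the data and has already
	 * unlocked the folio. Only success marked it up-to-date, so an error
	 * surfaces to the faulting task as an IO error (e.g. do_swap_page()
	 * will SIGBUS).
	 */
	return true;
}
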
void zswap_invalidate(swp_entry_t swp)
zswap_debugfs_root, &zswap_reject_compress_fail);
debugfs_create_u64("reject_compress_poor", 0444,
zswap_debugfs_root, &zswap_reject_compress_poor);
+ debugfs_create_u64("decompress_fail", 0444,
+ zswap_debugfs_root, &zswap_decompress_fail);
debugfs_create_u64("written_back_pages", 0444,
zswap_debugfs_root, &zswap_written_back_pages);
debugfs_create_file("pool_total_size", 0444,