#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
-#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/frontswap.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
+#include <linux/zswap.h>
#include "swap.h"
-void end_swap_bio_write(struct bio *bio)
+static void __end_swap_bio_write(struct bio *bio)
{
- struct page *page = bio_first_page_all(bio);
+ struct folio *folio = bio_first_folio_all(bio);
if (bio->bi_status) {
- SetPageError(page);
/*
* We failed to write the page out to swap-space.
* Re-dirty the page in order to avoid it being reclaimed.
*
* Also clear PG_reclaim to avoid folio_rotate_reclaimable()
*/
- set_page_dirty(page);
+ folio_mark_dirty(folio);
pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
- ClearPageReclaim(page);
+ folio_clear_reclaim(folio);
}
- end_page_writeback(page);
+ folio_end_writeback(folio);
+}
+
+static void end_swap_bio_write(struct bio *bio)
+{
+ __end_swap_bio_write(bio);
bio_put(bio);
}
-static void end_swap_bio_read(struct bio *bio)
+static void __end_swap_bio_read(struct bio *bio)
{
- struct page *page = bio_first_page_all(bio);
- struct task_struct *waiter = bio->bi_private;
+ struct folio *folio = bio_first_folio_all(bio);
if (bio->bi_status) {
- SetPageError(page);
- ClearPageUptodate(page);
pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
- goto out;
+ } else {
+ folio_mark_uptodate(folio);
}
+ folio_unlock(folio);
+}
- SetPageUptodate(page);
-out:
- unlock_page(page);
- WRITE_ONCE(bio->bi_private, NULL);
+static void end_swap_bio_read(struct bio *bio)
+{
+ __end_swap_bio_read(bio);
bio_put(bio);
- if (waiter) {
- blk_wake_io_task(waiter);
- put_task_struct(waiter);
- }
}
int generic_swapfile_activate(struct swap_info_struct *sis,
*/
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
- int ret = 0;
+ struct folio *folio = page_folio(page);
+ int ret;
- if (try_to_free_swap(page)) {
- unlock_page(page);
- goto out;
+ if (folio_free_swap(folio)) {
+ folio_unlock(folio);
+ return 0;
}
/*
* Arch code may have to preserve more data than just the page
* contents, e.g. memory tags.
*/
- ret = arch_prepare_to_swap(page);
+ ret = arch_prepare_to_swap(folio);
if (ret) {
- set_page_dirty(page);
- unlock_page(page);
- goto out;
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ return ret;
}
- if (frontswap_store(page) == 0) {
- set_page_writeback(page);
- unlock_page(page);
- end_page_writeback(page);
- goto out;
+ if (zswap_store(folio)) {
+ folio_unlock(folio);
+ return 0;
}
- ret = __swap_writepage(page, wbc, end_swap_bio_write);
-out:
- return ret;
+ if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio))) {
+ folio_mark_dirty(folio);
+ return AOP_WRITEPAGE_ACTIVATE;
+ }
+
+ __swap_writepage(folio, wbc);
+ return 0;
}
-static inline void count_swpout_vm_event(struct page *page)
+static inline void count_swpout_vm_event(struct folio *folio)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (unlikely(PageTransHuge(page)))
+ if (unlikely(folio_test_pmd_mappable(folio))) {
+ count_memcg_folio_events(folio, THP_SWPOUT, 1);
count_vm_event(THP_SWPOUT);
+ }
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
#endif
- count_vm_events(PSWPOUT, thp_nr_pages(page));
+ count_vm_events(PSWPOUT, folio_nr_pages(folio));
}
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
{
struct cgroup_subsys_state *css;
struct mem_cgroup *memcg;
- memcg = page_memcg(page);
+ memcg = folio_memcg(folio);
if (!memcg)
return;
rcu_read_unlock();
}
#else
-#define bio_associate_blkg_from_page(bio, page) do { } while (0)
+#define bio_associate_blkg_from_page(bio, folio) do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
struct swap_iocb {
* be temporary.
*/
pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
- ret, page_file_offset(page));
+ ret, swap_dev_pos(page_swap_entry(page)));
for (p = 0; p < sio->pages; p++) {
page = sio->bvec[p].bv_page;
set_page_dirty(page);
ClearPageReclaim(page);
}
- } else {
- for (p = 0; p < sio->pages; p++)
- count_swpout_vm_event(sio->bvec[p].bv_page);
}
for (p = 0; p < sio->pages; p++)
mempool_free(sio, sio_pool);
}
-static int swap_writepage_fs(struct page *page, struct writeback_control *wbc)
+static void swap_writepage_fs(struct folio *folio, struct writeback_control *wbc)
{
struct swap_iocb *sio = NULL;
- struct swap_info_struct *sis = page_swap_info(page);
+ struct swap_info_struct *sis = swp_swap_info(folio->swap);
struct file *swap_file = sis->swap_file;
- loff_t pos = page_file_offset(page);
+ loff_t pos = swap_dev_pos(folio->swap);
- set_page_writeback(page);
- unlock_page(page);
+ count_swpout_vm_event(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
if (wbc->swap_plug)
sio = *wbc->swap_plug;
if (sio) {
sio->pages = 0;
sio->len = 0;
}
- sio->bvec[sio->pages].bv_page = page;
- sio->bvec[sio->pages].bv_len = thp_size(page);
- sio->bvec[sio->pages].bv_offset = 0;
- sio->len += thp_size(page);
+ bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
+ sio->len += folio_size(folio);
sio->pages += 1;
if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
swap_write_unplug(sio);
}
if (wbc->swap_plug)
*wbc->swap_plug = sio;
+}
- return 0;
+static void swap_writepage_bdev_sync(struct folio *folio,
+ struct writeback_control *wbc, struct swap_info_struct *sis)
+{
+ struct bio_vec bv;
+ struct bio bio;
+
+ bio_init(&bio, sis->bdev, &bv, 1,
+ REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
+ bio.bi_iter.bi_sector = swap_folio_sector(folio);
+ bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
+
+ bio_associate_blkg_from_page(&bio, folio);
+ count_swpout_vm_event(folio);
+
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+
+ submit_bio_wait(&bio);
+ __end_swap_bio_write(&bio);
}
-int __swap_writepage(struct page *page, struct writeback_control *wbc,
- bio_end_io_t end_write_func)
+static void swap_writepage_bdev_async(struct folio *folio,
+ struct writeback_control *wbc, struct swap_info_struct *sis)
{
struct bio *bio;
- int ret;
- struct swap_info_struct *sis = page_swap_info(page);
- VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+ bio = bio_alloc(sis->bdev, 1,
+ REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
+ GFP_NOIO);
+ bio->bi_iter.bi_sector = swap_folio_sector(folio);
+ bio->bi_end_io = end_swap_bio_write;
+ bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
+
+ bio_associate_blkg_from_page(bio, folio);
+ count_swpout_vm_event(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+ submit_bio(bio);
+}
+
+void __swap_writepage(struct folio *folio, struct writeback_control *wbc)
+{
+ struct swap_info_struct *sis = swp_swap_info(folio->swap);
+
+ VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
/*
* ->flags can be updated non-atomicially (scan_swap_map_slots),
* but that will never affect SWP_FS_OPS, so the data_race
* is safe.
*/
if (data_race(sis->flags & SWP_FS_OPS))
- return swap_writepage_fs(page, wbc);
-
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
- if (!ret) {
- count_swpout_vm_event(page);
- return 0;
- }
-
- bio = bio_alloc(sis->bdev, 1,
- REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
- GFP_NOIO);
- bio->bi_iter.bi_sector = swap_page_sector(page);
- bio->bi_end_io = end_write_func;
- bio_add_page(bio, page, thp_size(page), 0);
-
- bio_associate_blkg_from_page(bio, page);
- count_swpout_vm_event(page);
- set_page_writeback(page);
- unlock_page(page);
- submit_bio(bio);
-
- return 0;
+ swap_writepage_fs(folio, wbc);
+ /*
+ * ->flags can be updated non-atomicially (scan_swap_map_slots),
+ * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
+ * is safe.
+ */
+ else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
+ swap_writepage_bdev_sync(folio, wbc, sis);
+ else
+ swap_writepage_bdev_async(folio, wbc, sis);
}
void swap_write_unplug(struct swap_iocb *sio)
struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
int ret;
- iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages, sio->len);
+ iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
if (ret != -EIOCBQUEUED)
sio_write_complete(&sio->iocb, ret);
if (ret == sio->len) {
for (p = 0; p < sio->pages; p++) {
- struct page *page = sio->bvec[p].bv_page;
+ struct folio *folio = page_folio(sio->bvec[p].bv_page);
- SetPageUptodate(page);
- unlock_page(page);
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
}
count_vm_events(PSWPIN, sio->pages);
} else {
for (p = 0; p < sio->pages; p++) {
- struct page *page = sio->bvec[p].bv_page;
+ struct folio *folio = page_folio(sio->bvec[p].bv_page);
- SetPageError(page);
- ClearPageUptodate(page);
- unlock_page(page);
+ folio_unlock(folio);
}
pr_alert_ratelimited("Read-error on swap-device\n");
}
mempool_free(sio, sio_pool);
}
-static void swap_readpage_fs(struct page *page,
- struct swap_iocb **plug)
+static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
{
- struct swap_info_struct *sis = page_swap_info(page);
+ struct swap_info_struct *sis = swp_swap_info(folio->swap);
struct swap_iocb *sio = NULL;
- loff_t pos = page_file_offset(page);
+ loff_t pos = swap_dev_pos(folio->swap);
if (plug)
sio = *plug;
sio->pages = 0;
sio->len = 0;
}
- sio->bvec[sio->pages].bv_page = page;
- sio->bvec[sio->pages].bv_len = thp_size(page);
- sio->bvec[sio->pages].bv_offset = 0;
- sio->len += thp_size(page);
+ bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
+ sio->len += folio_size(folio);
sio->pages += 1;
if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
swap_read_unplug(sio);
*plug = sio;
}
-int swap_readpage(struct page *page, bool synchronous,
- struct swap_iocb **plug)
+static void swap_read_folio_bdev_sync(struct folio *folio,
+ struct swap_info_struct *sis)
{
- struct bio *bio;
- int ret = 0;
- struct swap_info_struct *sis = page_swap_info(page);
- bool workingset = PageWorkingset(page);
- unsigned long pflags;
-
- VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageUptodate(page), page);
+ struct bio_vec bv;
+ struct bio bio;
+ bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
+ bio.bi_iter.bi_sector = swap_folio_sector(folio);
+ bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
/*
- * Count submission time as memory stall. When the device is congested,
- * or the submitting cgroup IO-throttled, submission can be a
- * significant part of overall IO time.
+ * Keep this task valid during swap readpage because the oom killer may
+ * attempt to access it in the page fault retry time check.
*/
- if (workingset)
- psi_memstall_enter(&pflags);
- delayacct_swapin_start();
-
- if (frontswap_load(page) == 0) {
- SetPageUptodate(page);
- unlock_page(page);
- goto out;
- }
-
- if (data_race(sis->flags & SWP_FS_OPS)) {
- swap_readpage_fs(page, plug);
- goto out;
- }
+ get_task_struct(current);
+ count_vm_event(PSWPIN);
+ submit_bio_wait(&bio);
+ __end_swap_bio_read(&bio);
+ put_task_struct(current);
+}
- if (sis->flags & SWP_SYNCHRONOUS_IO) {
- ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
- if (!ret) {
- count_vm_event(PSWPIN);
- goto out;
- }
- }
+static void swap_read_folio_bdev_async(struct folio *folio,
+ struct swap_info_struct *sis)
+{
+ struct bio *bio;
- ret = 0;
bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
- bio->bi_iter.bi_sector = swap_page_sector(page);
+ bio->bi_iter.bi_sector = swap_folio_sector(folio);
bio->bi_end_io = end_swap_bio_read;
- bio_add_page(bio, page, thp_size(page), 0);
+ bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
+ count_vm_event(PSWPIN);
+ submit_bio(bio);
+}
+
+void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
+{
+ struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ bool synchronous = sis->flags & SWP_SYNCHRONOUS_IO;
+ bool workingset = folio_test_workingset(folio);
+ unsigned long pflags;
+ bool in_thrashing;
+
+ VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
+
/*
- * Keep this task valid during swap readpage because the oom killer may
- * attempt to access it in the page fault retry time check.
+ * Count submission time as memory stall and delay. When the device
+ * is congested, or the submitting cgroup IO-throttled, submission
+ * can be a significant part of overall IO time.
*/
- if (synchronous) {
- get_task_struct(current);
- bio->bi_private = current;
+ if (workingset) {
+ delayacct_thrashing_start(&in_thrashing);
+ psi_memstall_enter(&pflags);
}
- count_vm_event(PSWPIN);
- bio_get(bio);
- submit_bio(bio);
- while (synchronous) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio->bi_private))
- break;
+ delayacct_swapin_start();
- blk_io_schedule();
+ if (zswap_load(folio)) {
+ folio_unlock(folio);
+ } else if (data_race(sis->flags & SWP_FS_OPS)) {
+ swap_read_folio_fs(folio, plug);
+ } else if (synchronous) {
+ swap_read_folio_bdev_sync(folio, sis);
+ } else {
+ swap_read_folio_bdev_async(folio, sis);
}
- __set_current_state(TASK_RUNNING);
- bio_put(bio);
-out:
- if (workingset)
+ if (workingset) {
+ delayacct_thrashing_end(&in_thrashing);
psi_memstall_leave(&pflags);
+ }
delayacct_swapin_end();
- return ret;
}
void __swap_read_unplug(struct swap_iocb *sio)
struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
int ret;
- iov_iter_bvec(&from, READ, sio->bvec, sio->pages, sio->len);
+ iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
if (ret != -EIOCBQUEUED)
sio_read_complete(&sio->iocb, ret);