diff --git a/mm/page_io.c b/mm/page_io.c
index 89fbf3c..6831813 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -26,6 +26,7 @@
 #include <linux/uio.h>
 #include <linux/sched/task.h>
 #include <linux/delayacct.h>
+#include "swap.h"
 
 void end_swap_bio_write(struct bio *bio)
 {
@@ -234,55 +235,119 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 #define bio_associate_blkg_from_page(bio, page)                do { } while (0)
 #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
-int __swap_writepage(struct page *page, struct writeback_control *wbc,
-               bio_end_io_t end_write_func)
+struct swap_iocb {
+       struct kiocb            iocb;
+       struct bio_vec          bvec[SWAP_CLUSTER_MAX];
+       int                     pages;
+       int                     len;
+};
+static mempool_t *sio_pool;
+
+int sio_pool_init(void)
 {
-       struct bio *bio;
-       int ret;
-       struct swap_info_struct *sis = page_swap_info(page);
+       if (!sio_pool) {
+               mempool_t *pool = mempool_create_kmalloc_pool(
+                       SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
+               if (cmpxchg(&sio_pool, NULL, pool))
+                       mempool_destroy(pool);
+       }
+       if (!sio_pool)
+               return -ENOMEM;
+       return 0;
+}
 
-       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-       if (data_race(sis->flags & SWP_FS_OPS)) {
-               struct kiocb kiocb;
-               struct file *swap_file = sis->swap_file;
-               struct address_space *mapping = swap_file->f_mapping;
-               struct bio_vec bv = {
-                       .bv_page = page,
-                       .bv_len  = PAGE_SIZE,
-                       .bv_offset = 0
-               };
-               struct iov_iter from;
-
-               iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
-               init_sync_kiocb(&kiocb, swap_file);
-               kiocb.ki_pos = page_file_offset(page);
+static void sio_write_complete(struct kiocb *iocb, long ret)
+{
+       struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
+       struct page *page = sio->bvec[0].bv_page;
+       int p;
 
-               set_page_writeback(page);
-               unlock_page(page);
-               ret = mapping->a_ops->direct_IO(&kiocb, &from);
-               if (ret == PAGE_SIZE) {
-                       count_vm_event(PSWPOUT);
-                       ret = 0;
-               } else {
-                       /*
-                        * In the case of swap-over-nfs, this can be a
-                        * temporary failure if the system has limited
-                        * memory for allocating transmit buffers.
-                        * Mark the page dirty and avoid
-                        * folio_rotate_reclaimable but rate-limit the
-                        * messages but do not flag PageError like
-                        * the normal direct-to-bio case as it could
-                        * be temporary.
-                        */
+       if (ret != sio->len) {
+               /*
+                * In the case of swap-over-nfs, this can be a
+                * temporary failure if the system has limited
+                * memory for allocating transmit buffers.
+                * Mark the pages dirty and avoid
+                * folio_rotate_reclaimable.  Rate-limit the
+                * messages, and do not flag PageError as the
+                * normal direct-to-bio case does, since the
+                * failure may be temporary.
+                */
+               pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
+                                  ret, page_file_offset(page));
+               for (p = 0; p < sio->pages; p++) {
+                       page = sio->bvec[p].bv_page;
                        set_page_dirty(page);
                        ClearPageReclaim(page);
-                       pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
-                                          page_file_offset(page));
                }
-               end_page_writeback(page);
-               return ret;
+       } else {
+               for (p = 0; p < sio->pages; p++)
+                       count_swpout_vm_event(sio->bvec[p].bv_page);
        }
 
+       for (p = 0; p < sio->pages; p++)
+               end_page_writeback(sio->bvec[p].bv_page);
+
+       mempool_free(sio, sio_pool);
+}
+
+static int swap_writepage_fs(struct page *page, struct writeback_control *wbc)
+{
+       struct swap_iocb *sio = NULL;
+       struct swap_info_struct *sis = page_swap_info(page);
+       struct file *swap_file = sis->swap_file;
+       loff_t pos = page_file_offset(page);
+
+       set_page_writeback(page);
+       unlock_page(page);
+       if (wbc->swap_plug)
+               sio = *wbc->swap_plug;
+       if (sio) {
+               if (sio->iocb.ki_filp != swap_file ||
+                   sio->iocb.ki_pos + sio->len != pos) {
+                       swap_write_unplug(sio);
+                       sio = NULL;
+               }
+       }
+       if (!sio) {
+               sio = mempool_alloc(sio_pool, GFP_NOIO);
+               init_sync_kiocb(&sio->iocb, swap_file);
+               sio->iocb.ki_complete = sio_write_complete;
+               sio->iocb.ki_pos = pos;
+               sio->pages = 0;
+               sio->len = 0;
+       }
+       sio->bvec[sio->pages].bv_page = page;
+       sio->bvec[sio->pages].bv_len = thp_size(page);
+       sio->bvec[sio->pages].bv_offset = 0;
+       sio->len += thp_size(page);
+       sio->pages += 1;
+       if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
+               swap_write_unplug(sio);
+               sio = NULL;
+       }
+       if (wbc->swap_plug)
+               *wbc->swap_plug = sio;
+
+       return 0;
+}
+
+int __swap_writepage(struct page *page, struct writeback_control *wbc,
+                    bio_end_io_t end_write_func)
+{
+       struct bio *bio;
+       int ret;
+       struct swap_info_struct *sis = page_swap_info(page);
+
+       VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+       /*
+        * ->flags can be updated non-atomically (scan_swap_map_slots),
+        * but that will never affect SWP_FS_OPS, so the data_race
+        * is safe.
+        */
+       if (data_race(sis->flags & SWP_FS_OPS))
+               return swap_writepage_fs(page, wbc);
+
        ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
        if (!ret) {
                count_swpout_vm_event(page);
@@ -305,7 +370,83 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
        return 0;
 }
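
A hedged caller-side sketch (not part of this patch) of how the write plug is meant to be driven: a reclaim-style path can feed several pages through swap_writepage() with the same plug and then submit whatever is left as a single ->swap_rw request.  It assumes struct writeback_control gains a swap_plug member elsewhere in this series; the function and list names below are illustrative only.

#include <linux/swap.h>
#include <linux/writeback.h>
#include "swap.h"               /* struct swap_iocb, swap_write_unplug() */

/* Illustrative only: batch swap writeout of locked, swap-cache pages. */
static void example_swap_writeout(struct list_head *pages)
{
        struct swap_iocb *plug = NULL;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .swap_plug = &plug,     /* assumed field added elsewhere in this series */
        };
        struct page *page, *next;

        list_for_each_entry_safe(page, next, pages, lru)
                swap_writepage(page, &wbc);     /* may queue the page into *plug */

        /* Anything still sitting in the plug goes out as one request. */
        if (plug)
                swap_write_unplug(plug);
}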
 
-int swap_readpage(struct page *page, bool synchronous)
+void swap_write_unplug(struct swap_iocb *sio)
+{
+       struct iov_iter from;
+       struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
+       int ret;
+
+       iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages, sio->len);
+       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+       if (ret != -EIOCBQUEUED)
+               sio_write_complete(&sio->iocb, ret);
+}
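
For the filesystem side, a hedged sketch (also not part of this patch) of a ->swap_rw method; "myfs" and its direct-I/O helpers are hypothetical stand-ins, and the prototype is the one this series adds to address_space_operations.

/* Hypothetical filesystem; myfs_direct_read()/myfs_direct_write() are stand-ins. */
static int myfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
        ssize_t ret;

        /*
         * A synchronous implementation returns the byte count (or an
         * error) and swap_write_unplug() above then runs the sio
         * completion itself; an asynchronous one returns -EIOCBQUEUED
         * and calls iocb->ki_complete() from its own completion path.
         */
        if (iov_iter_rw(iter) == READ)
                ret = myfs_direct_read(iocb, iter);
        else
                ret = myfs_direct_write(iocb, iter);
        return ret;
}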
+
+static void sio_read_complete(struct kiocb *iocb, long ret)
+{
+       struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
+       int p;
+
+       if (ret == sio->len) {
+               for (p = 0; p < sio->pages; p++) {
+                       struct page *page = sio->bvec[p].bv_page;
+
+                       SetPageUptodate(page);
+                       unlock_page(page);
+               }
+               count_vm_events(PSWPIN, sio->pages);
+       } else {
+               for (p = 0; p < sio->pages; p++) {
+                       struct page *page = sio->bvec[p].bv_page;
+
+                       SetPageError(page);
+                       ClearPageUptodate(page);
+                       unlock_page(page);
+               }
+               pr_alert_ratelimited("Read-error on swap-device\n");
+       }
+       mempool_free(sio, sio_pool);
+}
+
+static void swap_readpage_fs(struct page *page,
+                            struct swap_iocb **plug)
+{
+       struct swap_info_struct *sis = page_swap_info(page);
+       struct swap_iocb *sio = NULL;
+       loff_t pos = page_file_offset(page);
+
+       if (plug)
+               sio = *plug;
+       if (sio) {
+               if (sio->iocb.ki_filp != sis->swap_file ||
+                   sio->iocb.ki_pos + sio->len != pos) {
+                       swap_read_unplug(sio);
+                       sio = NULL;
+               }
+       }
+       if (!sio) {
+               sio = mempool_alloc(sio_pool, GFP_KERNEL);
+               init_sync_kiocb(&sio->iocb, sis->swap_file);
+               sio->iocb.ki_pos = pos;
+               sio->iocb.ki_complete = sio_read_complete;
+               sio->pages = 0;
+               sio->len = 0;
+       }
+       sio->bvec[sio->pages].bv_page = page;
+       sio->bvec[sio->pages].bv_len = thp_size(page);
+       sio->bvec[sio->pages].bv_offset = 0;
+       sio->len += thp_size(page);
+       sio->pages += 1;
+       if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
+               swap_read_unplug(sio);
+               sio = NULL;
+       }
+       if (plug)
+               *plug = sio;
+}
+
+int swap_readpage(struct page *page, bool synchronous,
+                 struct swap_iocb **plug)
 {
        struct bio *bio;
        int ret = 0;
@@ -333,12 +474,7 @@ int swap_readpage(struct page *page, bool synchronous)
        }
 
        if (data_race(sis->flags & SWP_FS_OPS)) {
-               struct file *swap_file = sis->swap_file;
-               struct address_space *mapping = swap_file->f_mapping;
-
-               ret = mapping->a_ops->readpage(swap_file, page);
-               if (!ret)
-                       count_vm_event(PSWPIN);
+               swap_readpage_fs(page, plug);
                goto out;
        }
 
@@ -360,7 +496,6 @@ int swap_readpage(struct page *page, bool synchronous)
         * attempt to access it in the page fault retry time check.
         */
        if (synchronous) {
-               bio->bi_opf |= REQ_POLLED;
                get_task_struct(current);
                bio->bi_private = current;
        }
@@ -372,8 +507,7 @@ int swap_readpage(struct page *page, bool synchronous)
                if (!READ_ONCE(bio->bi_private))
                        break;
 
-               if (!bio_poll(bio, NULL, 0))
-                       blk_io_schedule();
+               blk_io_schedule();
        }
        __set_current_state(TASK_RUNNING);
        bio_put(bio);
@@ -385,19 +519,14 @@ out:
        return ret;
 }
 
-bool swap_dirty_folio(struct address_space *mapping, struct folio *folio)
+void __swap_read_unplug(struct swap_iocb *sio)
 {
-       struct swap_info_struct *sis = swp_swap_info(folio_swap_entry(folio));
-
-       if (data_race(sis->flags & SWP_FS_OPS)) {
-               const struct address_space_operations *aops;
-
-               mapping = sis->swap_file->f_mapping;
-               aops = mapping->a_ops;
+       struct iov_iter from;
+       struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
+       int ret;
 
-               VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
-               return aops->dirty_folio(mapping, folio);
-       } else {
-               return noop_dirty_folio(mapping, folio);
-       }
+       iov_iter_bvec(&from, READ, sio->bvec, sio->pages, sio->len);
+       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+       if (ret != -EIOCBQUEUED)
+               sio_read_complete(&sio->iocb, ret);
 }
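
Finally, a hedged sketch of a read-side caller (not part of this patch): a readahead-style loop passes one plug to every swap_readpage() call so reads of adjacent swap slots get merged, then flushes whatever is still queued.  Names are illustrative; swap_read_unplug() is assumed to be the mm/swap.h wrapper around __swap_read_unplug() above.

/* Illustrative only: pages are assumed locked and already in the swap cache. */
static void example_swap_readahead(struct page **pages, int nr)
{
        struct swap_iocb *splug = NULL;
        int i;

        for (i = 0; i < nr; i++)
                swap_readpage(pages[i], false, &splug);

        /* Submit whatever the loop left queued in the plug. */
        if (splug)
                swap_read_unplug(splug);
}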