powerpc/pseries/cmm: Switch to balloon_page_alloc()
[linux-2.6-microblaze.git] / arch / powerpc / platforms / pseries / cmm.c
index 33d31e4..86eb845 100644 (file)
 #include <linux/stringify.h>
 #include <linux/swap.h>
 #include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
 #define CMM_MIN_MEM_MB         256
 #define KB2PAGES(_p)           ((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)           ((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
 #define CMM_MEM_HOTPLUG_PRI    1
-#define CMM_MEM_ISOLATE_PRI    15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -77,17 +77,15 @@ MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
 
 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
 
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
 static unsigned long loaned_pages_target;
 static unsigned long oom_freed_pages;
 
-static LIST_HEAD(cmm_page_list);
-static DEFINE_SPINLOCK(cmm_lock);
-
 static DEFINE_MUTEX(hotplug_mutex);
 static int hotplug_occurred; /* protected by the hotplug mutex */
 
 static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
 
 static long plpar_page_set_loaned(struct page *page)
 {
@@ -149,23 +147,19 @@ static long cmm_alloc_pages(long nr)
                        break;
                }
 
-               page = alloc_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY |
-                                 __GFP_NOMEMALLOC);
+               page = balloon_page_alloc();
                if (!page)
                        break;
-               spin_lock(&cmm_lock);
                rc = plpar_page_set_loaned(page);
                if (rc) {
                        pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
-                       spin_unlock(&cmm_lock);
                        __free_page(page);
                        break;
                }
 
-               list_add(&page->lru, &cmm_page_list);
-               loaned_pages++;
-               totalram_pages_dec();
-               spin_unlock(&cmm_lock);
+               balloon_page_enqueue(&b_dev_info, page);
+               atomic_long_inc(&loaned_pages);
+               adjust_managed_page_count(page, -1);
                nr--;
        }
 
@@ -182,21 +176,19 @@ static long cmm_alloc_pages(long nr)
  **/
 static long cmm_free_pages(long nr)
 {
-       struct page *page, *tmp;
+       struct page *page;
 
        cmm_dbg("Begin free of %ld pages.\n", nr);
-       spin_lock(&cmm_lock);
-       list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) {
-               if (!nr)
+       /* Dequeue until nr pages were released or no page is handed back. */
+       while (nr) {
+               page = balloon_page_dequeue(&b_dev_info);
+               if (!page)
                        break;
                plpar_page_set_active(page);
-               list_del(&page->lru);
+               /* Revert the -1 accounting done when the page was loaned. */
+               adjust_managed_page_count(page, 1);
                __free_page(page);
-               loaned_pages--;
+               atomic_long_dec(&loaned_pages);
                nr--;
-               totalram_pages_inc();
        }
-       spin_unlock(&cmm_lock);
        cmm_dbg("End request with %ld pages unfulfilled\n", nr);
        return nr;
 }
@@ -218,7 +210,7 @@ static int cmm_oom_notify(struct notifier_block *self,
 
        cmm_dbg("OOM processing started\n");
        nr = cmm_free_pages(nr);
-       loaned_pages_target = loaned_pages;
+       loaned_pages_target = atomic_long_read(&loaned_pages);
        *freed += KB2PAGES(oom_kb) - nr;
        oom_freed_pages += KB2PAGES(oom_kb) - nr;
        cmm_dbg("OOM processing complete\n");
@@ -235,10 +227,11 @@ static int cmm_oom_notify(struct notifier_block *self,
  **/
 static void cmm_get_mpp(void)
 {
+       const long __loaned_pages = atomic_long_read(&loaned_pages);
+       const long total_pages = totalram_pages() + __loaned_pages;
        int rc;
        struct hvcall_mpp_data mpp_data;
        signed long active_pages_target, page_loan_request, target;
-       signed long total_pages = totalram_pages() + loaned_pages;
        signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
        rc = h_get_mpp(&mpp_data);
@@ -247,7 +240,7 @@ static void cmm_get_mpp(void)
                return;
 
        page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-       target = page_loan_request + (signed long)loaned_pages;
+       target = page_loan_request + __loaned_pages;
 
        if (target < 0 || total_pages < min_mem_pages)
                target = 0;
@@ -268,7 +261,7 @@ static void cmm_get_mpp(void)
        loaned_pages_target = target;
 
        cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
-               page_loan_request, loaned_pages, loaned_pages_target,
+               page_loan_request, __loaned_pages, loaned_pages_target,
                oom_freed_pages, totalram_pages());
 }
 
@@ -286,6 +279,7 @@ static struct notifier_block cmm_oom_nb = {
 static int cmm_thread(void *dummy)
 {
        unsigned long timeleft;
+       long __loaned_pages;
 
        while (1) {
                timeleft = msleep_interruptible(delay * 1000);
@@ -316,11 +310,12 @@ static int cmm_thread(void *dummy)
 
                cmm_get_mpp();
 
-               if (loaned_pages_target > loaned_pages) {
-                       if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
-                               loaned_pages_target = loaned_pages;
-               } else if (loaned_pages_target < loaned_pages)
-                       cmm_free_pages(loaned_pages - loaned_pages_target);
+               __loaned_pages = atomic_long_read(&loaned_pages);
+               if (loaned_pages_target > __loaned_pages) {
+                       if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+                               loaned_pages_target = __loaned_pages;
+               } else if (loaned_pages_target < __loaned_pages)
+                       cmm_free_pages(__loaned_pages - loaned_pages_target);
        }
        return 0;
 }
@@ -334,7 +329,7 @@ static int cmm_thread(void *dummy)
        }                                                       \
        static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
 
 static ssize_t show_oom_pages(struct device *dev,
@@ -437,7 +432,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
                if (cmm_thread_ptr)
                        kthread_stop(cmm_thread_ptr);
                cmm_thread_ptr = NULL;
-               cmm_free_pages(loaned_pages);
+               cmm_free_pages(atomic_long_read(&loaned_pages));
        }
        return NOTIFY_DONE;
 }
@@ -446,90 +441,6 @@ static struct notifier_block cmm_reboot_nb = {
        .notifier_call = cmm_reboot_notifier,
 };
 
-/**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- *      0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
-       struct memory_isolate_notify *marg = arg;
-       struct page *page;
-
-       spin_lock(&cmm_lock);
-       list_for_each_entry(page, &cmm_page_list, lru) {
-               if (page_to_pfn(page) >= marg->start_pfn &&
-                   page_to_pfn(page) < marg->start_pfn + marg->nr_pages)
-                       marg->pages_found++;
-       }
-       spin_unlock(&cmm_lock);
-       return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self:      notifier block struct
- * @action:    action to take
- * @arg:       struct memory_isolate_notify data for handler
- *
- * Return value:
- *     NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
-                                unsigned long action, void *arg)
-{
-       int ret = 0;
-
-       if (action == MEM_ISOLATE_COUNT)
-               ret = cmm_count_pages(arg);
-
-       return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
-       .notifier_call = cmm_memory_isolate_cb,
-       .priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- *     0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
-       struct memory_notify *marg = arg;
-       struct page *page, *tmp;
-       unsigned long freed = 0;
-
-       cmm_dbg("Memory going offline, searching PFN 0x%lx (%ld pages).\n",
-               marg->start_pfn, marg->nr_pages);
-       spin_lock(&cmm_lock);
-
-       /* Search the page list for pages in the range to be offlined */
-       list_for_each_entry_safe(page, tmp, &cmm_page_list, lru) {
-               if (page_to_pfn(page) < marg->start_pfn ||
-                   page_to_pfn(page) >= marg->start_pfn + marg->nr_pages)
-                       continue;
-               plpar_page_set_active(page);
-               list_del(&page->lru);
-               __free_page(page);
-               freed++;
-               loaned_pages--;
-               totalram_pages_inc();
-       }
-
-       spin_unlock(&cmm_lock);
-       cmm_dbg("Released %ld pages in the search range.\n", freed);
-
-       return 0;
-}
-
 /**
  * cmm_memory_cb - Handle memory hotplug notifier calls
  * @self:      notifier block struct
@@ -549,7 +460,6 @@ static int cmm_memory_cb(struct notifier_block *self,
        case MEM_GOING_OFFLINE:
                mutex_lock(&hotplug_mutex);
                hotplug_occurred = 1;
-               ret = cmm_mem_going_offline(arg);
                break;
        case MEM_OFFLINE:
        case MEM_CANCEL_OFFLINE:
@@ -570,6 +480,106 @@ static struct notifier_block cmm_mem_nb = {
        .priority = CMM_MEM_HOTPLUG_PRI
 };
 
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+/*
+ * Filesystem context setup hook used by balloon_fs.
+ *
+ * Return value:
+ *     0 if init_pseudo() hands back a non-NULL result / -ENOMEM otherwise
+ */
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+       if (!init_pseudo(fc, PPC_CMM_MAGIC))
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Filesystem type mounted via kern_mount() to obtain the balloon inode. */
+static struct file_system_type balloon_fs = {
+       .name = "ppc-cmm",
+       .kill_sb = kill_anon_super,
+       .init_fs_context = cmm_init_fs_context,
+};
+
+/**
+ * cmm_migratepage - Move the loan of a balloon page over to a new page
+ * @b_dev_info:        balloon device whose page list is updated
+ * @newpage:   page that gets loaned and inserted into the balloon
+ * @page:      page that is removed from the balloon and set active again
+ * @mode:      migration mode (not evaluated here)
+ *
+ * Return value:
+ *     MIGRATEPAGE_SUCCESS on success / -EBUSY if @newpage cannot be loaned
+ **/
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+                          struct page *newpage, struct page *page,
+                          enum migrate_mode mode)
+{
+       unsigned long flags;
+
+       /*
+        * loan/"inflate" the newpage first.
+        *
+        * We might race against the cmm_thread who might discover after our
+        * loan request that another page is to be unloaned. However, once
+        * the cmm_thread runs again later, this error will automatically
+        * be corrected.
+        */
+       if (plpar_page_set_loaned(newpage)) {
+               /* Unlikely, but possible. Tell the caller not to retry now. */
+               pr_err_ratelimited("%s: Cannot set page to loaned.\n", __func__);
+               return -EBUSY;
+       }
+
+       /* balloon page list reference */
+       get_page(newpage);
+
+       /*
+        * The managed page count was decremented for the old page when it
+        * was loaned and is incremented again for whatever page is freed
+        * later. When the new page sits in a different zone, carry that
+        * adjustment over so that both zones keep a correct count. Within
+        * the same zone the two adjustments would cancel out.
+        */
+       if (page_zone(page) != page_zone(newpage)) {
+               adjust_managed_page_count(page, 1);
+               adjust_managed_page_count(newpage, -1);
+       }
+
+       spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+       balloon_page_insert(b_dev_info, newpage);
+       balloon_page_delete(page);
+       b_dev_info->isolated_pages--;
+       spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+       /*
+        * activate/"deflate" the old page. We ignore any errors just like the
+        * other callers.
+        */
+       plpar_page_set_active(page);
+
+       /* balloon page list reference */
+       put_page(page);
+
+       return MIGRATEPAGE_SUCCESS;
+}
+
+/*
+ * Initialize b_dev_info for page migration: register the migratepage
+ * callback, mount balloon_fs and attach an anonymous inode whose mapping
+ * uses balloon_aops.
+ *
+ * Return value:
+ *     0 on success / error code from kern_mount() or alloc_anon_inode()
+ */
+static int cmm_balloon_compaction_init(void)
+{
+       struct inode *inode;
+       int err;
+
+       balloon_devinfo_init(&b_dev_info);
+       b_dev_info.migratepage = cmm_migratepage;
+
+       balloon_mnt = kern_mount(&balloon_fs);
+       if (IS_ERR(balloon_mnt)) {
+               err = PTR_ERR(balloon_mnt);
+               goto out_clear_mnt;
+       }
+
+       inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               b_dev_info.inode = NULL;
+               kern_unmount(balloon_mnt);
+               goto out_clear_mnt;
+       }
+
+       b_dev_info.inode = inode;
+       inode->i_mapping->a_ops = &balloon_aops;
+       return 0;
+
+out_clear_mnt:
+       /* Never leave an ERR_PTR or stale mount behind for deinit. */
+       balloon_mnt = NULL;
+       return err;
+}
+/*
+ * Undo cmm_balloon_compaction_init(): drop the balloon inode (if one was
+ * set) and unmount balloon_fs, resetting both pointers to NULL.
+ */
+static void cmm_balloon_compaction_deinit(void)
+{
+       struct inode *inode = b_dev_info.inode;
+
+       if (inode)
+               iput(inode);
+       b_dev_info.inode = NULL;
+
+       kern_unmount(balloon_mnt);
+       balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+/*
+ * Variant used without CONFIG_BALLOON_COMPACTION: no mount, inode or
+ * migratepage callback is set up. b_dev_info is still passed to
+ * balloon_page_enqueue()/balloon_page_dequeue() when loaning and freeing
+ * pages, so it has to be initialized here as well.
+ *
+ * Return value:
+ *     always 0
+ */
+static int cmm_balloon_compaction_init(void)
+{
+       balloon_devinfo_init(&b_dev_info);
+       return 0;
+}
+
+/*
+ * Variant used without CONFIG_BALLOON_COMPACTION: the matching init stub
+ * creates neither a mount nor an inode, so there is nothing to release.
+ */
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
 /**
  * cmm_init - Module initialization
  *
@@ -583,9 +593,14 @@ static int cmm_init(void)
        if (!firmware_has_feature(FW_FEATURE_CMO))
                return -EOPNOTSUPP;
 
-       if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+       rc = cmm_balloon_compaction_init();
+       if (rc)
                return rc;
 
+       rc = register_oom_notifier(&cmm_oom_nb);
+       if (rc < 0)
+               goto out_balloon_compaction;
+
        if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
                goto out_oom_notifier;
 
@@ -596,10 +611,6 @@ static int cmm_init(void)
        if (rc)
                goto out_unregister_notifier;
 
-       rc = register_memory_isolate_notifier(&cmm_mem_isolate_nb);
-       if (rc)
-               goto out_unregister_notifier;
-
        if (cmm_disabled)
                return 0;
 
@@ -612,12 +623,13 @@ static int cmm_init(void)
        return 0;
 out_unregister_notifier:
        unregister_memory_notifier(&cmm_mem_nb);
-       unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
        cmm_unregister_sysfs(&cmm_dev);
 out_reboot_notifier:
        unregister_reboot_notifier(&cmm_reboot_nb);
 out_oom_notifier:
        unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+       cmm_balloon_compaction_deinit();
        return rc;
 }
 
@@ -634,9 +646,9 @@ static void cmm_exit(void)
        unregister_oom_notifier(&cmm_oom_nb);
        unregister_reboot_notifier(&cmm_reboot_nb);
        unregister_memory_notifier(&cmm_mem_nb);
-       unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
-       cmm_free_pages(loaned_pages);
+       cmm_free_pages(atomic_long_read(&loaned_pages));
        cmm_unregister_sysfs(&cmm_dev);
+       cmm_balloon_compaction_deinit();
 }
 
 /**
@@ -656,7 +668,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
                if (cmm_thread_ptr)
                        kthread_stop(cmm_thread_ptr);
                cmm_thread_ptr = NULL;
-               cmm_free_pages(loaned_pages);
+               cmm_free_pages(atomic_long_read(&loaned_pages));
        } else if (!disable && cmm_disabled) {
                cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
                if (IS_ERR(cmm_thread_ptr))