powerpc/pseries/cmm: Switch to balloon_page_alloc()
[linux-2.6-microblaze.git] / arch / powerpc / platforms / pseries / cmm.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Collaborative memory management interface.
4  *
5  * Copyright (C) 2008 IBM Corporation
6  * Author(s): Brian King (brking@linux.vnet.ibm.com),
7  */
8
9 #include <linux/ctype.h>
10 #include <linux/delay.h>
11 #include <linux/errno.h>
12 #include <linux/fs.h>
13 #include <linux/gfp.h>
14 #include <linux/kthread.h>
15 #include <linux/module.h>
16 #include <linux/oom.h>
17 #include <linux/reboot.h>
18 #include <linux/sched.h>
19 #include <linux/stringify.h>
20 #include <linux/swap.h>
21 #include <linux/device.h>
22 #include <linux/mount.h>
23 #include <linux/pseudo_fs.h>
24 #include <linux/magic.h>
25 #include <linux/balloon_compaction.h>
26 #include <asm/firmware.h>
27 #include <asm/hvcall.h>
28 #include <asm/mmu.h>
29 #include <asm/pgalloc.h>
30 #include <linux/uaccess.h>
31 #include <linux/memory.h>
32 #include <asm/plpar_wrappers.h>
33
34 #include "pseries.h"
35
#define CMM_DRIVER_VERSION	"1.0.0"
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds loaning stays suspended after hotplug */
#define CMM_DEBUG		0	/* debug logging off by default */
#define CMM_DISABLE		0	/* CMM enabled by default */
#define CMM_OOM_KB		1024	/* KB returned per OOM notification */
#define CMM_MIN_MEM_MB		256	/* never balloon below this much memory */
/* Convert between page counts and kilobytes. */
#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))

/* Priority of cmm_mem_nb on the memory-hotplug notifier chain. */
#define CMM_MEM_HOTPLUG_PRI	1
47
/* Tunables; all are exposed as writable module parameters below. */
static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
/* Anchor device for the sysfs attributes (see cmm_sysfs_register()). */
static struct device cmm_dev;

MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);

module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
		 "before loaning resumes. "
		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
		 "[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
		 "[Default=" __stringify(CMM_DEBUG) "]");
77
/*
 * Debug logging, gated by the "debug" module parameter.  Wrapped in
 * do { } while (0) so the macro expands to a single statement: the
 * previous bare if-statement form was unsafe in unbraced if/else
 * bodies (dangling-else hazard).
 */
#define cmm_dbg(...) do { if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } } while (0)
79
/* Pages currently loaned to the hypervisor. */
static atomic_long_t loaned_pages;
/* Pages we want loaned; updated by cmm_get_mpp() and the OOM notifier. */
static unsigned long loaned_pages_target;
/* Pages released by the OOM notifier; reset via the oom_freed_kb attr. */
static unsigned long oom_freed_pages;

static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* Worker started by cmm_init()/cmm_set_disable(). */
static struct task_struct *cmm_thread_ptr;
/* Balloon device backing the loaned-page list (balloon_compaction API). */
static struct balloon_dev_info b_dev_info;
89
90 static long plpar_page_set_loaned(struct page *page)
91 {
92         const unsigned long vpa = page_to_phys(page);
93         unsigned long cmo_page_sz = cmo_get_page_size();
94         long rc = 0;
95         int i;
96
97         for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
98                 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
99
100         for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
101                 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
102                                    vpa + i - cmo_page_sz, 0);
103
104         return rc;
105 }
106
107 static long plpar_page_set_active(struct page *page)
108 {
109         const unsigned long vpa = page_to_phys(page);
110         unsigned long cmo_page_sz = cmo_get_page_size();
111         long rc = 0;
112         int i;
113
114         for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
115                 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
116
117         for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
118                 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
119                                    vpa + i - cmo_page_sz, 0);
120
121         return rc;
122 }
123
/**
 * cmm_alloc_pages - Allocate pages and mark them as loaned
 * @nr: number of pages to allocate
 *
 * Return value:
 *      number of pages requested to be allocated which were not
 **/
static long cmm_alloc_pages(long nr)
{
	struct page *page;
	long rc;

	cmm_dbg("Begin request for %ld pages\n", nr);

	while (nr) {
		/* Exit if a hotplug operation is in progress or occurred */
		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				mutex_unlock(&hotplug_mutex);
				break;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			break;
		}

		page = balloon_page_alloc();
		if (!page)
			break;
		/* Loan the page to the hypervisor before it becomes
		 * visible on the balloon list. */
		rc = plpar_page_set_loaned(page);
		if (rc) {
			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
			__free_page(page);
			break;
		}

		balloon_page_enqueue(&b_dev_info, page);
		atomic_long_inc(&loaned_pages);
		/* Loaned pages no longer count toward managed memory. */
		adjust_managed_page_count(page, -1);
		nr--;
	}

	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
	return nr;
}
169
170 /**
171  * cmm_free_pages - Free pages and mark them as active
172  * @nr: number of pages to free
173  *
174  * Return value:
175  *      number of pages requested to be freed which were not
176  **/
177 static long cmm_free_pages(long nr)
178 {
179         struct page *page;
180
181         cmm_dbg("Begin free of %ld pages.\n", nr);
182         while (nr) {
183                 page = balloon_page_dequeue(&b_dev_info);
184                 if (!page)
185                         break;
186                 plpar_page_set_active(page);
187                 adjust_managed_page_count(page, 1);
188                 __free_page(page);
189                 atomic_long_dec(&loaned_pages);
190                 nr--;
191         }
192         cmm_dbg("End request with %ld pages unfulfilled\n", nr);
193         return nr;
194 }
195
196 /**
197  * cmm_oom_notify - OOM notifier
198  * @self:       notifier block struct
199  * @dummy:      not used
200  * @parm:       returned - number of pages freed
201  *
202  * Return value:
203  *      NOTIFY_OK
204  **/
205 static int cmm_oom_notify(struct notifier_block *self,
206                           unsigned long dummy, void *parm)
207 {
208         unsigned long *freed = parm;
209         long nr = KB2PAGES(oom_kb);
210
211         cmm_dbg("OOM processing started\n");
212         nr = cmm_free_pages(nr);
213         loaned_pages_target = atomic_long_read(&loaned_pages);
214         *freed += KB2PAGES(oom_kb) - nr;
215         oom_freed_pages += KB2PAGES(oom_kb) - nr;
216         cmm_dbg("OOM processing complete\n");
217         return NOTIFY_OK;
218 }
219
/**
 * cmm_get_mpp - Read memory performance parameters
 *
 * Makes hcall to query the current page loan request from the hypervisor.
 * Computes a new loaned_pages_target, clamped so the loan never drives
 * the partition below min_mem_mb and never goes negative.
 *
 * Return value:
 *      nothing
 **/
static void cmm_get_mpp(void)
{
	/* Snapshot once so the arithmetic below is self-consistent. */
	const long __loaned_pages = atomic_long_read(&loaned_pages);
	const long total_pages = totalram_pages() + __loaned_pages;
	int rc;
	struct hvcall_mpp_data mpp_data;
	signed long active_pages_target, page_loan_request, target;
	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;

	rc = h_get_mpp(&mpp_data);

	if (rc != H_SUCCESS)
		return;

	/* loan_request is converted from bytes to pages; it may be
	 * negative, meaning the hypervisor is giving memory back. */
	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
	target = page_loan_request + __loaned_pages;

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;

	/* Credit pages already surrendered by the OOM notifier.
	 * target is >= 0 here, so the signed/unsigned compare is safe. */
	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	active_pages_target = total_pages - target;

	/* Never balloon below the min_mem_mb floor. */
	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;

	if (target < 0)
		target = 0;

	loaned_pages_target = target;

	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
		page_loan_request, __loaned_pages, loaned_pages_target,
		oom_freed_pages, totalram_pages());
}
267
/* Invoked on OOM to shrink the balloon (see cmm_oom_notify()). */
static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};
271
/**
 * cmm_thread - CMM task thread
 * @dummy:      not used
 *
 * Polls the hypervisor every "delay" seconds and inflates/deflates the
 * balloon toward loaned_pages_target.  Loaning pauses while a memory
 * hotplug operation holds hotplug_mutex, and for hotplug_delay seconds
 * after one completes.
 *
 * Return value:
 *      0
 **/
static int cmm_thread(void *dummy)
{
	unsigned long timeleft;
	long __loaned_pages;

	while (1) {
		timeleft = msleep_interruptible(delay * 1000);

		/* Nonzero timeleft means the sleep was interrupted. */
		if (kthread_should_stop() || timeleft)
			break;

		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				hotplug_occurred = 0;
				mutex_unlock(&hotplug_mutex);
				cmm_dbg("Hotplug operation has occurred, "
						"loaning activity suspended "
						"for %d seconds.\n",
						hotplug_delay);
				timeleft = msleep_interruptible(hotplug_delay *
						1000);
				if (kthread_should_stop() || timeleft)
					break;
				continue;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			cmm_dbg("Hotplug operation in progress, activity "
					"suspended\n");
			continue;
		}

		cmm_get_mpp();

		__loaned_pages = atomic_long_read(&loaned_pages);
		if (loaned_pages_target > __loaned_pages) {
			/* Allocation fell short: stop chasing the target
			 * until the next loan request arrives. */
			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
				loaned_pages_target = __loaned_pages;
		} else if (loaned_pages_target < __loaned_pages)
			cmm_free_pages(__loaned_pages - loaned_pages_target);
	}
	return 0;
}
322
/* Generate a read-only sysfs show routine printing "format" / "args". */
#define CMM_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct device *dev,	\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* Current and target loan sizes, reported in KB. */
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
334
335 static ssize_t show_oom_pages(struct device *dev,
336                               struct device_attribute *attr, char *buf)
337 {
338         return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
339 }
340
341 static ssize_t store_oom_pages(struct device *dev,
342                                struct device_attribute *attr,
343                                const char *buf, size_t count)
344 {
345         unsigned long val = simple_strtoul (buf, NULL, 10);
346
347         if (!capable(CAP_SYS_ADMIN))
348                 return -EPERM;
349         if (val != 0)
350                 return -EBADMSG;
351
352         oom_freed_pages = 0;
353         return count;
354 }
355
static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

/* Attributes created on cmm_dev by cmm_sysfs_register(). */
static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

static struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};

/* Intentionally empty: cmm_dev is static, nothing to free on release. */
static void cmm_release_device(struct device *dev)
{
}
373
374 /**
375  * cmm_sysfs_register - Register with sysfs
376  *
377  * Return value:
378  *      0 on success / other on failure
379  **/
380 static int cmm_sysfs_register(struct device *dev)
381 {
382         int i, rc;
383
384         if ((rc = subsys_system_register(&cmm_subsys, NULL)))
385                 return rc;
386
387         dev->id = 0;
388         dev->bus = &cmm_subsys;
389         dev->release = cmm_release_device;
390
391         if ((rc = device_register(dev)))
392                 goto subsys_unregister;
393
394         for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
395                 if ((rc = device_create_file(dev, cmm_attrs[i])))
396                         goto fail;
397         }
398
399         return 0;
400
401 fail:
402         while (--i >= 0)
403                 device_remove_file(dev, cmm_attrs[i]);
404         device_unregister(dev);
405 subsys_unregister:
406         bus_unregister(&cmm_subsys);
407         return rc;
408 }
409
410 /**
411  * cmm_unregister_sysfs - Unregister from sysfs
412  *
413  **/
414 static void cmm_unregister_sysfs(struct device *dev)
415 {
416         int i;
417
418         for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
419                 device_remove_file(dev, cmm_attrs[i]);
420         device_unregister(dev);
421         bus_unregister(&cmm_subsys);
422 }
423
424 /**
425  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
426  *
427  **/
428 static int cmm_reboot_notifier(struct notifier_block *nb,
429                                unsigned long action, void *unused)
430 {
431         if (action == SYS_RESTART) {
432                 if (cmm_thread_ptr)
433                         kthread_stop(cmm_thread_ptr);
434                 cmm_thread_ptr = NULL;
435                 cmm_free_pages(atomic_long_read(&loaned_pages));
436         }
437         return NOTIFY_DONE;
438 }
439
440 static struct notifier_block cmm_reboot_nb = {
441         .notifier_call = cmm_reboot_notifier,
442 };
443
/**
 * cmm_memory_cb - Handle memory hotplug notifier calls
 * @self:       notifier block struct
 * @action:     action to take
 * @arg:        struct memory_notify data for handler
 *
 * Return value:
 *      NOTIFY_OK or notifier error based on subfunction return value
 *
 **/
static int cmm_memory_cb(struct notifier_block *self,
			unsigned long action, void *arg)
{
	int ret = 0;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Deliberately held across notifier invocations: the
		 * matching unlock is in MEM_OFFLINE/MEM_CANCEL_OFFLINE.
		 * While held, cmm_thread()/cmm_alloc_pages() see the
		 * trylock fail and suspend loaning. */
		mutex_lock(&hotplug_mutex);
		hotplug_occurred = 1;
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&hotplug_mutex);
		cmm_dbg("Memory offline operation complete.\n");
		break;
	case MEM_GOING_ONLINE:
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		break;
	}

	return notifier_from_errno(ret);
}

static struct notifier_block cmm_mem_nb = {
	.notifier_call = cmm_memory_cb,
	.priority = CMM_MEM_HOTPLUG_PRI
};
482
483 #ifdef CONFIG_BALLOON_COMPACTION
484 static struct vfsmount *balloon_mnt;
485
486 static int cmm_init_fs_context(struct fs_context *fc)
487 {
488         return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
489 }
490
491 static struct file_system_type balloon_fs = {
492         .name = "ppc-cmm",
493         .init_fs_context = cmm_init_fs_context,
494         .kill_sb = kill_anon_super,
495 };
496
/**
 * cmm_migratepage - Migrate an isolated balloon page during compaction
 * @b_dev_info: balloon device owning the page
 * @newpage:    freshly allocated replacement page
 * @page:       isolated balloon page being migrated away
 * @mode:       migration mode (unused here)
 *
 * Return value:
 *      MIGRATEPAGE_SUCCESS, or -EBUSY if @newpage cannot be loaned
 **/
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	unsigned long flags;

	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
		return -EBUSY;
	}

	/* balloon page list reference */
	get_page(newpage);

	/* Swap the pages on the balloon list under pages_lock; the old
	 * page was isolated, so isolated_pages is decremented here. */
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	balloon_page_insert(b_dev_info, newpage);
	balloon_page_delete(page);
	b_dev_info->isolated_pages--;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);

	/* balloon page list reference */
	put_page(page);

	return MIGRATEPAGE_SUCCESS;
}
537
538 static int cmm_balloon_compaction_init(void)
539 {
540         int rc;
541
542         balloon_devinfo_init(&b_dev_info);
543         b_dev_info.migratepage = cmm_migratepage;
544
545         balloon_mnt = kern_mount(&balloon_fs);
546         if (IS_ERR(balloon_mnt)) {
547                 rc = PTR_ERR(balloon_mnt);
548                 balloon_mnt = NULL;
549                 return rc;
550         }
551
552         b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
553         if (IS_ERR(b_dev_info.inode)) {
554                 rc = PTR_ERR(b_dev_info.inode);
555                 b_dev_info.inode = NULL;
556                 kern_unmount(balloon_mnt);
557                 balloon_mnt = NULL;
558                 return rc;
559         }
560
561         b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
562         return 0;
563 }
564 static void cmm_balloon_compaction_deinit(void)
565 {
566         if (b_dev_info.inode)
567                 iput(b_dev_info.inode);
568         b_dev_info.inode = NULL;
569         kern_unmount(balloon_mnt);
570         balloon_mnt = NULL;
571 }
572 #else /* CONFIG_BALLOON_COMPACTION */
/* Without CONFIG_BALLOON_COMPACTION, init/deinit are no-ops. */
static int cmm_balloon_compaction_init(void)
{
	return 0;
}

static void cmm_balloon_compaction_deinit(void)
{
}
581 #endif /* CONFIG_BALLOON_COMPACTION */
582
583 /**
584  * cmm_init - Module initialization
585  *
586  * Return value:
587  *      0 on success / other on failure
588  **/
589 static int cmm_init(void)
590 {
591         int rc;
592
593         if (!firmware_has_feature(FW_FEATURE_CMO))
594                 return -EOPNOTSUPP;
595
596         rc = cmm_balloon_compaction_init();
597         if (rc)
598                 return rc;
599
600         rc = register_oom_notifier(&cmm_oom_nb);
601         if (rc < 0)
602                 goto out_balloon_compaction;
603
604         if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
605                 goto out_oom_notifier;
606
607         if ((rc = cmm_sysfs_register(&cmm_dev)))
608                 goto out_reboot_notifier;
609
610         rc = register_memory_notifier(&cmm_mem_nb);
611         if (rc)
612                 goto out_unregister_notifier;
613
614         if (cmm_disabled)
615                 return 0;
616
617         cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
618         if (IS_ERR(cmm_thread_ptr)) {
619                 rc = PTR_ERR(cmm_thread_ptr);
620                 goto out_unregister_notifier;
621         }
622
623         return 0;
624 out_unregister_notifier:
625         unregister_memory_notifier(&cmm_mem_nb);
626         cmm_unregister_sysfs(&cmm_dev);
627 out_reboot_notifier:
628         unregister_reboot_notifier(&cmm_reboot_nb);
629 out_oom_notifier:
630         unregister_oom_notifier(&cmm_oom_nb);
631 out_balloon_compaction:
632         cmm_balloon_compaction_deinit();
633         return rc;
634 }
635
/**
 * cmm_exit - Module exit
 *
 * Return value:
 *      nothing
 **/
static void cmm_exit(void)
{
	/* Stop the worker first so nothing re-inflates the balloon. */
	if (cmm_thread_ptr)
		kthread_stop(cmm_thread_ptr);
	unregister_oom_notifier(&cmm_oom_nb);
	unregister_reboot_notifier(&cmm_reboot_nb);
	unregister_memory_notifier(&cmm_mem_nb);
	/* Return every loaned page before tearing down balloon state. */
	cmm_free_pages(atomic_long_read(&loaned_pages));
	cmm_unregister_sysfs(&cmm_dev);
	cmm_balloon_compaction_deinit();
}
653
/**
 * cmm_set_disable - Disable/Enable CMM
 *
 * "disable" module parameter setter: disabling stops the worker and
 * returns all loaned pages; re-enabling restarts the worker.
 *
 * Return value:
 *      0 on success / other on failure
 **/
static int cmm_set_disable(const char *val, const struct kernel_param *kp)
{
	/* simple_strtoul ignores trailing garbage, so "1abc" counts
	 * as 1; only the leading numeric value is validated. */
	int disable = simple_strtoul(val, NULL, 10);

	if (disable != 0 && disable != 1)
		return -EINVAL;

	if (disable && !cmm_disabled) {
		if (cmm_thread_ptr)
			kthread_stop(cmm_thread_ptr);
		cmm_thread_ptr = NULL;
		cmm_free_pages(atomic_long_read(&loaned_pages));
	} else if (!disable && cmm_disabled) {
		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
		if (IS_ERR(cmm_thread_ptr))
			return PTR_ERR(cmm_thread_ptr);
	}

	/* NOTE(review): no locking against concurrent writers or the
	 * reboot notifier, which also touches cmm_thread_ptr — confirm
	 * this is acceptable for a rarely written parameter. */
	cmm_disabled = disable;
	return 0;
}
681
/* Writable at runtime; cmm_set_disable() starts/stops the balloon. */
module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

module_init(cmm_init);
module_exit(cmm_exit);