15 #define MADV_PAGEOUT 21
18 #define BASE_ADDR ((void *)(1UL << 30))
19 static unsigned long hpage_pmd_size;
20 static unsigned long page_size;
21 static int hpage_pmd_nr;
23 #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
24 #define PID_SMAPS "/proc/self/smaps"
32 static const char *thp_enabled_strings[] = {
42 THP_DEFRAG_DEFER_MADVISE,
47 static const char *thp_defrag_strings[] = {
65 static const char *shmem_enabled_strings[] = {
75 struct khugepaged_settings {
77 unsigned int alloc_sleep_millisecs;
78 unsigned int scan_sleep_millisecs;
79 unsigned int max_ptes_none;
80 unsigned int max_ptes_swap;
81 unsigned int max_ptes_shared;
82 unsigned long pages_to_scan;
86 enum thp_enabled thp_enabled;
87 enum thp_defrag thp_defrag;
88 enum shmem_enabled shmem_enabled;
91 struct khugepaged_settings khugepaged;
94 static struct settings default_settings = {
95 .thp_enabled = THP_MADVISE,
96 .thp_defrag = THP_DEFRAG_ALWAYS,
97 .shmem_enabled = SHMEM_NEVER,
102 .alloc_sleep_millisecs = 10,
103 .scan_sleep_millisecs = 10,
107 static struct settings saved_settings;
108 static bool skip_settings_restore;
110 static int exit_status;
112 static void success(const char *msg)
114 printf(" \e[32m%s\e[0m\n", msg);
117 static void fail(const char *msg)
119 printf(" \e[31m%s\e[0m\n", msg);
123 static int read_file(const char *path, char *buf, size_t buflen)
128 fd = open(path, O_RDONLY);
132 numread = read(fd, buf, buflen - 1);
141 return (unsigned int) numread;
144 static int write_file(const char *path, const char *buf, size_t buflen)
149 fd = open(path, O_WRONLY);
153 numwritten = write(fd, buf, buflen - 1);
158 return (unsigned int) numwritten;
161 static int read_string(const char *name, const char *strings[])
168 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
169 if (ret >= PATH_MAX) {
170 printf("%s: Pathname is too long\n", __func__);
174 if (!read_file(path, buf, sizeof(buf))) {
179 c = strchr(buf, '[');
181 printf("%s: Parse failure\n", __func__);
186 memmove(buf, c, sizeof(buf) - (c - buf));
188 c = strchr(buf, ']');
190 printf("%s: Parse failure\n", __func__);
196 while (strings[ret]) {
197 if (!strcmp(strings[ret], buf))
202 printf("Failed to parse %s\n", name);
206 static void write_string(const char *name, const char *val)
211 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
212 if (ret >= PATH_MAX) {
213 printf("%s: Pathname is too long\n", __func__);
217 if (!write_file(path, val, strlen(val) + 1)) {
223 static const unsigned long read_num(const char *name)
229 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
230 if (ret >= PATH_MAX) {
231 printf("%s: Pathname is too long\n", __func__);
235 ret = read_file(path, buf, sizeof(buf));
237 perror("read_file(read_num)");
241 return strtoul(buf, NULL, 10);
244 static void write_num(const char *name, unsigned long num)
250 ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
251 if (ret >= PATH_MAX) {
252 printf("%s: Pathname is too long\n", __func__);
256 sprintf(buf, "%ld", num);
257 if (!write_file(path, buf, strlen(buf) + 1)) {
263 static void write_settings(struct settings *settings)
265 struct khugepaged_settings *khugepaged = &settings->khugepaged;
267 write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
268 write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
269 write_string("shmem_enabled",
270 shmem_enabled_strings[settings->shmem_enabled]);
271 write_num("debug_cow", settings->debug_cow);
272 write_num("use_zero_page", settings->use_zero_page);
274 write_num("khugepaged/defrag", khugepaged->defrag);
275 write_num("khugepaged/alloc_sleep_millisecs",
276 khugepaged->alloc_sleep_millisecs);
277 write_num("khugepaged/scan_sleep_millisecs",
278 khugepaged->scan_sleep_millisecs);
279 write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
280 write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
281 write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
282 write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
285 static void restore_settings(int sig)
287 if (skip_settings_restore)
290 printf("Restore THP and khugepaged settings...");
291 write_settings(&saved_settings);
299 static void save_settings(void)
301 printf("Save THP and khugepaged settings...");
302 saved_settings = (struct settings) {
303 .thp_enabled = read_string("enabled", thp_enabled_strings),
304 .thp_defrag = read_string("defrag", thp_defrag_strings),
306 read_string("shmem_enabled", shmem_enabled_strings),
307 .debug_cow = read_num("debug_cow"),
308 .use_zero_page = read_num("use_zero_page"),
310 saved_settings.khugepaged = (struct khugepaged_settings) {
311 .defrag = read_num("khugepaged/defrag"),
312 .alloc_sleep_millisecs =
313 read_num("khugepaged/alloc_sleep_millisecs"),
314 .scan_sleep_millisecs =
315 read_num("khugepaged/scan_sleep_millisecs"),
316 .max_ptes_none = read_num("khugepaged/max_ptes_none"),
317 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
318 .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
319 .pages_to_scan = read_num("khugepaged/pages_to_scan"),
323 signal(SIGTERM, restore_settings);
324 signal(SIGINT, restore_settings);
325 signal(SIGHUP, restore_settings);
326 signal(SIGQUIT, restore_settings);
329 static void adjust_settings(void)
332 printf("Adjust settings...");
333 write_settings(&default_settings);
337 #define MAX_LINE_LENGTH 500
339 static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
341 while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
342 if (!strncmp(buf, pattern, strlen(pattern)))
348 static bool check_huge(void *addr)
353 char buffer[MAX_LINE_LENGTH];
354 char addr_pattern[MAX_LINE_LENGTH];
356 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
357 (unsigned long) addr);
358 if (ret >= MAX_LINE_LENGTH) {
359 printf("%s: Pattern is too long\n", __func__);
364 fp = fopen(PID_SMAPS, "r");
366 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
369 if (!check_for_pattern(fp, addr_pattern, buffer))
372 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
373 hpage_pmd_size >> 10);
374 if (ret >= MAX_LINE_LENGTH) {
375 printf("%s: Pattern is too long\n", __func__);
379 * Fetch the AnonHugePages: in the same block and check whether it got
380 * the expected number of hugepages next.
382 if (!check_for_pattern(fp, "AnonHugePages:", buffer))
385 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
395 static bool check_swap(void *addr, unsigned long size)
400 char buffer[MAX_LINE_LENGTH];
401 char addr_pattern[MAX_LINE_LENGTH];
403 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
404 (unsigned long) addr);
405 if (ret >= MAX_LINE_LENGTH) {
406 printf("%s: Pattern is too long\n", __func__);
411 fp = fopen(PID_SMAPS, "r");
413 printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
416 if (!check_for_pattern(fp, addr_pattern, buffer))
419 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
421 if (ret >= MAX_LINE_LENGTH) {
422 printf("%s: Pattern is too long\n", __func__);
426 * Fetch the Swap: in the same block and check whether it got
427 * the expected amount of swap next.
429 if (!check_for_pattern(fp, "Swap:", buffer))
432 if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
441 static void *alloc_mapping(void)
445 p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
446 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
447 if (p != BASE_ADDR) {
448 printf("Failed to allocate VMA at %p\n", BASE_ADDR);
455 static void fill_memory(int *p, unsigned long start, unsigned long end)
459 for (i = start / page_size; i < end / page_size; i++)
460 p[i * page_size / sizeof(*p)] = i + 0xdead0000;
463 static void validate_memory(int *p, unsigned long start, unsigned long end)
467 for (i = start / page_size; i < end / page_size; i++) {
468 if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
469 printf("Page %d is corrupted: %#x\n",
470 i, p[i * page_size / sizeof(*p)]);
477 static bool wait_for_scan(const char *msg, char *p)
480 int timeout = 6; /* 3 seconds */
484 printf("Unexpected huge page\n");
488 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
490 /* Wait until the second full_scan completed */
491 full_scans = read_num("khugepaged/full_scans") + 2;
493 printf("%s...", msg);
497 if (read_num("khugepaged/full_scans") >= full_scans)
503 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
505 return timeout == -1;
508 static void alloc_at_fault(void)
510 struct settings settings = default_settings;
513 settings.thp_enabled = THP_ALWAYS;
514 write_settings(&settings);
518 printf("Allocate huge page on fault...");
524 write_settings(&default_settings);
526 madvise(p, page_size, MADV_DONTNEED);
527 printf("Split huge PMD on MADV_DONTNEED...");
532 munmap(p, hpage_pmd_size);
535 static void collapse_full(void)
540 fill_memory(p, 0, hpage_pmd_size);
541 if (wait_for_scan("Collapse fully populated PTE table", p))
543 else if (check_huge(p))
547 validate_memory(p, 0, hpage_pmd_size);
548 munmap(p, hpage_pmd_size);
551 static void collapse_empty(void)
556 if (wait_for_scan("Do not collapse empty PTE table", p))
558 else if (check_huge(p))
562 munmap(p, hpage_pmd_size);
565 static void collapse_single_pte_entry(void)
570 fill_memory(p, 0, page_size);
571 if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
573 else if (check_huge(p))
577 validate_memory(p, 0, page_size);
578 munmap(p, hpage_pmd_size);
581 static void collapse_max_ptes_none(void)
583 int max_ptes_none = hpage_pmd_nr / 2;
584 struct settings settings = default_settings;
587 settings.khugepaged.max_ptes_none = max_ptes_none;
588 write_settings(&settings);
592 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
593 if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
595 else if (check_huge(p))
599 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
601 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
602 if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
604 else if (check_huge(p))
608 validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
610 munmap(p, hpage_pmd_size);
611 write_settings(&default_settings);
614 static void collapse_swapin_single_pte(void)
618 fill_memory(p, 0, hpage_pmd_size);
620 printf("Swapout one page...");
621 if (madvise(p, page_size, MADV_PAGEOUT)) {
622 perror("madvise(MADV_PAGEOUT)");
625 if (check_swap(p, page_size)) {
632 if (wait_for_scan("Collapse with swapping in single PTE entry", p))
634 else if (check_huge(p))
638 validate_memory(p, 0, hpage_pmd_size);
640 munmap(p, hpage_pmd_size);
643 static void collapse_max_ptes_swap(void)
645 int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
650 fill_memory(p, 0, hpage_pmd_size);
651 printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
652 if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
653 perror("madvise(MADV_PAGEOUT)");
656 if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
663 if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
665 else if (check_huge(p))
669 validate_memory(p, 0, hpage_pmd_size);
671 fill_memory(p, 0, hpage_pmd_size);
672 printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
673 if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
674 perror("madvise(MADV_PAGEOUT)");
677 if (check_swap(p, max_ptes_swap * page_size)) {
684 if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
686 else if (check_huge(p))
690 validate_memory(p, 0, hpage_pmd_size);
692 munmap(p, hpage_pmd_size);
695 static void collapse_single_pte_entry_compound(void)
701 printf("Allocate huge page...");
702 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
703 fill_memory(p, 0, hpage_pmd_size);
708 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
710 printf("Split huge page leaving single PTE mapping compound page...");
711 madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
717 if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
719 else if (check_huge(p))
723 validate_memory(p, 0, page_size);
724 munmap(p, hpage_pmd_size);
727 static void collapse_full_of_compound(void)
733 printf("Allocate huge page...");
734 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
735 fill_memory(p, 0, hpage_pmd_size);
741 printf("Split huge page leaving single PTE page table full of compound pages...");
742 madvise(p, page_size, MADV_NOHUGEPAGE);
743 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
749 if (wait_for_scan("Collapse PTE table full of compound pages", p))
751 else if (check_huge(p))
755 validate_memory(p, 0, hpage_pmd_size);
756 munmap(p, hpage_pmd_size);
759 static void collapse_compound_extreme(void)
765 for (i = 0; i < hpage_pmd_nr; i++) {
766 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
767 i + 1, hpage_pmd_nr);
769 madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
770 fill_memory(BASE_ADDR, 0, hpage_pmd_size);
771 if (!check_huge(BASE_ADDR)) {
772 printf("Failed to allocate huge page\n");
775 madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
777 p = mremap(BASE_ADDR - i * page_size,
778 i * page_size + hpage_pmd_size,
780 MREMAP_MAYMOVE | MREMAP_FIXED,
781 BASE_ADDR + 2 * hpage_pmd_size);
782 if (p == MAP_FAILED) {
783 perror("mremap+unmap");
787 p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
789 (i + 1) * page_size + hpage_pmd_size,
790 MREMAP_MAYMOVE | MREMAP_FIXED,
791 BASE_ADDR - (i + 1) * page_size);
792 if (p == MAP_FAILED) {
793 perror("mremap+alloc");
798 munmap(BASE_ADDR, hpage_pmd_size);
799 fill_memory(p, 0, hpage_pmd_size);
805 if (wait_for_scan("Collapse PTE table full of different compound pages", p))
807 else if (check_huge(p))
812 validate_memory(p, 0, hpage_pmd_size);
813 munmap(p, hpage_pmd_size);
816 static void collapse_fork(void)
823 printf("Allocate small page...");
824 fill_memory(p, 0, page_size);
830 printf("Share small page over fork()...");
832 /* Do not touch settings on child exit */
833 skip_settings_restore = true;
841 fill_memory(p, page_size, 2 * page_size);
843 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
845 else if (check_huge(p))
850 validate_memory(p, 0, page_size);
851 munmap(p, hpage_pmd_size);
856 exit_status += WEXITSTATUS(wstatus);
858 printf("Check if parent still has small page...");
863 validate_memory(p, 0, page_size);
864 munmap(p, hpage_pmd_size);
867 static void collapse_fork_compound(void)
874 printf("Allocate huge page...");
875 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
876 fill_memory(p, 0, hpage_pmd_size);
882 printf("Share huge page over fork()...");
884 /* Do not touch settings on child exit */
885 skip_settings_restore = true;
893 printf("Split huge page PMD in child process...");
894 madvise(p, page_size, MADV_NOHUGEPAGE);
895 madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
900 fill_memory(p, 0, page_size);
902 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
903 if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
905 else if (check_huge(p))
909 write_num("khugepaged/max_ptes_shared",
910 default_settings.khugepaged.max_ptes_shared);
912 validate_memory(p, 0, hpage_pmd_size);
913 munmap(p, hpage_pmd_size);
918 exit_status += WEXITSTATUS(wstatus);
920 printf("Check if parent still has huge page...");
925 validate_memory(p, 0, hpage_pmd_size);
926 munmap(p, hpage_pmd_size);
929 static void collapse_max_ptes_shared()
931 int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
937 printf("Allocate huge page...");
938 madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
939 fill_memory(p, 0, hpage_pmd_size);
945 printf("Share huge page over fork()...");
947 /* Do not touch settings on child exit */
948 skip_settings_restore = true;
956 printf("Trigger CoW on page %d of %d...",
957 hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
958 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
964 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
966 else if (!check_huge(p))
971 printf("Trigger CoW on page %d of %d...",
972 hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
973 fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
980 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
982 else if (check_huge(p))
987 validate_memory(p, 0, hpage_pmd_size);
988 munmap(p, hpage_pmd_size);
993 exit_status += WEXITSTATUS(wstatus);
995 printf("Check if parent still has huge page...");
1000 validate_memory(p, 0, hpage_pmd_size);
1001 munmap(p, hpage_pmd_size);
1006 setbuf(stdout, NULL);
1008 page_size = getpagesize();
1009 hpage_pmd_size = read_num("hpage_pmd_size");
1010 hpage_pmd_nr = hpage_pmd_size / page_size;
1012 default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1013 default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1014 default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1015 default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1023 collapse_single_pte_entry();
1024 collapse_max_ptes_none();
1025 collapse_swapin_single_pte();
1026 collapse_max_ptes_swap();
1027 collapse_single_pte_entry_compound();
1028 collapse_full_of_compound();
1029 collapse_compound_extreme();
1031 collapse_fork_compound();
1032 collapse_max_ptes_shared();
1034 restore_settings(0);