1 /* SPDX-License-Identifier: GPL-2.0 */
4 #include <linux/limits.h>
11 #include <sys/types.h>
13 #include <sys/socket.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
20 #include "../kselftest.h"
21 #include "cgroup_util.h"
24 * This test creates two nested cgroups with and without enabling
25 * the memory controller.
/*
 * Looks at cgroup.controllers of a nested child cgroup in two setups:
 * memcg_test_0, where +memory is written to the parent's
 * cgroup.subtree_control, and memcg_test_1, where nothing is written.
 *
 * NOTE(review): only part of the body is visible in this view; the
 * branch bodies, the cleanup path and the return value are not shown.
 */
27 static int test_memcg_subtree_control(const char *root)
29 char *parent, *child, *parent2 = NULL, *child2 = NULL;
33 /* Create two nested cgroups with the memory controller enabled */
34 parent = cg_name(root, "memcg_test_0");
35 child = cg_name(root, "memcg_test_0/memcg_test_1");
36 if (!parent || !child)
39 if (cg_create(parent))
42 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
/* NOTE(review): cg_read_strstr() presumably returns 0 on a match - confirm in cgroup_util */
48 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
51 /* Create two nested cgroups without enabling memory controller */
52 parent2 = cg_name(root, "memcg_test_1");
53 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
54 if (!parent2 || !child2)
57 if (cg_create(parent2))
60 if (cg_create(child2))
63 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
/* Same lookup as for the first child, but with the result negated */
66 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
/*
 * Steps through a buffer of size bytes in PAGE_SIZE strides, then
 * compares memory.current of the cgroup with size, and the anon entry
 * of memory.stat with memory.current.
 *
 * NOTE(review): the declarations, the loop body and the return paths
 * are outside this view. The third argument of values_close() looks
 * like a tolerance; confirm its unit in cgroup_util.
 */
89 static int alloc_anon_50M_check(const char *cgroup, void *arg)
97 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
100 current = cg_read_long(cgroup, "memory.current");
104 if (!values_close(size, current, 3))
107 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
111 if (!values_close(anon, current, 3))
/*
 * Fills MB(50) of page cache through alloc_pagecache() and then
 * compares the file entry of memory.stat with memory.current.
 *
 * NOTE(review): where fd comes from and what the function returns are
 * not visible in this view.
 */
120 static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
122 size_t size = MB(50);
131 if (alloc_pagecache(fd, size))
134 current = cg_read_long(cgroup, "memory.current");
138 file = cg_read_key_long(cgroup, "memory.stat", "file ");
/* A wider tolerance argument (10) is used here than in the anon check (3) */
142 if (!values_close(file, current, 10))
153 * This test creates a memory cgroup, allocates
154 * some anonymous memory and some pagecache
155 * and checks memory.current and some memory.stat values.
/*
 * Creates the memcg_test cgroup under root, reads its memory.current
 * and then runs alloc_anon_50M_check and alloc_pagecache_50M_check
 * inside it through cg_run().
 *
 * NOTE(review): how the initial memory.current value is judged and how
 * the cgroup is torn down is on lines outside this view.
 */
157 static int test_memcg_current(const char *root)
163 memcg = cg_name(root, "memcg_test");
167 if (cg_create(memcg))
170 current = cg_read_long(memcg, "memory.current");
174 if (cg_run(memcg, alloc_anon_50M_check, NULL))
177 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
/*
 * Callback handed to cg_run() by test_memcg_low: allocates MB(50) of
 * page cache on fd and returns the result of alloc_pagecache().
 *
 * NOTE(review): fd is assigned on a line not shown here; the caller
 * passes a descriptor through arg, so presumably fd is taken from arg.
 */
189 static int alloc_pagecache_50M(const char *cgroup, void *arg)
193 return alloc_pagecache(fd, MB(50));
/*
 * Allocates MB(50) of page cache and then keeps looping for as long as
 * getppid() still returns the parent pid recorded at entry.
 *
 * NOTE(review): the loop body and the origin of fd are outside this
 * view.
 */
196 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
/* Remember the parent so a change of parent can be detected below */
199 int ppid = getppid();
201 if (alloc_pagecache(fd, MB(50)))
204 while (getppid() == ppid)
/*
 * Forwards cgroup and arg to alloc_anon() and then keeps looping for as
 * long as getppid() still returns the parent pid recorded at entry.
 * Callers in this file pass the allocation size through arg, e.g.
 * (void *) MB(60).
 *
 * NOTE(review): the loop body is outside this view.
 */
210 static int alloc_anon_noexit(const char *cgroup, void *arg)
212 int ppid = getppid();
214 if (alloc_anon(cgroup, arg))
217 while (getppid() == ppid)
224 * Wait until processes are killed asynchronously by the OOM killer
225 * If we exceed a timeout, fail.
/*
 * Polls cgroup.procs of the given cgroup for at most 10 iterations,
 * comparing its content with the empty string.
 *
 * NOTE(review): the delay between iterations and the return values are
 * on lines outside this view.
 */
227 static int cg_test_proc_killed(const char *cgroup)
231 for (limit = 10; limit > 0; limit--) {
232 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
241 * First, this test creates the following hierarchy:
242 * A memory.min = 50M, memory.max = 200M
243 * A/B memory.min = 50M, memory.current = 50M
244 * A/B/C memory.min = 75M, memory.current = 50M
245 * A/B/D memory.min = 25M, memory.current = 50M
246 * A/B/E memory.min = 500M, memory.current = 0
247 * A/B/F memory.min = 0, memory.current = 50M
249 * Usages are pagecache, but the test keeps a running
250 * process in every leaf cgroup.
251 * Then it creates A/G and creates a significant
252 * memory pressure in it.
254 * A/B memory.current ~= 50M
255 * A/B/C memory.current ~= 33M
256 * A/B/D memory.current ~= 17M
257 * A/B/E memory.current ~= 0
259 * After that it tries to allocate more than there is
260 * unprotected memory in A available, and
261 * checks that memory.min protects pagecache even
/*
 * memory.min protection test.
 *
 * Mapping to the hierarchy in the comment above, as far as the visible
 * code shows: parent[0] is A (memory.max 200M, swap disabled),
 * parent[1] is A/B, parent[2] is the sibling used to create pressure,
 * and children[0..3] are created under parent[1] with memory.min of
 * 75M, 25M, 500M and 0.
 *
 * NOTE(review): several lines of the body (declarations, branch
 * bodies, the return) are outside this view.
 */
264 static int test_memcg_min(const char *root)
267 char *parent[3] = {NULL};
268 char *children[4] = {NULL};
277 parent[0] = cg_name(root, "memcg_test_0");
281 parent[1] = cg_name(parent[0], "memcg_test_1");
285 parent[2] = cg_name(parent[0], "memcg_test_2");
289 if (cg_create(parent[0]))
/* A freshly created cgroup is probed for a non-zero memory.min value */
292 if (cg_read_long(parent[0], "memory.min")) {
297 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
300 if (cg_write(parent[0], "memory.max", "200M"))
303 if (cg_write(parent[0], "memory.swap.max", "0"))
306 if (cg_create(parent[1]))
309 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
312 if (cg_create(parent[2]))
315 for (i = 0; i < ARRAY_SIZE(children); i++) {
316 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
320 if (cg_create(children[i]))
326 cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
330 if (cg_write(parent[0], "memory.min", "50M"))
332 if (cg_write(parent[1], "memory.min", "50M"))
334 if (cg_write(children[0], "memory.min", "75M"))
336 if (cg_write(children[1], "memory.min", "25M"))
338 if (cg_write(children[2], "memory.min", "500M"))
340 if (cg_write(children[3], "memory.min", "0"))
344 while (!values_close(cg_read_long(parent[1], "memory.current"),
/* Pressure from the sibling cgroup: 148M of anon memory under the 200M limit of A */
351 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
354 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
357 for (i = 0; i < ARRAY_SIZE(children); i++)
358 c[i] = cg_read_long(children[i], "memory.current");
360 if (!values_close(c[0], MB(33), 10))
363 if (!values_close(c[1], MB(17), 10))
366 if (!values_close(c[2], 0, 1))
/* Negated on purpose: this larger allocation is not expected to succeed */
369 if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
372 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
/* NOTE(review): the i >= 0 conditions below rely on i being signed - confirm its declaration */
378 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
382 cg_destroy(children[i]);
386 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
390 cg_destroy(parent[i]);
398 * First, this test creates the following hierarchy:
399 * A memory.low = 50M, memory.max = 200M
400 * A/B memory.low = 50M, memory.current = 50M
401 * A/B/C memory.low = 75M, memory.current = 50M
402 * A/B/D memory.low = 25M, memory.current = 50M
403 * A/B/E memory.low = 500M, memory.current = 0
404 * A/B/F memory.low = 0, memory.current = 50M
406 * Usages are pagecache.
407 * Then it creates A/G and creates a significant
408 * memory pressure in it.
410 * Then it checks actual memory usages and expects that:
411 * A/B memory.current ~= 50M
412 * A/B/C memory.current ~= 33M
413 * A/B/D memory.current ~= 17M
414 * A/B/E memory.current ~= 0
416 * After that it tries to allocate more than there is
417 * unprotected memory in A available,
418 * and checks low and oom events in memory.events.
/*
 * memory.low protection test.
 *
 * Same layout as test_memcg_min: parent[0] gets memory.max 200M with
 * swap disabled, parent[1] holds four child_memcg cgroups with
 * memory.low of 75M, 25M, 500M and 0, and parent[2] is used to create
 * pressure. Here the page cache is populated synchronously with
 * cg_run() and alloc_pagecache_50M on a shared fd.
 *
 * NOTE(review): several lines of the body (declarations, creation of
 * fd, branch bodies, the return) are outside this view.
 */
420 static int test_memcg_low(const char *root)
423 char *parent[3] = {NULL};
424 char *children[4] = {NULL};
434 parent[0] = cg_name(root, "memcg_test_0");
438 parent[1] = cg_name(parent[0], "memcg_test_1");
442 parent[2] = cg_name(parent[0], "memcg_test_2");
446 if (cg_create(parent[0]))
449 if (cg_read_long(parent[0], "memory.low"))
452 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
455 if (cg_write(parent[0], "memory.max", "200M"))
458 if (cg_write(parent[0], "memory.swap.max", "0"))
461 if (cg_create(parent[1]))
464 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
467 if (cg_create(parent[2]))
470 for (i = 0; i < ARRAY_SIZE(children); i++) {
471 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
475 if (cg_create(children[i]))
481 if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
485 if (cg_write(parent[0], "memory.low", "50M"))
487 if (cg_write(parent[1], "memory.low", "50M"))
489 if (cg_write(children[0], "memory.low", "75M"))
491 if (cg_write(children[1], "memory.low", "25M"))
493 if (cg_write(children[2], "memory.low", "500M"))
495 if (cg_write(children[3], "memory.low", "0"))
/* Pressure from the sibling cgroup: 148M of anon memory under the 200M limit */
498 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
501 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
504 for (i = 0; i < ARRAY_SIZE(children); i++)
505 c[i] = cg_read_long(children[i], "memory.current");
507 if (!values_close(c[0], MB(33), 10))
510 if (!values_close(c[1], MB(17), 10))
513 if (!values_close(c[2], 0, 1))
/* Unlike the memory.min test, this larger allocation is expected to succeed */
516 if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
518 "memory.low prevents from allocating anon memory\n");
522 for (i = 0; i < ARRAY_SIZE(children); i++) {
523 oom = cg_read_key_long(children[i], "memory.events", "oom ");
524 low = cg_read_key_long(children[i], "memory.events", "low ");
/* children[0] and children[1] are the ones checked for a positive low count */
528 if (i < 2 && low <= 0)
/* NOTE(review): the i >= 0 conditions below rely on i being signed - confirm its declaration */
537 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
541 cg_destroy(children[i]);
545 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
549 cg_destroy(parent[i]);
/*
 * Allocates MB(50) of page cache and then inspects memory.current of
 * the cgroup: values at or below MB(29), or above MB(30), take the
 * branch below.
 *
 * NOTE(review): the origin of fd and the body of that branch are
 * outside this view; presumably it is the failure path.
 */
556 static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
558 size_t size = MB(50);
567 if (alloc_pagecache(fd, size))
570 current = cg_read_long(cgroup, "memory.current");
571 if (current <= MB(29) || current > MB(30))
583 * This test checks that memory.high limits the amount of
584 * memory which can be consumed by either anonymous memory
/*
 * Creates memcg_test, compares the initial memory.high content with
 * the string max, disables swap, sets memory.high to 30M and then runs
 * three allocations in the cgroup before reading the high counter from
 * memory.events.
 *
 * NOTE(review): how the high counter is judged and the cleanup path
 * are outside this view.
 */
587 static int test_memcg_high(const char *root)
593 memcg = cg_name(root, "memcg_test");
597 if (cg_create(memcg))
600 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
603 if (cg_write(memcg, "memory.swap.max", "0"))
606 if (cg_write(memcg, "memory.high", "30M"))
609 if (cg_run(memcg, alloc_anon, (void *)MB(100)))
/* Negated on purpose: the 50M page cache check is not expected to pass under a 30M memory.high */
612 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
615 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
618 high = cg_read_key_long(memcg, "memory.events", "high ");
632 * This test checks that memory.max limits the amount of
633 * memory which can be consumed by either anonymous memory
/*
 * Creates memcg_test, compares the initial memory.max content with the
 * string max, disables swap, sets memory.max to 30M, runs an anonymous
 * and a page cache allocation in the cgroup, and then inspects
 * memory.current and the max counter of memory.events.
 *
 * NOTE(review): how the max counter is judged and the cleanup path are
 * outside this view.
 */
636 static int test_memcg_max(const char *root)
642 memcg = cg_name(root, "memcg_test");
646 if (cg_create(memcg))
649 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
652 if (cg_write(memcg, "memory.swap.max", "0"))
655 if (cg_write(memcg, "memory.max", "30M"))
658 /* Should be killed by OOM killer */
659 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
662 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
665 current = cg_read_long(memcg, "memory.current");
/* Usage above the 30M limit, or exactly zero, takes this branch */
666 if (current > MB(30) || !current)
669 max = cg_read_key_long(memcg, "memory.events", "max ");
/*
 * Steps through a MB(50) buffer in PAGE_SIZE strides, then checks that
 * memory.current is non-zero and close to the limit passed in arg, and
 * that memory.current plus memory.swap.current is close to the buffer
 * size.
 *
 * arg carries the expected memory limit as an integer cast to a
 * pointer; test_memcg_swap_max passes (void *)MB(30).
 *
 * NOTE(review): the allocation of buf, the loop body and the first half
 * of the final condition are outside this view.
 */
682 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
684 long mem_max = (long)arg;
685 size_t size = MB(50);
687 long mem_current, swap_current;
691 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
694 mem_current = cg_read_long(cgroup, "memory.current");
695 if (!mem_current || !values_close(mem_current, mem_max, 3))
698 swap_current = cg_read_long(cgroup, "memory.swap.current");
700 !values_close(mem_current + swap_current, size, 3))
710 * This test checks that memory.swap.max limits the amount of
711 * anonymous memory which can be swapped out.
/*
 * Starts with an is_swap_enabled() check, then creates memcg_test,
 * inspects the initial memory.swap.current, memory.max and
 * memory.swap.max, sets both limits to 30M and runs two allocations:
 * a 100M one that is not expected to succeed and the 50M swap check.
 *
 * NOTE(review): the branch bodies (including what happens when swap is
 * disabled), the judgement of the max counter and the cleanup path are
 * outside this view.
 */
713 static int test_memcg_swap_max(const char *root)
719 if (!is_swap_enabled())
722 memcg = cg_name(root, "memcg_test");
726 if (cg_create(memcg))
729 if (cg_read_long(memcg, "memory.swap.current")) {
734 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
737 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
740 if (cg_write(memcg, "memory.swap.max", "30M"))
743 if (cg_write(memcg, "memory.max", "30M"))
746 /* Should be killed by OOM killer */
747 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
/* Both counters are compared against exactly 1 after the single failed run */
750 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
753 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
756 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
759 max = cg_read_key_long(memcg, "memory.events", "max ");
773 * This test disables swapping and tries to allocate anonymous memory
774 * up to OOM. Then it checks for oom and oom_kill events in
/*
 * Creates memcg_test with memory.max 30M and swap disabled, runs a
 * 100M anonymous allocation that is not expected to succeed, then
 * checks that cgroup.procs compares equal to the empty string and that
 * the oom and oom_kill counters of memory.events are both 1.
 *
 * NOTE(review): branch bodies and the cleanup path are outside this
 * view.
 */
777 static int test_memcg_oom_events(const char *root)
782 memcg = cg_name(root, "memcg_test");
786 if (cg_create(memcg))
789 if (cg_write(memcg, "memory.max", "30M"))
792 if (cg_write(memcg, "memory.swap.max", "0"))
795 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
798 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
801 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
804 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
/*
 * Arguments handed to tcp_server() through cg_run_nowait(). The visible
 * users access a port member and a two-element ctl descriptor array.
 * NOTE(review): the member declarations are outside this view.
 */
816 struct tcp_server_args {
/*
 * Server side of the socket accounting test. Binds an IPv6 TCP socket
 * on in6addr_any and the port from arg, reports the bind outcome over
 * the control descriptor ctl[1], accepts one client and writes a 1 MiB
 * buffer to it.
 *
 * arg points to a struct tcp_server_args.
 *
 * NOTE(review): the listen call, the loop around the client write and
 * the return paths are outside this view.
 */
821 static int tcp_server(const char *cgroup, void *arg)
823 struct tcp_server_args *srv_args = arg;
824 struct sockaddr_in6 saddr = { 0 };
825 socklen_t slen = sizeof(saddr);
826 int sk, client_sk, ctl_fd, yes = 1, ret = -1;
/* Only ctl[1] is used in this function; ctl[0] is closed right away */
828 close(srv_args->ctl[0]);
829 ctl_fd = srv_args->ctl[1];
831 saddr.sin6_family = AF_INET6;
832 saddr.sin6_addr = in6addr_any;
833 saddr.sin6_port = htons(srv_args->port);
835 sk = socket(AF_INET6, SOCK_STREAM, 0);
839 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
/* On bind failure errno is sent over ctl_fd; test_memcg_sock compares it with EADDRINUSE */
842 if (bind(sk, (struct sockaddr *)&saddr, slen)) {
843 write(ctl_fd, &errno, sizeof(errno));
851 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
856 client_sk = accept(sk, NULL, NULL);
862 uint8_t buf[0x100000];
864 if (write(client_sk, buf, sizeof(buf)) <= 0) {
865 if (errno == ECONNRESET)
/*
 * Client side of the socket accounting test. Connects to localhost on
 * the given port, reads from the socket in 1 MiB chunks and after each
 * read compares memory.current of the cgroup with the sock entry of
 * memory.stat.
 *
 * cgroup: cgroup whose memory.current and memory.stat are sampled.
 * port:   TCP port the server was asked to bind to.
 *
 * The caller compares the result with KSFT_PASS.
 *
 * NOTE(review): the declarations, the retry loop and the return paths
 * are outside this view.
 */
878 static int tcp_client(const char *cgroup, unsigned short port)
880 const char server[] = "localhost";
883 int retries = 0x10; /* nice round number */
/* port is unsigned: a signed short conversion would print ports above 32767 as negative */
886 snprintf(servport, sizeof(servport), "%hu", port);
887 ret = getaddrinfo(server, servport, NULL, &ai);
891 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
895 ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
901 uint8_t buf[0x100000];
904 if (read(sk, buf, sizeof(buf)) <= 0)
907 current = cg_read_long(cgroup, "memory.current");
908 sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
/* Negative values take this branch; presumably they signal a failed read - confirm */
910 if (current < 0 || sock < 0)
916 if (values_close(current, sock, 10)) {
930 * This test checks socket memory accounting.
931 * The test forks a TCP server that listens on a random port between 1000
932 * and 61000. Once it gets a client connection, it starts writing to
934 * The TCP client interleaves reads from the socket with check whether
935 * memory.current and memory.stat.sock are similar.
/*
 * Driver for the socket accounting test. Creates memcg_test, starts
 * tcp_server in it on a port chosen as 1000 + rand() % 60000, reads the
 * server's bind status from args.ctl[0], runs tcp_client against that
 * port, reaps the server and finally inspects memory.current and the
 * sock entry of memory.stat.
 *
 * NOTE(review): creation of args.ctl, the branch bodies and the cleanup
 * path are outside this view.
 */
937 static int test_memcg_sock(const char *root)
939 int bind_retries = 5, ret = KSFT_FAIL, pid, err;
943 memcg = cg_name(root, "memcg_test");
947 if (cg_create(memcg))
/* Up to five attempts, each with a fresh port; EADDRINUSE is singled out below */
950 while (bind_retries--) {
951 struct tcp_server_args args;
956 port = args.port = 1000 + rand() % 60000;
958 pid = cg_run_nowait(memcg, tcp_server, &args);
963 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
969 if (err != EADDRINUSE)
972 waitpid(pid, NULL, 0);
975 if (err == EADDRINUSE) {
980 if (tcp_client(memcg, port) != KSFT_PASS)
/* err is reused here to hold the wait status of the server process */
983 waitpid(pid, &err, 0);
984 if (WEXITSTATUS(err))
987 if (cg_read_long(memcg, "memory.current") < 0)
/* A non-zero sock entry after the server has been reaped takes this branch */
990 if (cg_read_key_long(memcg, "memory.stat", "sock "))
1003 * This test disables swapping and tries to allocate anonymous memory
1004 * up to OOM with memory.oom.group set. Then it checks that all
1005 * processes in the leaf (but not the parent) were killed.
/*
 * Group OOM test with memory.oom.group set on the leaf. The child
 * cgroup gets memory.max 50M, swap disabled and memory.oom.group 1.
 * One long-lived allocator is started in the parent and two in the
 * child, then a 100M allocation that is not expected to succeed is run
 * in the child. Afterwards the child is expected to have no processes
 * and a positive oom_kill count, while the parent's oom_kill count is
 * compared with 0.
 *
 * NOTE(review): branch bodies and the cleanup path are outside this
 * view.
 */
1007 static int test_memcg_oom_group_leaf_events(const char *root)
1009 int ret = KSFT_FAIL;
1010 char *parent, *child;
1012 parent = cg_name(root, "memcg_test_0");
1013 child = cg_name(root, "memcg_test_0/memcg_test_1");
1015 if (!parent || !child)
1018 if (cg_create(parent))
1021 if (cg_create(child))
1024 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1027 if (cg_write(child, "memory.max", "50M"))
1030 if (cg_write(child, "memory.swap.max", "0"))
1033 if (cg_write(child, "memory.oom.group", "1"))
/* Return values of the three background launches are not checked */
1036 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1037 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1038 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1039 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1042 if (cg_test_proc_killed(child))
1045 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1048 if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
1065 * This test disables swapping and tries to allocate anonymous memory
1066 * up to OOM with memory.oom.group set. Then it checks that all
1067 * processes in the parent and leaf were killed.
/*
 * Group OOM test with memory.oom.group set on the parent. The parent
 * cgroup gets memory.max 80M, swap disabled and memory.oom.group 1.
 * One long-lived allocator is started in the parent and two in the
 * child, then a 100M allocation that is not expected to succeed is run
 * in the child. Afterwards both the child and the parent are polled
 * with cg_test_proc_killed().
 *
 * NOTE(review): branch bodies and the cleanup path are outside this
 * view.
 */
1069 static int test_memcg_oom_group_parent_events(const char *root)
1071 int ret = KSFT_FAIL;
1072 char *parent, *child;
1074 parent = cg_name(root, "memcg_test_0");
1075 child = cg_name(root, "memcg_test_0/memcg_test_1");
1077 if (!parent || !child)
1080 if (cg_create(parent))
1083 if (cg_create(child))
1086 if (cg_write(parent, "memory.max", "80M"))
1089 if (cg_write(parent, "memory.swap.max", "0"))
1092 if (cg_write(parent, "memory.oom.group", "1"))
/* Return values of the three background launches are not checked */
1095 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1096 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1097 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1099 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1102 if (cg_test_proc_killed(child))
1104 if (cg_test_proc_killed(parent))
1121 * This test disables swapping and tries to allocate anonymous memory
1122 * up to OOM with memory.oom.group set. Then it checks that all
1123 * processes were killed except those set with OOM_SCORE_ADJ_MIN
/*
 * Group OOM test with one process marked OOM_SCORE_ADJ_MIN. The cgroup
 * gets memory.max 50M, swap disabled and memory.oom.group 1. Two
 * long-lived allocators are started, the first of which (safe_pid) has
 * its oom score adjusted, then a 100M allocation that is not expected
 * to succeed is run. The oom_kill count is compared with 3 and safe_pid
 * is then killed explicitly with SIGKILL.
 *
 * NOTE(review): branch bodies and the cleanup path are outside this
 * view.
 */
1125 static int test_memcg_oom_group_score_events(const char *root)
1127 int ret = KSFT_FAIL;
1131 memcg = cg_name(root, "memcg_test_0");
1136 if (cg_create(memcg))
1139 if (cg_write(memcg, "memory.max", "50M"))
1142 if (cg_write(memcg, "memory.swap.max", "0"))
1145 if (cg_write(memcg, "memory.oom.group", "1"))
1148 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1149 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1152 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1153 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
/* NOTE(review): the exact count of 3 is as written here; what it is made up of is not shown in this view */
1156 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
/* kill() succeeding implies safe_pid still existed at this point */
1159 if (kill(safe_pid, SIGKILL))
/*
 * Test table. T(x) expands to an initializer holding the function x and
 * its name as a string literal; main() walks the table, calls each fn
 * with the cgroup root and reports the result under name.
 *
 * NOTE(review): the struct header and some table entries are outside
 * this view.
 */
1173 #define T(x) { x, #x }
1175 int (*fn)(const char *root);
1178 T(test_memcg_subtree_control),
1179 T(test_memcg_current),
1184 T(test_memcg_oom_events),
1185 T(test_memcg_swap_max),
1187 T(test_memcg_oom_group_leaf_events),
1188 T(test_memcg_oom_group_parent_events),
1189 T(test_memcg_oom_group_score_events),
/*
 * Entry point. Locates the unified cgroup root, skips the whole run
 * when that lookup or the memory controller lookup in
 * cgroup.controllers reports a problem, tries to write +memory to the
 * root cgroup.subtree_control when the lookup there reports it missing,
 * and then runs every entry of tests[], reporting pass, skip or fail
 * per test.
 *
 * NOTE(review): the case labels of the switch and the final return are
 * outside this view.
 */
1193 int main(int argc, char **argv)
1195 char root[PATH_MAX];
1196 int i, ret = EXIT_SUCCESS;
1198 if (cg_find_unified_root(root, sizeof(root)))
1199 ksft_exit_skip("cgroup v2 isn't mounted\n");
1202 * Check that memory controller is available:
1203 * memory is listed in cgroup.controllers
1205 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1206 ksft_exit_skip("memory controller isn't available\n");
1208 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1209 if (cg_write(root, "cgroup.subtree_control", "+memory"))
1210 ksft_exit_skip("Failed to set memory controller\n");
1212 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1213 switch (tests[i].fn(root)) {
1215 ksft_test_result_pass("%s\n", tests[i].name);
1218 ksft_test_result_skip("%s\n", tests[i].name);
1222 ksft_test_result_fail("%s\n", tests[i].name);