1 /* SPDX-License-Identifier: GPL-2.0 */
4 #include <linux/limits.h>
11 #include <sys/types.h>
13 #include <sys/socket.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
21 #include "../kselftest.h"
22 #include "cgroup_util.h"
25 * This test creates two nested cgroups with and without enabling
26 * the memory controller.
/*
 * A child cgroup should list "memory" in cgroup.controllers if and only if
 * its parent enabled it via "+memory" in cgroup.subtree_control.
 */
28 static int test_memcg_subtree_control(const char *root)
30 char *parent, *child, *parent2 = NULL, *child2 = NULL;
34 /* Create two nested cgroups with the memory controller enabled */
35 parent = cg_name(root, "memcg_test_0");
36 child = cg_name(root, "memcg_test_0/memcg_test_1");
37 if (!parent || !child)
40 if (cg_create(parent))
43 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
/* child must now see "memory" among its available controllers */
49 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
52 /* Create two nested cgroups without enabling memory controller */
53 parent2 = cg_name(root, "memcg_test_1");
54 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
55 if (!parent2 || !child2)
58 if (cg_create(parent2))
61 if (cg_create(child2))
64 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
/* without "+memory" in the parent, the child must NOT list "memory" */
67 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
/*
 * Child-process payload: allocate and touch 50M of anonymous memory, then
 * verify memory.current and the "anon" counter in memory.stat are close
 * to the allocated size.
 */
90 static int alloc_anon_50M_check(const char *cgroup, void *arg)
/* touch one byte per page so every page is actually faulted in */
98 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
101 current = cg_read_long(cgroup, "memory.current");
/* within 3% of 50M: accounting includes some overhead/slack */
105 if (!values_close(size, current, 3))
108 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
112 if (!values_close(anon, current, 3))
/*
 * Child-process payload: populate 50M of pagecache, then verify
 * memory.current and the "file" counter in memory.stat agree (within 10%).
 */
121 static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
123 size_t size = MB(50);
132 if (alloc_pagecache(fd, size))
135 current = cg_read_long(cgroup, "memory.current");
139 file = cg_read_key_long(cgroup, "memory.stat", "file ");
143 if (!values_close(file, current, 10))
154 * This test create a memory cgroup, allocates
155 * some anonymous memory and some pagecache
156 * and check memory.current and some memory.stat values.
158 static int test_memcg_current(const char *root)
164 memcg = cg_name(root, "memcg_test");
168 if (cg_create(memcg))
/* a freshly created cgroup should start with (near) zero usage */
171 current = cg_read_long(memcg, "memory.current");
175 if (cg_run(memcg, alloc_anon_50M_check, NULL))
178 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
/* Child-process payload: populate 50M of pagecache and return. */
190 static int alloc_pagecache_50M(const char *cgroup, void *arg)
194 return alloc_pagecache(fd, MB(50));
/*
 * Child-process payload: populate 50M of pagecache and then stay alive,
 * keeping the cache resident, until the parent process exits
 * (detected by getppid() changing when the child is reparented).
 */
197 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
200 int ppid = getppid();
202 if (alloc_pagecache(fd, MB(50)))
205 while (getppid() == ppid)
/*
 * Child-process payload: allocate anonymous memory (size in arg) and stay
 * alive until the parent exits, so the usage remains charged to the cgroup.
 */
211 static int alloc_anon_noexit(const char *cgroup, void *arg)
213 int ppid = getppid();
215 if (alloc_anon(cgroup, arg))
218 while (getppid() == ppid)
225 * Wait until processes are killed asynchronously by the OOM killer
226 * If we exceed a timeout, fail.
228 static int cg_test_proc_killed(const char *cgroup)
/* poll cgroup.procs until it reads empty, bounded number of attempts */
232 for (limit = 10; limit > 0; limit--) {
233 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
242 * First, this test creates the following hierarchy:
243 * A memory.min = 50M, memory.max = 200M
244 * A/B memory.min = 50M, memory.current = 50M
245 * A/B/C memory.min = 75M, memory.current = 50M
246 * A/B/D memory.min = 25M, memory.current = 50M
247 * A/B/E memory.min = 500M, memory.current = 0
248 * A/B/F memory.min = 0, memory.current = 50M
250 * Usages are pagecache, but the test keeps a running
251 * process in every leaf cgroup.
252 * Then it creates A/G and creates a significant
253 * memory pressure in it.
255 * A/B memory.current ~= 50M
256 * A/B/C memory.current ~= 33M
257 * A/B/D memory.current ~= 17M
258 * A/B/E memory.current ~= 0
260 * After that it tries to allocate more than there is
261 * unprotected memory in A available, and checks
262 * that memory.min protects pagecache even
265 static int test_memcg_min(const char *root)
/* parent[0]=A, parent[1]=A/B, parent[2]=A/G; children[]=A/B/{C,D,E,F} */
268 char *parent[3] = {NULL};
269 char *children[4] = {NULL};
278 parent[0] = cg_name(root, "memcg_test_0");
282 parent[1] = cg_name(parent[0], "memcg_test_1");
286 parent[2] = cg_name(parent[0], "memcg_test_2");
290 if (cg_create(parent[0]))
/* memory.min must default to 0 on a fresh cgroup */
293 if (cg_read_long(parent[0], "memory.min")) {
298 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
301 if (cg_write(parent[0], "memory.max", "200M"))
/* disable swap so reclaim must target pagecache, not swap out anon */
304 if (cg_write(parent[0], "memory.swap.max", "0"))
307 if (cg_create(parent[1]))
310 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
313 if (cg_create(parent[2]))
316 for (i = 0; i < ARRAY_SIZE(children); i++) {
317 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
321 if (cg_create(children[i]))
/* keep a live process in each leaf so protection applies */
327 cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
331 if (cg_write(parent[0], "memory.min", "50M"))
333 if (cg_write(parent[1], "memory.min", "50M"))
335 if (cg_write(children[0], "memory.min", "75M"))
337 if (cg_write(children[1], "memory.min", "25M"))
339 if (cg_write(children[2], "memory.min", "500M"))
341 if (cg_write(children[3], "memory.min", "0"))
/* wait for the background allocators to reach the expected usage */
345 while (!values_close(cg_read_long(parent[1], "memory.current"),
/* create memory pressure in A/G to force reclaim across A/B */
352 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
355 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
358 for (i = 0; i < ARRAY_SIZE(children); i++)
359 c[i] = cg_read_long(children[i], "memory.current");
/* protected leaves keep memory proportional to their memory.min */
361 if (!values_close(c[0], MB(33), 10))
364 if (!values_close(c[1], MB(17), 10))
367 if (!values_close(c[2], 0, 1))
/* over-allocation must fail: memory.min-protected memory is untouchable */
370 if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
373 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
/* cleanup: destroy leaves before their parents */
379 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
383 cg_destroy(children[i]);
387 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
391 cg_destroy(parent[i]);
399 * First, this test creates the following hierarchy:
400 * A memory.low = 50M, memory.max = 200M
401 * A/B memory.low = 50M, memory.current = 50M
402 * A/B/C memory.low = 75M, memory.current = 50M
403 * A/B/D memory.low = 25M, memory.current = 50M
404 * A/B/E memory.low = 500M, memory.current = 0
405 * A/B/F memory.low = 0, memory.current = 50M
407 * Usages are pagecache.
408 * Then it creates A/G and creates a significant
409 * memory pressure in it.
411 * Then it checks actual memory usages and expects that:
412 * A/B memory.current ~= 50M
413 * A/B/C memory.current ~= 33M
414 * A/B/D memory.current ~= 17M
415 * A/B/E memory.current ~= 0
417 * After that it tries to allocate more than there is
418 * unprotected memory in A available,
419 * and checks low and oom events in memory.events.
421 static int test_memcg_low(const char *root)
/* parent[0]=A, parent[1]=A/B, parent[2]=A/G; children[]=A/B/{C,D,E,F} */
424 char *parent[3] = {NULL};
425 char *children[4] = {NULL};
435 parent[0] = cg_name(root, "memcg_test_0");
439 parent[1] = cg_name(parent[0], "memcg_test_1");
443 parent[2] = cg_name(parent[0], "memcg_test_2");
447 if (cg_create(parent[0]))
/* memory.low must default to 0 on a fresh cgroup */
450 if (cg_read_long(parent[0], "memory.low"))
453 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
456 if (cg_write(parent[0], "memory.max", "200M"))
/* disable swap so reclaim pressure hits pagecache directly */
459 if (cg_write(parent[0], "memory.swap.max", "0"))
462 if (cg_create(parent[1]))
465 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
468 if (cg_create(parent[2]))
471 for (i = 0; i < ARRAY_SIZE(children); i++) {
472 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
476 if (cg_create(children[i]))
/* unlike test_memcg_min, the allocator exits: usage is pure pagecache */
482 if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
486 if (cg_write(parent[0], "memory.low", "50M"))
488 if (cg_write(parent[1], "memory.low", "50M"))
490 if (cg_write(children[0], "memory.low", "75M"))
492 if (cg_write(children[1], "memory.low", "25M"))
494 if (cg_write(children[2], "memory.low", "500M"))
496 if (cg_write(children[3], "memory.low", "0"))
/* create memory pressure in A/G */
499 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
502 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
505 for (i = 0; i < ARRAY_SIZE(children); i++)
506 c[i] = cg_read_long(children[i], "memory.current");
508 if (!values_close(c[0], MB(33), 10))
511 if (!values_close(c[1], MB(17), 10))
514 if (!values_close(c[2], 0, 1))
/* memory.low is soft: over-allocation should succeed, unlike memory.min */
517 if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
519 "memory.low prevents from allocating anon memory\n");
523 for (i = 0; i < ARRAY_SIZE(children); i++) {
524 oom = cg_read_key_long(children[i], "memory.events", "oom ");
525 low = cg_read_key_long(children[i], "memory.events", "low ");
/* the first two children had non-zero memory.low: low events expected */
529 if (i < 2 && low <= 0)
/* cleanup: destroy leaves before their parents */
538 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
542 cg_destroy(children[i]);
546 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
550 cg_destroy(parent[i]);
/*
 * Child-process payload: try to populate 50M of pagecache inside a cgroup
 * whose effective limit is 30M, then check that usage was capped into
 * the (29M, 30M] window.
 */
557 static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
559 size_t size = MB(50);
568 if (alloc_pagecache(fd, size))
571 current = cg_read_long(cgroup, "memory.current");
572 if (current <= MB(29) || current > MB(30))
584 * This test checks that memory.high limits the amount of
585 * memory which can be consumed by either anonymous memory
588 static int test_memcg_high(const char *root)
594 memcg = cg_name(root, "memcg_test");
598 if (cg_create(memcg))
/* memory.high must default to "max" on a fresh cgroup */
601 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
604 if (cg_write(memcg, "memory.swap.max", "0"))
607 if (cg_write(memcg, "memory.high", "30M"))
/* memory.high throttles/reclaims but does not OOM-kill: run succeeds */
610 if (cg_run(memcg, alloc_anon, (void *)MB(100)))
613 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
616 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
/* breaching memory.high must be reflected in the "high" event counter */
619 high = cg_read_key_long(memcg, "memory.events", "high ");
/*
 * Child-process payload: mmap an anonymous region of the requested size
 * (size passed via arg) and mlock it, forcing the whole allocation to be
 * charged at once.
 */
632 static int alloc_anon_mlock(const char *cgroup, void *arg)
634 size_t size = (size_t)arg;
637 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
639 if (buf == MAP_FAILED)
648 * This test checks that memory.high is able to throttle big single shot
649 * allocation i.e. large allocation within one kernel entry.
651 static int test_memcg_high_sync(const char *root)
653 int ret = KSFT_FAIL, pid, fd = -1;
655 long pre_high, pre_max;
656 long post_high, post_max;
658 memcg = cg_name(root, "memcg_test");
662 if (cg_create(memcg))
/* snapshot "high"/"max" event counters before the allocation */
665 pre_high = cg_read_key_long(memcg, "memory.events", "high ");
666 pre_max = cg_read_key_long(memcg, "memory.events", "max ");
667 if (pre_high < 0 || pre_max < 0)
670 if (cg_write(memcg, "memory.swap.max", "0"))
673 if (cg_write(memcg, "memory.high", "30M"))
676 if (cg_write(memcg, "memory.max", "140M"))
679 fd = memcg_prepare_for_wait(memcg);
/* 200M mlocked in one shot: must trip memory.high before memory.max */
683 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
689 post_high = cg_read_key_long(memcg, "memory.events", "high ");
690 post_max = cg_read_key_long(memcg, "memory.events", "max ");
691 if (post_high < 0 || post_max < 0)
/* "high" events must have grown; "max" events must be unchanged */
694 if (pre_high == post_high || pre_max != post_max)
709 * This test checks that memory.max limits the amount of
710 * memory which can be consumed by either anonymous memory
713 static int test_memcg_max(const char *root)
719 memcg = cg_name(root, "memcg_test");
723 if (cg_create(memcg))
/* memory.max must default to "max" on a fresh cgroup */
726 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
729 if (cg_write(memcg, "memory.swap.max", "0"))
732 if (cg_write(memcg, "memory.max", "30M"))
735 /* Should be killed by OOM killer */
736 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
739 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
742 current = cg_read_long(memcg, "memory.current");
743 if (current > MB(30) || !current)
/* hard-limit breaches must be counted in the "max" event counter */
746 max = cg_read_key_long(memcg, "memory.events", "max ");
/*
 * Child-process payload: touch 50M of anonymous memory inside a cgroup
 * limited to mem_max (arg), and verify the overflow went to swap:
 * memory.current ~= mem_max and memory.current + memory.swap.current ~= 50M.
 */
759 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
761 long mem_max = (long)arg;
762 size_t size = MB(50);
764 long mem_current, swap_current;
/* fault in every page so each one is charged */
768 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
771 mem_current = cg_read_long(cgroup, "memory.current");
772 if (!mem_current || !values_close(mem_current, mem_max, 3))
775 swap_current = cg_read_long(cgroup, "memory.swap.current");
777 !values_close(mem_current + swap_current, size, 3))
787 * This test checks that memory.swap.max limits the amount of
788 * anonymous memory which can be swapped out.
790 static int test_memcg_swap_max(const char *root)
/* the whole test is meaningless without swap configured on the host */
796 if (!is_swap_enabled())
799 memcg = cg_name(root, "memcg_test");
803 if (cg_create(memcg))
/* a fresh cgroup must report zero swap usage */
806 if (cg_read_long(memcg, "memory.swap.current")) {
811 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
814 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
817 if (cg_write(memcg, "memory.swap.max", "30M"))
820 if (cg_write(memcg, "memory.max", "30M"))
823 /* Should be killed by OOM killer */
824 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
/* exactly one OOM and one OOM kill are expected from the run above */
827 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
830 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
833 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
836 max = cg_read_key_long(memcg, "memory.events", "max ");
850 * This test disables swapping and tries to allocate anonymous memory
851 * up to OOM. Then it checks for oom and oom_kill events in
854 static int test_memcg_oom_events(const char *root)
859 memcg = cg_name(root, "memcg_test");
863 if (cg_create(memcg))
866 if (cg_write(memcg, "memory.max", "30M"))
869 if (cg_write(memcg, "memory.swap.max", "0"))
/* 100M > 30M limit with no swap: the allocator must be OOM-killed */
872 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
/* after the kill the cgroup must be empty of processes */
875 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
878 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
881 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
/* Arguments for the tcp_server() child: control pipe and port to bind. */
893 struct tcp_server_args {
/*
 * Child-process payload: IPv6 TCP server. Binds to the requested port,
 * reports bind status to the parent over the ctl pipe, then streams 1M
 * buffers to the first accepted client until the connection drops.
 */
898 static int tcp_server(const char *cgroup, void *arg)
900 struct tcp_server_args *srv_args = arg;
901 struct sockaddr_in6 saddr = { 0 };
902 socklen_t slen = sizeof(saddr);
903 int sk, client_sk, ctl_fd, yes = 1, ret = -1;
/* close the parent's end of the control pipe; keep the write end */
905 close(srv_args->ctl[0]);
906 ctl_fd = srv_args->ctl[1];
908 saddr.sin6_family = AF_INET6;
909 saddr.sin6_addr = in6addr_any;
910 saddr.sin6_port = htons(srv_args->port);
912 sk = socket(AF_INET6, SOCK_STREAM, 0);
916 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
/* on bind failure send errno to the parent so it can retry another port */
919 if (bind(sk, (struct sockaddr *)&saddr, slen)) {
920 write(ctl_fd, &errno, sizeof(errno));
/* signal successful bind (ret == 0 at this point) to the parent */
928 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
933 client_sk = accept(sk, NULL, NULL);
939 uint8_t buf[0x100000];
941 if (write(client_sk, buf, sizeof(buf)) <= 0) {
/* client closing the connection is the expected termination */
942 if (errno == ECONNRESET)
/*
 * TCP client half of the socket-accounting test: connects to
 * localhost:port, repeatedly reads 1M chunks from the server, and checks
 * that memory.current and the "sock " counter in memory.stat converge
 * (within 10%). Returns a KSFT_* status.
 */
955 static int tcp_client(const char *cgroup, unsigned short port)
957 const char server[] = "localhost";
960 int retries = 0x10; /* nice round number */
/*
 * port is unsigned short; "%hd" would print ports >= 32768 as negative
 * numbers (the port is randomly picked up to 60999), which getaddrinfo
 * cannot resolve. Use the unsigned conversion "%hu".
 */
963 snprintf(servport, sizeof(servport), "%hu", port);
964 ret = getaddrinfo(server, servport, NULL, &ai);
968 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
972 ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
978 uint8_t buf[0x100000];
981 if (read(sk, buf, sizeof(buf)) <= 0)
984 current = cg_read_long(cgroup, "memory.current");
985 sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
987 if (current < 0 || sock < 0)
/* pass as soon as total usage and socket-memory accounting agree */
993 if (values_close(current, sock, 10)) {
1007 * This test checks socket memory accounting.
1008 * The test forks a TCP server that listens on a random port between 1000
1009 * and 61000. Once it gets a client connection, it starts writing to
1011 * The TCP client interleaves reads from the socket with checking whether
1012 * memory.current and memory.stat.sock are similar.
1014 static int test_memcg_sock(const char *root)
1016 int bind_retries = 5, ret = KSFT_FAIL, pid, err;
1017 unsigned short port;
1020 memcg = cg_name(root, "memcg_test")
1024 if (cg_create(memcg))
/* retry a few times in case the random port is already taken */
1027 while (bind_retries--) {
1028 struct tcp_server_args args;
1033 port = args.port = 1000 + rand() % 60000;
1035 pid = cg_run_nowait(memcg, tcp_server, &args);
/* the server reports bind status (0 or errno) over the control pipe */
1040 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
1046 if (err != EADDRINUSE)
1049 waitpid(pid, NULL, 0);
/* all retries exhausted on EADDRINUSE: give up */
1052 if (err == EADDRINUSE) {
1057 if (tcp_client(memcg, port) != KSFT_PASS)
1060 waitpid(pid, &err, 0);
1061 if (WEXITSTATUS(err))
/* after both sides exited, socket memory should have been uncharged */
1064 if (cg_read_long(memcg, "memory.current") < 0)
1067 if (cg_read_key_long(memcg, "memory.stat", "sock "))
1080 * This test disables swapping and tries to allocate anonymous memory
1081 * up to OOM with memory.group.oom set. Then it checks that all
1082 * processes in the leaf were killed. It also checks that oom_events
1083 * were propagated to the parent level.
1085 static int test_memcg_oom_group_leaf_events(const char *root)
1087 int ret = KSFT_FAIL;
1088 char *parent, *child;
1090 parent = cg_name(root, "memcg_test_0");
1091 child = cg_name(root, "memcg_test_0/memcg_test_1");
1093 if (!parent || !child)
1096 if (cg_create(parent))
1099 if (cg_create(child))
1102 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1105 if (cg_write(child, "memory.max", "50M"))
1108 if (cg_write(child, "memory.swap.max", "0"))
/* group OOM: an OOM in the child kills every process in it */
1111 if (cg_write(child, "memory.oom.group", "1"))
/* one survivor in the parent, two victims in the child */
1114 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1115 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1116 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1117 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1120 if (cg_test_proc_killed(child))
1123 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
/* oom_kill events must be visible at the parent level too */
1126 if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
1143 * This test disables swapping and tries to allocate anonymous memory
1144 * up to OOM with memory.group.oom set. Then it checks that all
1145 * processes in the parent and leaf were killed.
1147 static int test_memcg_oom_group_parent_events(const char *root)
1149 int ret = KSFT_FAIL;
1150 char *parent, *child;
1152 parent = cg_name(root, "memcg_test_0");
1153 child = cg_name(root, "memcg_test_0/memcg_test_1");
1155 if (!parent || !child)
1158 if (cg_create(parent))
1161 if (cg_create(child))
1164 if (cg_write(parent, "memory.max", "80M"))
1167 if (cg_write(parent, "memory.swap.max", "0"))
/* group OOM set on the PARENT: a kill must wipe the whole subtree */
1170 if (cg_write(parent, "memory.oom.group", "1"))
1173 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1174 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1175 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1177 if (!cg_run(child, alloc_anon, (void *)MB(100)))
/* both the child and the parent cgroup must end up empty */
1180 if (cg_test_proc_killed(child))
1182 if (cg_test_proc_killed(parent))
1199 * This test disables swapping and tries to allocate anonymous memory
1200 * up to OOM with memory.group.oom set. Then it checks that all
1201 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1203 static int test_memcg_oom_group_score_events(const char *root)
1205 int ret = KSFT_FAIL;
1209 memcg = cg_name(root, "memcg_test_0");
1214 if (cg_create(memcg))
1217 if (cg_write(memcg, "memory.max", "50M"))
1220 if (cg_write(memcg, "memory.swap.max", "0"))
1223 if (cg_write(memcg, "memory.oom.group", "1"))
/* OOM_SCORE_ADJ_MIN exempts this process from the group OOM kill */
1226 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1227 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1230 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1231 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
/* exactly 3 kills expected: the allocator, the victim, not safe_pid */
1234 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
/* the exempted process must still be alive: kill() succeeds */
1237 if (kill(safe_pid, SIGKILL))
/* Expands to a {function pointer, stringified name} table entry. */
1251 #define T(x) { x, #x }
1253 int (*fn)(const char *root);
/* Registry of all test cases, run in order by main(). */
1256 T(test_memcg_subtree_control),
1257 T(test_memcg_current),
1261 T(test_memcg_high_sync),
1263 T(test_memcg_oom_events),
1264 T(test_memcg_swap_max),
1266 T(test_memcg_oom_group_leaf_events),
1267 T(test_memcg_oom_group_parent_events),
1268 T(test_memcg_oom_group_score_events),
/*
 * Entry point: locates the cgroup v2 root, verifies the memory controller
 * is available (enabling it in subtree_control if needed), then runs every
 * registered test and reports pass/skip/fail through the kselftest API.
 */
1272 int main(int argc, char **argv)
1274 char root[PATH_MAX];
1275 int i, ret = EXIT_SUCCESS;
1277 if (cg_find_unified_root(root, sizeof(root)))
1278 ksft_exit_skip("cgroup v2 isn't mounted\n");
1281 * Check that memory controller is available:
1282 * memory is listed in cgroup.controllers
1284 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1285 ksft_exit_skip("memory controller isn't available\n");
/* enable the memory controller for children of the root if not yet on */
1287 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1288 if (cg_write(root, "cgroup.subtree_control", "+memory"))
1289 ksft_exit_skip("Failed to set memory controller\n");
1291 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1292 switch (tests[i].fn(root)) {
1294 ksft_test_result_pass("%s\n", tests[i].name);
1297 ksft_test_result_skip("%s\n", tests[i].name);
1301 ksft_test_result_fail("%s\n", tests[i].name);