1 // SPDX-License-Identifier: GPL-2.0
7 #include <linux/types.h>
14 #include <sys/prctl.h>
17 #include <sys/socket.h>
19 #include <linux/kcmp.h>
22 #include "../clone3/clone3_selftests.h"
23 #include "../kselftest.h"
24 #include "../kselftest_harness.h"
38 const struct ns_info {
42 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
43 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
44 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
45 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
46 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
47 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
48 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
49 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
52 FIXTURE(current_nsset)
56 int nsfds[PIDFD_NS_MAX];
58 pid_t child_pid_exited;
59 int child_pidfd_exited;
63 int child_nsfds1[PIDFD_NS_MAX];
67 int child_nsfds2[PIDFD_NS_MAX];
/*
 * Thin wrapper around the raw waitid system call.
 *
 * @which:   id type selector, passed through unchanged (e.g. P_PID).
 * @pid:     id to wait for.
 * @options: wait options, passed through unchanged.
 *
 * Both pointer arguments of the system call (the info and rusage slots) are
 * passed as NULL. Returns the syscall() result converted to int.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return (int)rc;
}
75 pid_t create_child(int *pidfd, unsigned flags)
77 struct clone_args args = {
78 .flags = CLONE_PIDFD | flags,
79 .exit_signal = SIGCHLD,
80 .pidfd = ptr_to_u64(pidfd),
83 return sys_clone3(&args, sizeof(struct clone_args));
/*
 * Fixture setup.
 *
 * Steps visible here: mark every namespace fd slot as unopened, open the
 * namespace files of the calling process, open a pidfd for it, create three
 * children through create_child(), and open the namespace files of the two
 * long-lived children under /proc/<pid>/ns/.
 */
86 FIXTURE_SETUP(current_nsset)
/* Start with -EBADF in every slot so that teardown only closes slots >= 0. */
90 for (i = 0; i < PIDFD_NS_MAX; i++) {
91 self->nsfds[i] = -EBADF;
92 self->child_nsfds1[i] = -EBADF;
93 self->child_nsfds2[i] = -EBADF;
/* Directory fd used as the base for the openat() calls below. */
96 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
97 ASSERT_GE(proc_fd, 0) {
98 TH_LOG("%m - Failed to open /proc/self/ns");
101 self->pid = getpid();
/* Open one fd per namespace type of the calling process. */
102 for (i = 0; i < PIDFD_NS_MAX; i++) {
103 const struct ns_info *info = &ns_info[i];
104 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
105 if (self->nsfds[i] < 0) {
/* A missing entry (ENOENT) is tolerated; any other errno is reported. */
106 EXPECT_EQ(errno, ENOENT) {
107 TH_LOG("%m - Failed to open %s namespace for process %d",
108 info->name, self->pid);
/*
 * NOTE(review): self->pidfd is assigned again by a second sys_pidfd_open()
 * call further down and no close of this first descriptor is visible, so one
 * of the two opens looks redundant.
 */
113 self->pidfd = sys_pidfd_open(self->pid, 0);
114 EXPECT_GT(self->pidfd, 0) {
115 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
118 /* Create task that exits right away. */
119 self->child_pid_exited = create_child(&self->child_pidfd_exited,
120 CLONE_NEWUSER | CLONE_NEWNET);
/*
 * NOTE(review): the child observes 0 here, so EXPECT_GT also trips in the
 * child before the == 0 check below; the other two children use EXPECT_GE.
 */
121 EXPECT_GT(self->child_pid_exited, 0);
123 if (self->child_pid_exited == 0)
/*
 * WNOWAIT: wait until the child has exited but leave it waitable; teardown
 * performs the final waitid(WEXITED) on the same pid.
 */
126 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
128 self->pidfd = sys_pidfd_open(self->pid, 0);
129 EXPECT_GE(self->pidfd, 0) {
130 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
133 /* Create tasks that will be stopped. */
134 self->child_pid1 = create_child(&self->child_pidfd1,
135 CLONE_NEWUSER | CLONE_NEWNS |
136 CLONE_NEWCGROUP | CLONE_NEWIPC |
137 CLONE_NEWUTS | CLONE_NEWPID |
139 EXPECT_GE(self->child_pid1, 0);
141 if (self->child_pid1 == 0) {
146 self->child_pid2 = create_child(&self->child_pidfd2,
147 CLONE_NEWUSER | CLONE_NEWNS |
148 CLONE_NEWCGROUP | CLONE_NEWIPC |
149 CLONE_NEWUTS | CLONE_NEWPID |
151 EXPECT_GE(self->child_pid2, 0);
153 if (self->child_pid2 == 0) {
/* Collect the namespace fds of both long-lived children. */
158 for (i = 0; i < PIDFD_NS_MAX; i++) {
161 const struct ns_info *info = &ns_info[i];
/*
 * NOTE(review): self->nsfds[i] was already opened by the first loop above;
 * assigning it again here replaces the earlier descriptor without a visible
 * close.
 */
163 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
164 if (self->nsfds[i] < 0) {
165 EXPECT_EQ(errno, ENOENT) {
166 TH_LOG("%m - Failed to open %s namespace for process %d",
167 info->name, self->pid);
/* Build /proc/<child_pid1>/ns/<name>; ret >= sizeof(p) would mean truncation. */
171 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
172 self->child_pid1, info->name);
174 EXPECT_LT(ret, sizeof(p));
176 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
177 if (self->child_nsfds1[i] < 0) {
178 EXPECT_EQ(errno, ENOENT) {
179 TH_LOG("%m - Failed to open %s namespace for process %d",
180 info->name, self->child_pid1);
/* Same again for the second child. */
184 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
185 self->child_pid2, info->name);
187 EXPECT_LT(ret, sizeof(p));
189 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
190 if (self->child_nsfds2[i] < 0) {
191 EXPECT_EQ(errno, ENOENT) {
/*
 * NOTE(review): the path opened above was built from child_pid2, but the
 * message below prints child_pid1; this looks like a copy-paste slip.
 */
192 TH_LOG("%m - Failed to open %s namespace for process %d",
193 info->name, self->child_pid1);
201 FIXTURE_TEARDOWN(current_nsset)
205 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
206 SIGKILL, NULL, 0), 0);
207 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
208 SIGKILL, NULL, 0), 0);
210 for (i = 0; i < PIDFD_NS_MAX; i++) {
211 if (self->nsfds[i] >= 0)
212 close(self->nsfds[i]);
213 if (self->child_nsfds1[i] >= 0)
214 close(self->child_nsfds1[i]);
215 if (self->child_nsfds2[i] >= 0)
216 close(self->child_nsfds2[i]);
219 if (self->child_pidfd1 >= 0)
220 EXPECT_EQ(0, close(self->child_pidfd1));
221 if (self->child_pidfd2 >= 0)
222 EXPECT_EQ(0, close(self->child_pidfd2));
223 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
224 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
225 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
/*
 * Open the namespace file /proc/<pid>/ns/<ns> and return the descriptor.
 *
 * @pid: process whose namespace file is opened.
 * @ns:  entry name below /proc/<pid>/ns/.
 *
 * On the normal path the result of open() is returned as is.
 */
228 static int preserve_ns(const int pid, const char *ns)
233 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
/* Catches both an snprintf() error and a path that did not fit into path. */
234 if (ret < 0 || (size_t)ret >= sizeof(path))
/* Opened read-only with close-on-exec set. */
237 return open(path, O_RDONLY | O_CLOEXEC);
/*
 * Check whether the namespace behind ns_fd1 is the namespace of type ns that
 * process pid2 currently uses.
 *
 * @ns_fd1: open namespace fd to compare against.
 * @pid2:   process whose /proc/<pid2>/ns/<ns> entry is opened via
 *          preserve_ns().
 * @ns:     namespace entry name.
 *
 * Both descriptors are passed to fstat() and the device and inode numbers are
 * compared. Callers in this file expect the value 1 when both refer to the
 * same namespace.
 */
240 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
244 struct stat ns_st1, ns_st2;
246 ret = fstat(ns_fd1, &ns_st1);
/* Second descriptor: the namespace file of pid2. */
250 ns_fd2 = preserve_ns(pid2, ns);
254 ret = fstat(ns_fd2, &ns_st2);
259 /* processes are in the same namespace */
/* Same device and same inode number means the same namespace object. */
260 if ((ns_st1.st_dev == ns_st2.st_dev) &&
261 (ns_st1.st_ino == ns_st2.st_ino))
264 /* processes are in different namespaces */
268 /* Test that we can't pass garbage to the kernel. */
269 TEST_F(current_nsset, invalid_flags)
271 ASSERT_NE(setns(self->pidfd, 0), 0);
272 EXPECT_EQ(errno, EINVAL);
274 ASSERT_NE(setns(self->pidfd, -1), 0);
275 EXPECT_EQ(errno, EINVAL);
277 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
278 EXPECT_EQ(errno, EINVAL);
280 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
281 EXPECT_EQ(errno, EINVAL);
284 /* Test that we can't attach to a task that has already exited. */
/*
 * Uses the pidfd of the child that setup already waited on with WNOWAIT.
 * The setns() call must fail with ESRCH, and afterwards every namespace fd
 * recorded for the test process in setup must still match.
 */
285 TEST_F(current_nsset, pidfd_exited_child)
290 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
292 EXPECT_EQ(errno, ESRCH);
295 for (i = 0; i < PIDFD_NS_MAX; i++) {
296 const struct ns_info *info = &ns_info[i];
297 /* Verify that we haven't changed any namespaces. */
/* Slots that could not be opened in setup are negative and are not checked. */
298 if (self->nsfds[i] >= 0)
299 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
/*
 * Attach to the namespaces of the first child one namespace type per
 * iteration, each time passing the child pidfd together with a single flag
 * to setns(), and verify the result after every step.
 */
303 TEST_F(current_nsset, pidfd_incremental_setns)
309 for (i = 0; i < PIDFD_NS_MAX; i++) {
310 const struct ns_info *info = &ns_info[i];
/* A negative slot means setup could not open this namespace file. */
313 if (self->child_nsfds1[i] < 0)
317 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
318 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
319 info->name, self->child_pid1,
324 /* Verify that we have changed to the correct namespaces. */
/*
 * For CLONE_NEWPID the comparison uses the fd of our original pid namespace
 * instead of the child one; per setns(2), switching the PID namespace only
 * takes effect for children created afterwards.
 */
325 if (info->flag == CLONE_NEWPID)
326 nsfd = self->nsfds[i];
328 nsfd = self->child_nsfds1[i];
329 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
330 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
331 info->name, self->child_pid1,
334 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
335 info->name, self->child_pid1, self->child_pidfd1);
/*
 * Same walk as the pidfd based incremental test, but each setns() call is
 * given the individual namespace fd of the first child instead of its pidfd.
 */
339 TEST_F(current_nsset, nsfd_incremental_setns)
345 for (i = 0; i < PIDFD_NS_MAX; i++) {
346 const struct ns_info *info = &ns_info[i];
/* A negative slot means setup could not open this namespace file. */
349 if (self->child_nsfds1[i] < 0)
353 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
354 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
355 info->name, self->child_pid1,
356 self->child_nsfds1[i]);
360 /* Verify that we have changed to the correct namespaces. */
/*
 * For CLONE_NEWPID the comparison uses the fd of our original pid namespace
 * instead of the child one; per setns(2), switching the PID namespace only
 * takes effect for children created afterwards.
 */
361 if (info->flag == CLONE_NEWPID)
362 nsfd = self->nsfds[i];
364 nsfd = self->child_nsfds1[i];
365 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
366 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
367 info->name, self->child_pid1,
368 self->child_nsfds1[i]);
370 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
371 info->name, self->child_pid1, self->child_nsfds1[i]);
/*
 * Attach to the namespaces of the first child with a single setns() call on
 * its pidfd, then verify each namespace type in a second pass.
 */
375 TEST_F(current_nsset, pidfd_one_shot_setns)
/* First pass: walk the namespace types and log the ones being requested. */
381 for (i = 0; i < PIDFD_NS_MAX; i++) {
382 const struct ns_info *info = &ns_info[i];
/* A negative slot means setup could not open this namespace file. */
384 if (self->child_nsfds1[i] < 0)
388 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
389 info->name, self->child_pid1);
/* One call with the combined flag word. */
392 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
393 TH_LOG("%m - Failed to setns to namespaces of %d",
/* Second pass: check where we ended up. */
398 for (i = 0; i < PIDFD_NS_MAX; i++) {
399 const struct ns_info *info = &ns_info[i];
402 if (self->child_nsfds1[i] < 0)
405 /* Verify that we have changed to the correct namespaces. */
/*
 * For CLONE_NEWPID the comparison uses the fd of our original pid namespace
 * instead of the child one; per setns(2), switching the PID namespace only
 * takes effect for children created afterwards.
 */
406 if (info->flag == CLONE_NEWPID)
407 nsfd = self->nsfds[i];
409 nsfd = self->child_nsfds1[i];
410 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
411 TH_LOG("setns failed to place us correctly into %s namespace of %d",
412 info->name, self->child_pid1);
414 TH_LOG("Managed to correctly setns to %s namespace of %d",
415 info->name, self->child_pid1);
/*
 * After joining the namespaces of the first child with one setns() call,
 * verify that setns() to each namespace of the second child fails, both
 * through its pidfd and through its individual namespace fds, and that
 * setns() on a memfd with flags 0 is rejected with EINVAL.
 */
419 TEST_F(current_nsset, no_foul_play)
424 for (i = 0; i < PIDFD_NS_MAX; i++) {
425 const struct ns_info *info = &ns_info[i];
/* A negative slot means setup could not open this namespace file. */
427 if (self->child_nsfds1[i] < 0)
431 if (info->flag) /* No use logging pid_for_children. */
432 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
433 info->name, self->child_pid1);
/* NOTE(review): "vid" in the message below is presumably a typo for "via". */
436 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
437 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
438 self->child_pid1, self->child_pidfd1);
442 * Can't setns to a user namespace outside of our hierarchy since we
443 * don't have caps in there and didn't create it. That means that under
444 * no circumstances should we be able to setns to any of the other
445 * ones since they aren't owned by our user namespace.
447 for (i = 0; i < PIDFD_NS_MAX; i++) {
448 const struct ns_info *info = &ns_info[i];
/* Entries without an open fd or without a flag of their own are excluded. */
450 if (self->child_nsfds2[i] < 0 || !info->flag)
/* Attempt via the pidfd of the second child: must fail. */
453 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
454 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
455 info->name, self->child_pid2,
458 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
459 info->name, self->child_pid2,
/* Attempt via the namespace fd of the second child: must fail as well. */
462 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
463 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
464 info->name, self->child_pid2,
465 self->child_nsfds2[i]);
467 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
468 info->name, self->child_pid2,
469 self->child_nsfds2[i]);
/* A memfd is passed to setns() with flags 0; the call must fail with EINVAL. */
477 fd = sys_memfd_create("rostock", 0);
480 ASSERT_NE(setns(fd, 0), 0);
481 EXPECT_EQ(errno, EINVAL);