Merge tag 'linux-kselftest-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / tools / testing / selftests / pidfd / pidfd_setns_test.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <linux/types.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <syscall.h>
14 #include <sys/prctl.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19 #include <linux/kcmp.h>
20
21 #include "pidfd.h"
22 #include "../clone3/clone3_selftests.h"
23 #include "../kselftest_harness.h"
24
/*
 * Index of each namespace type handled by this test. The values are used as
 * designators for ns_info[] and as indices into the per-task namespace fd
 * arrays of the current_nsset fixture; PIDFD_NS_MAX is the number of entries.
 */
enum {
        PIDFD_NS_USER,
        PIDFD_NS_MNT,
        PIDFD_NS_PID,
        PIDFD_NS_UTS,
        PIDFD_NS_IPC,
        PIDFD_NS_NET,
        PIDFD_NS_CGROUP,
        PIDFD_NS_PIDCLD,
        PIDFD_NS_TIME,
        PIDFD_NS_MAX
};
37
38 const struct ns_info {
39         const char *name;
40         int flag;
41 } ns_info[] = {
42         [PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
43         [PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
44         [PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
45         [PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
46         [PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
47         [PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
48         [PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
49         [PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
50         [PIDFD_NS_TIME]   = { "time",             CLONE_NEWTIME,   },
51 };
52
/*
 * Per-test state of the current_nsset fixture. Everything here is filled in
 * by FIXTURE_SETUP(current_nsset) and released by
 * FIXTURE_TEARDOWN(current_nsset).
 */
FIXTURE(current_nsset)
{
        /* The test process itself: pid, pidfd and one fd per namespace type. */
        pid_t pid;
        int pidfd;
        int nsfds[PIDFD_NS_MAX];

        /* Child that exits right after creation; it is only reaped in teardown. */
        pid_t child_pid_exited;
        int child_pidfd_exited;

        /* First child kept alive in pause(), with fds for its namespaces. */
        pid_t child_pid1;
        int child_pidfd1;
        int child_nsfds1[PIDFD_NS_MAX];

        /* Second child, created the same way as the first one. */
        pid_t child_pid2;
        int child_pidfd2;
        int child_nsfds2[PIDFD_NS_MAX];
};
70
/*
 * Thin wrapper around the raw waitid system call.
 *
 * @which and @pid select what to wait for and @options is forwarded
 * unchanged; both optional output pointers are passed as NULL. Returns the
 * syscall() result converted to int.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);
	return (int)rc;
}
75
76 pid_t create_child(int *pidfd, unsigned flags)
77 {
78         struct clone_args args = {
79                 .flags          = CLONE_PIDFD | flags,
80                 .exit_signal    = SIGCHLD,
81                 .pidfd          = ptr_to_u64(pidfd),
82         };
83
84         return sys_clone3(&args, sizeof(struct clone_args));
85 }
86
87 static bool switch_timens(void)
88 {
89         int fd, ret;
90
91         if (unshare(CLONE_NEWTIME))
92                 return false;
93
94         fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
95         if (fd < 0)
96                 return false;
97
98         ret = setns(fd, CLONE_NEWTIME);
99         close(fd);
100         return ret == 0;
101 }
102
/*
 * read() that transparently restarts when interrupted by a signal.
 *
 * Returns what the first read() call not failing with EINTR returned.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		/* Only an EINTR failure is retried; everything else is final. */
		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
113
/*
 * write() that transparently restarts when interrupted by a signal.
 *
 * Returns what the first write() call not failing with EINTR returned.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		/* Only an EINTR failure is retried; everything else is final. */
		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
124
125 FIXTURE_SETUP(current_nsset)
126 {
127         int i, proc_fd, ret;
128         int ipc_sockets[2];
129         char c;
130
131         for (i = 0; i < PIDFD_NS_MAX; i++) {
132                 self->nsfds[i]          = -EBADF;
133                 self->child_nsfds1[i]   = -EBADF;
134                 self->child_nsfds2[i]   = -EBADF;
135         }
136
137         proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
138         ASSERT_GE(proc_fd, 0) {
139                 TH_LOG("%m - Failed to open /proc/self/ns");
140         }
141
142         self->pid = getpid();
143         for (i = 0; i < PIDFD_NS_MAX; i++) {
144                 const struct ns_info *info = &ns_info[i];
145                 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
146                 if (self->nsfds[i] < 0) {
147                         EXPECT_EQ(errno, ENOENT) {
148                                 TH_LOG("%m - Failed to open %s namespace for process %d",
149                                        info->name, self->pid);
150                         }
151                 }
152         }
153
154         self->pidfd = sys_pidfd_open(self->pid, 0);
155         EXPECT_GT(self->pidfd, 0) {
156                 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
157         }
158
159         /* Create task that exits right away. */
160         self->child_pid_exited = create_child(&self->child_pidfd_exited,
161                                               CLONE_NEWUSER | CLONE_NEWNET);
162         EXPECT_GT(self->child_pid_exited, 0);
163
164         if (self->child_pid_exited == 0)
165                 _exit(EXIT_SUCCESS);
166
167         ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
168
169         self->pidfd = sys_pidfd_open(self->pid, 0);
170         EXPECT_GE(self->pidfd, 0) {
171                 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
172         }
173
174         ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
175         EXPECT_EQ(ret, 0);
176
177         /* Create tasks that will be stopped. */
178         self->child_pid1 = create_child(&self->child_pidfd1,
179                                         CLONE_NEWUSER | CLONE_NEWNS |
180                                         CLONE_NEWCGROUP | CLONE_NEWIPC |
181                                         CLONE_NEWUTS | CLONE_NEWPID |
182                                         CLONE_NEWNET);
183         EXPECT_GE(self->child_pid1, 0);
184
185         if (self->child_pid1 == 0) {
186                 close(ipc_sockets[0]);
187
188                 if (!switch_timens())
189                         _exit(EXIT_FAILURE);
190
191                 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
192                         _exit(EXIT_FAILURE);
193
194                 close(ipc_sockets[1]);
195
196                 pause();
197                 _exit(EXIT_SUCCESS);
198         }
199
200         close(ipc_sockets[1]);
201         ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
202         close(ipc_sockets[0]);
203
204         ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
205         EXPECT_EQ(ret, 0);
206
207         self->child_pid2 = create_child(&self->child_pidfd2,
208                                         CLONE_NEWUSER | CLONE_NEWNS |
209                                         CLONE_NEWCGROUP | CLONE_NEWIPC |
210                                         CLONE_NEWUTS | CLONE_NEWPID |
211                                         CLONE_NEWNET);
212         EXPECT_GE(self->child_pid2, 0);
213
214         if (self->child_pid2 == 0) {
215                 close(ipc_sockets[0]);
216
217                 if (!switch_timens())
218                         _exit(EXIT_FAILURE);
219
220                 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
221                         _exit(EXIT_FAILURE);
222
223                 close(ipc_sockets[1]);
224
225                 pause();
226                 _exit(EXIT_SUCCESS);
227         }
228
229         close(ipc_sockets[1]);
230         ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
231         close(ipc_sockets[0]);
232
233         for (i = 0; i < PIDFD_NS_MAX; i++) {
234                 char p[100];
235
236                 const struct ns_info *info = &ns_info[i];
237
238                 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
239                 if (self->nsfds[i] < 0) {
240                         EXPECT_EQ(errno, ENOENT) {
241                                 TH_LOG("%m - Failed to open %s namespace for process %d",
242                                        info->name, self->pid);
243                         }
244                 }
245
246                 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
247                                self->child_pid1, info->name);
248                 EXPECT_GT(ret, 0);
249                 EXPECT_LT(ret, sizeof(p));
250
251                 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
252                 if (self->child_nsfds1[i] < 0) {
253                         EXPECT_EQ(errno, ENOENT) {
254                                 TH_LOG("%m - Failed to open %s namespace for process %d",
255                                        info->name, self->child_pid1);
256                         }
257                 }
258
259                 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
260                                self->child_pid2, info->name);
261                 EXPECT_GT(ret, 0);
262                 EXPECT_LT(ret, sizeof(p));
263
264                 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
265                 if (self->child_nsfds2[i] < 0) {
266                         EXPECT_EQ(errno, ENOENT) {
267                                 TH_LOG("%m - Failed to open %s namespace for process %d",
268                                        info->name, self->child_pid1);
269                         }
270                 }
271         }
272
273         close(proc_fd);
274 }
275
276 FIXTURE_TEARDOWN(current_nsset)
277 {
278         int i;
279
280         ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
281                                         SIGKILL, NULL, 0), 0);
282         ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
283                                         SIGKILL, NULL, 0), 0);
284
285         for (i = 0; i < PIDFD_NS_MAX; i++) {
286                 if (self->nsfds[i] >= 0)
287                         close(self->nsfds[i]);
288                 if (self->child_nsfds1[i] >= 0)
289                         close(self->child_nsfds1[i]);
290                 if (self->child_nsfds2[i] >= 0)
291                         close(self->child_nsfds2[i]);
292         }
293
294         if (self->child_pidfd1 >= 0)
295                 EXPECT_EQ(0, close(self->child_pidfd1));
296         if (self->child_pidfd2 >= 0)
297                 EXPECT_EQ(0, close(self->child_pidfd2));
298         ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
299         ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
300         ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
301 }
302
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the open() result, or -EIO when the path could not be formatted or
 * does not fit into the local buffer.
 */
static int preserve_ns(const int pid, const char *ns)
{
	char path[50];
	int len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);

	if (len < 0)
		return -EIO;

	/* A result of sizeof(path) or more means the path was truncated. */
	if ((size_t)len >= sizeof(path))
		return -EIO;

	return open(path, O_RDONLY | O_CLOEXEC);
}
314
/*
 * Check whether the namespace behind @ns_fd1 is the one process @pid2 is in
 * for namespace type @ns (an entry name below /proc/<pid>/ns/).
 *
 * Two namespace files are treated as the same namespace when both their
 * device and inode numbers match.
 *
 * Returns 1 if they match, 0 if they differ and -1 if either file could not
 * be opened or stat'ed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat st_ref, st_other;
	int fd_other, rc;

	if (fstat(ns_fd1, &st_ref) < 0)
		return -1;

	fd_other = preserve_ns(pid2, ns);
	if (fd_other < 0)
		return -1;

	/* The descriptor is only needed for this one fstat() call. */
	rc = fstat(fd_other, &st_other);
	close(fd_other);
	if (rc < 0)
		return -1;

	return st_ref.st_dev == st_other.st_dev &&
	       st_ref.st_ino == st_other.st_ino;
}
342
343 /* Test that we can't pass garbage to the kernel. */
344 TEST_F(current_nsset, invalid_flags)
345 {
346         ASSERT_NE(setns(self->pidfd, 0), 0);
347         EXPECT_EQ(errno, EINVAL);
348
349         ASSERT_NE(setns(self->pidfd, -1), 0);
350         EXPECT_EQ(errno, EINVAL);
351
352         ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
353         EXPECT_EQ(errno, EINVAL);
354
355         ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
356         EXPECT_EQ(errno, EINVAL);
357 }
358
359 /* Test that we can't attach to a task that has already exited. */
360 TEST_F(current_nsset, pidfd_exited_child)
361 {
362         int i;
363         pid_t pid;
364
365         ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
366                   0);
367         EXPECT_EQ(errno, ESRCH);
368
369         pid = getpid();
370         for (i = 0; i < PIDFD_NS_MAX; i++) {
371                 const struct ns_info *info = &ns_info[i];
372                 /* Verify that we haven't changed any namespaces. */
373                 if (self->nsfds[i] >= 0)
374                         ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
375         }
376 }
377
378 TEST_F(current_nsset, pidfd_incremental_setns)
379 {
380         int i;
381         pid_t pid;
382
383         pid = getpid();
384         for (i = 0; i < PIDFD_NS_MAX; i++) {
385                 const struct ns_info *info = &ns_info[i];
386                 int nsfd;
387
388                 if (self->child_nsfds1[i] < 0)
389                         continue;
390
391                 if (info->flag) {
392                         ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
393                                 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
394                                        info->name, self->child_pid1,
395                                        self->child_pidfd1);
396                         }
397                 }
398
399                 /* Verify that we have changed to the correct namespaces. */
400                 if (info->flag == CLONE_NEWPID)
401                         nsfd = self->nsfds[i];
402                 else
403                         nsfd = self->child_nsfds1[i];
404                 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
405                         TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
406                                info->name, self->child_pid1,
407                                self->child_pidfd1);
408                 }
409                 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
410                        info->name, self->child_pid1, self->child_pidfd1);
411         }
412 }
413
414 TEST_F(current_nsset, nsfd_incremental_setns)
415 {
416         int i;
417         pid_t pid;
418
419         pid = getpid();
420         for (i = 0; i < PIDFD_NS_MAX; i++) {
421                 const struct ns_info *info = &ns_info[i];
422                 int nsfd;
423
424                 if (self->child_nsfds1[i] < 0)
425                         continue;
426
427                 if (info->flag) {
428                         ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
429                                 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
430                                        info->name, self->child_pid1,
431                                        self->child_nsfds1[i]);
432                         }
433                 }
434
435                 /* Verify that we have changed to the correct namespaces. */
436                 if (info->flag == CLONE_NEWPID)
437                         nsfd = self->nsfds[i];
438                 else
439                         nsfd = self->child_nsfds1[i];
440                 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
441                         TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
442                                info->name, self->child_pid1,
443                                self->child_nsfds1[i]);
444                 }
445                 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
446                        info->name, self->child_pid1, self->child_nsfds1[i]);
447         }
448 }
449
450 TEST_F(current_nsset, pidfd_one_shot_setns)
451 {
452         unsigned flags = 0;
453         int i;
454         pid_t pid;
455
456         for (i = 0; i < PIDFD_NS_MAX; i++) {
457                 const struct ns_info *info = &ns_info[i];
458
459                 if (self->child_nsfds1[i] < 0)
460                         continue;
461
462                 flags |= info->flag;
463                 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
464                        info->name, self->child_pid1);
465         }
466
467         ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
468                 TH_LOG("%m - Failed to setns to namespaces of %d",
469                        self->child_pid1);
470         }
471
472         pid = getpid();
473         for (i = 0; i < PIDFD_NS_MAX; i++) {
474                 const struct ns_info *info = &ns_info[i];
475                 int nsfd;
476
477                 if (self->child_nsfds1[i] < 0)
478                         continue;
479
480                 /* Verify that we have changed to the correct namespaces. */
481                 if (info->flag == CLONE_NEWPID)
482                         nsfd = self->nsfds[i];
483                 else
484                         nsfd = self->child_nsfds1[i];
485                 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
486                         TH_LOG("setns failed to place us correctly into %s namespace of %d",
487                                info->name, self->child_pid1);
488                 }
489                 TH_LOG("Managed to correctly setns to %s namespace of %d",
490                        info->name, self->child_pid1);
491         }
492 }
493
494 TEST_F(current_nsset, no_foul_play)
495 {
496         unsigned flags = 0;
497         int i;
498
499         for (i = 0; i < PIDFD_NS_MAX; i++) {
500                 const struct ns_info *info = &ns_info[i];
501
502                 if (self->child_nsfds1[i] < 0)
503                         continue;
504
505                 flags |= info->flag;
506                 if (info->flag) /* No use logging pid_for_children. */
507                         TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
508                                info->name, self->child_pid1);
509         }
510
511         ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
512                 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
513                        self->child_pid1, self->child_pidfd1);
514         }
515
516         /*
517          * Can't setns to a user namespace outside of our hierarchy since we
518          * don't have caps in there and didn't create it. That means that under
519          * no circumstances should we be able to setns to any of the other
520          * ones since they aren't owned by our user namespace.
521          */
522         for (i = 0; i < PIDFD_NS_MAX; i++) {
523                 const struct ns_info *info = &ns_info[i];
524
525                 if (self->child_nsfds2[i] < 0 || !info->flag)
526                         continue;
527
528                 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
529                         TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
530                                info->name, self->child_pid2,
531                                self->child_pidfd2);
532                 }
533                 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
534                        info->name, self->child_pid2,
535                        self->child_pidfd2);
536
537                 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
538                         TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
539                                info->name, self->child_pid2,
540                                self->child_nsfds2[i]);
541                 }
542                 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
543                        info->name, self->child_pid2,
544                        self->child_nsfds2[i]);
545         }
546 }
547
548 TEST(setns_einval)
549 {
550         int fd;
551
552         fd = sys_memfd_create("rostock", 0);
553         EXPECT_GT(fd, 0);
554
555         ASSERT_NE(setns(fd, 0), 0);
556         EXPECT_EQ(errno, EINVAL);
557         close(fd);
558 }
559
/*
 * NOTE(review): presumably expands to the program's main() via
 * ../kselftest_harness.h; no main() is defined anywhere else in this file.
 */
TEST_HARNESS_MAIN