selftests/seccomp: Zero out seccomp_notif
[linux-2.6-microblaze.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if !__GLIBC_PREREQ(2, 26)
17 # include <asm/siginfo.h>
18 # define __have_siginfo_t 1
19 # define __have_sigval_t 1
20 # define __have_sigevent_t 1
21 #endif
22
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/user.h>
28 #include <linux/prctl.h>
29 #include <linux/ptrace.h>
30 #include <linux/seccomp.h>
31 #include <pthread.h>
32 #include <semaphore.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <time.h>
38 #include <limits.h>
39 #include <linux/elf.h>
40 #include <sys/uio.h>
41 #include <sys/utsname.h>
42 #include <sys/fcntl.h>
43 #include <sys/mman.h>
44 #include <sys/times.h>
45 #include <sys/socket.h>
46 #include <sys/ioctl.h>
47 #include <linux/kcmp.h>
48
49 #include <unistd.h>
50 #include <sys/syscall.h>
51 #include <poll.h>
52
53 #include "../kselftest_harness.h"
54
/*
 * Fallback values for prctl() options and seccomp modes. Each group is only
 * defined when the headers included above did not already define its guard
 * macro.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* PR_GET_NO_NEW_PRIVS is defined together with PR_SET_NO_NEW_PRIVS. */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Mode values passed as the second argument of prctl(PR_SET_SECCOMP, ...). */
#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif
83
/*
 * Local definition of struct seccomp_data, the layout the BPF filters in
 * this file index into via offsetof(). It is only provided when
 * SECCOMP_RET_ALLOW is undefined, i.e. that macro is used as the marker for
 * "the seccomp header did not supply these definitions".
 */
#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
        int nr;
        __u32 arch;
        __u64 instruction_pointer;
        __u64 args[6];
};
#endif

/* Filter return actions, defined only when the headers lack them. */
#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
#endif
/* SECCOMP_RET_KILL is an alias for the kill-thread action. */
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
#endif
107
/*
 * Per-architecture fallback for the seccomp syscall number, used only when
 * the system headers do not define __NR_seccomp. On an architecture that is
 * not listed, a build warning is emitted and the placeholder 0xffff is used.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
130
/*
 * Operation numbers for the seccomp() wrapper defined below (its first
 * argument), each defined only if the headers do not provide it.
 */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
#define SECCOMP_GET_ACTION_AVAIL 2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

/* SECCOMP_FILTER_FLAG_* bit values (bits 0-2), again only as fallbacks. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

/*
 * ptrace request number and its argument structure; both are supplied
 * together when PTRACE_SECCOMP_GET_METADATA is not already defined.
 */
#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA     0x420d

struct seccomp_metadata {
        __u64 filter_off;       /* Input: which filter */
        __u64 flags;             /* Output: filter's flags */
};
#endif
167
/*
 * Fallback definitions for the seccomp user-notification interface. The
 * whole group (flag, return action, ioctl numbers and the three structures)
 * is keyed off SECCOMP_FILTER_FLAG_NEW_LISTENER being undefined.
 */
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)

#define SECCOMP_RET_USER_NOTIF 0x7fc00000U

/* ioctl number builders; all seccomp ioctls share the magic '!'. */
#define SECCOMP_IOC_MAGIC               '!'
#define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
#define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
                                                struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOR(2, __u64)

/* Argument type of SECCOMP_IOCTL_NOTIF_RECV. */
struct seccomp_notif {
        __u64 id;
        __u32 pid;
        __u32 flags;
        struct seccomp_data data;
};

/* Argument type of SECCOMP_IOCTL_NOTIF_SEND. */
struct seccomp_notif_resp {
        __u64 id;
        __s64 val;
        __s32 error;
        __u32 flags;
};

/* One 16-bit field per notification-related structure. */
struct seccomp_notif_sizes {
        __u16 seccomp_notif;
        __u16 seccomp_notif_resp;
        __u16 seccomp_data;
};
#endif

/* ptrace event message values for syscall entry and exit stops. */
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
#define PTRACE_EVENTMSG_SYSCALL_EXIT    2
#endif

#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
214
#ifndef seccomp
/*
 * Invoke the seccomp syscall directly through syscall(2).
 *
 * errno is reset to 0 before the call is made. The syscall's return value
 * is handed back to the caller as an int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
#endif
222
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data of the 32-bit word
 * of args[_n] that the filters in this file load. On little-endian that is
 * the start of args[_n]; on big-endian it is sizeof(__u32) bytes further in.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define SYSCALL_ARG_WORD_SKIP 0
#elif __BYTE_ORDER == __BIG_ENDIAN
# define SYSCALL_ARG_WORD_SKIP sizeof(__u32)
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif
#define syscall_arg(_n) \
	(offsetof(struct seccomp_data, args[_n]) + SYSCALL_ARG_WORD_SKIP)
230
/*
 * Marker values used as thread exit statuses: kill_thread() returns
 * SIBLING_EXIT_UNKILLED or SIBLING_EXIT_FAILURE, and kill_thread_or_group()
 * compares the joined status against them. SIBLING_EXIT_NEWPRIVS is not
 * referenced in this part of the file.
 */
#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
234
235 TEST(mode_strict_support)
236 {
237         long ret;
238
239         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
240         ASSERT_EQ(0, ret) {
241                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
242         }
243         syscall(__NR_exit, 0);
244 }
245
246 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
247 {
248         long ret;
249
250         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
251         ASSERT_EQ(0, ret) {
252                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
253         }
254         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
255                 NULL, NULL, NULL);
256         EXPECT_FALSE(true) {
257                 TH_LOG("Unreachable!");
258         }
259 }
260
261 /* Note! This doesn't test no new privs behavior */
262 TEST(no_new_privs_support)
263 {
264         long ret;
265
266         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
267         EXPECT_EQ(0, ret) {
268                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
269         }
270 }
271
272 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
273 TEST(mode_filter_support)
274 {
275         long ret;
276
277         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
278         ASSERT_EQ(0, ret) {
279                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
280         }
281         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
282         EXPECT_EQ(-1, ret);
283         EXPECT_EQ(EFAULT, errno) {
284                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
285         }
286 }
287
288 TEST(mode_filter_without_nnp)
289 {
290         struct sock_filter filter[] = {
291                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
292         };
293         struct sock_fprog prog = {
294                 .len = (unsigned short)ARRAY_SIZE(filter),
295                 .filter = filter,
296         };
297         long ret;
298
299         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
300         ASSERT_LE(0, ret) {
301                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
302         }
303         errno = 0;
304         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
305         /* Succeeds with CAP_SYS_ADMIN, fails without */
306         /* TODO(wad) check caps not euid */
307         if (geteuid()) {
308                 EXPECT_EQ(-1, ret);
309                 EXPECT_EQ(EACCES, errno);
310         } else {
311                 EXPECT_EQ(0, ret);
312         }
313 }
314
/* Upper bound on how many filter installs filter_chain_limits will attempt. */
#define MAX_INSNS_PER_PATH 32768
316
317 TEST(filter_size_limits)
318 {
319         int i;
320         int count = BPF_MAXINSNS + 1;
321         struct sock_filter allow[] = {
322                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
323         };
324         struct sock_filter *filter;
325         struct sock_fprog prog = { };
326         long ret;
327
328         filter = calloc(count, sizeof(*filter));
329         ASSERT_NE(NULL, filter);
330
331         for (i = 0; i < count; i++)
332                 filter[i] = allow[0];
333
334         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
335         ASSERT_EQ(0, ret);
336
337         prog.filter = filter;
338         prog.len = count;
339
340         /* Too many filter instructions in a single filter. */
341         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
342         ASSERT_NE(0, ret) {
343                 TH_LOG("Installing %d insn filter was allowed", prog.len);
344         }
345
346         /* One less is okay, though. */
347         prog.len -= 1;
348         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
349         ASSERT_EQ(0, ret) {
350                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
351         }
352 }
353
354 TEST(filter_chain_limits)
355 {
356         int i;
357         int count = BPF_MAXINSNS;
358         struct sock_filter allow[] = {
359                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
360         };
361         struct sock_filter *filter;
362         struct sock_fprog prog = { };
363         long ret;
364
365         filter = calloc(count, sizeof(*filter));
366         ASSERT_NE(NULL, filter);
367
368         for (i = 0; i < count; i++)
369                 filter[i] = allow[0];
370
371         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
372         ASSERT_EQ(0, ret);
373
374         prog.filter = filter;
375         prog.len = 1;
376
377         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
378         ASSERT_EQ(0, ret);
379
380         prog.len = count;
381
382         /* Too many total filter instructions. */
383         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
384                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
385                 if (ret != 0)
386                         break;
387         }
388         ASSERT_NE(0, ret) {
389                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
390                        i, count, i * (count + 4));
391         }
392 }
393
/* After a filter is installed, requesting strict mode must fail with EINVAL. */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Now attempt the switch to strict mode. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
415
416
/* PR_GET_SECCOMP must report 0 before a filter is installed and 2 after. */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Query before any filter exists. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Query again with the filter in place. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, ret);
}
440
441
/* A one-instruction filter that allows everything must install cleanly. */
TEST(ALLOW_all)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);
}
459
/* A program with zero instructions must be rejected with EINVAL. */
TEST(empty_prog)
{
	struct sock_filter no_insns[] = {
	};
	struct sock_fprog fprog = {
		.filter = no_insns,
		.len = (unsigned short)ARRAY_SIZE(no_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
477
/* With every syscall returning SECCOMP_RET_LOG, getppid() must still work. */
TEST(log_all)
{
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog fprog = {
		.filter = log_insns,
		.len = (unsigned short)ARRAY_SIZE(log_insns),
	};
	/* Reference value, taken before the filter is installed. */
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
499
500 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
501 {
502         struct sock_filter filter[] = {
503                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
504         };
505         struct sock_fprog prog = {
506                 .len = (unsigned short)ARRAY_SIZE(filter),
507                 .filter = filter,
508         };
509         long ret;
510
511         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
512         ASSERT_EQ(0, ret);
513
514         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
515         ASSERT_EQ(0, ret);
516         EXPECT_EQ(0, syscall(__NR_getpid)) {
517                 TH_LOG("getpid() shouldn't ever return");
518         }
519 }
520
521 /* return code >= 0x80000000 is unused. */
522 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
523 {
524         struct sock_filter filter[] = {
525                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
526         };
527         struct sock_fprog prog = {
528                 .len = (unsigned short)ARRAY_SIZE(filter),
529                 .filter = filter,
530         };
531         long ret;
532
533         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
534         ASSERT_EQ(0, ret);
535
536         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
537         ASSERT_EQ(0, ret);
538         EXPECT_EQ(0, syscall(__NR_getpid)) {
539                 TH_LOG("getpid() shouldn't ever return");
540         }
541 }
542
543 TEST_SIGNAL(KILL_all, SIGSYS)
544 {
545         struct sock_filter filter[] = {
546                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
547         };
548         struct sock_fprog prog = {
549                 .len = (unsigned short)ARRAY_SIZE(filter),
550                 .filter = filter,
551         };
552         long ret;
553
554         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
555         ASSERT_EQ(0, ret);
556
557         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
558         ASSERT_EQ(0, ret);
559 }
560
561 TEST_SIGNAL(KILL_one, SIGSYS)
562 {
563         struct sock_filter filter[] = {
564                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
565                         offsetof(struct seccomp_data, nr)),
566                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
567                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
568                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
569         };
570         struct sock_fprog prog = {
571                 .len = (unsigned short)ARRAY_SIZE(filter),
572                 .filter = filter,
573         };
574         long ret;
575         pid_t parent = getppid();
576
577         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
578         ASSERT_EQ(0, ret);
579
580         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
581         ASSERT_EQ(0, ret);
582
583         EXPECT_EQ(parent, syscall(__NR_getppid));
584         /* getpid() should never return. */
585         EXPECT_EQ(0, syscall(__NR_getpid));
586 }
587
588 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
589 {
590         void *fatal_address;
591         struct sock_filter filter[] = {
592                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
593                         offsetof(struct seccomp_data, nr)),
594                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
595                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
596                 /* Only both with lower 32-bit for now. */
597                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
598                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
599                         (unsigned long)&fatal_address, 0, 1),
600                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
601                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
602         };
603         struct sock_fprog prog = {
604                 .len = (unsigned short)ARRAY_SIZE(filter),
605                 .filter = filter,
606         };
607         long ret;
608         pid_t parent = getppid();
609         struct tms timebuf;
610         clock_t clock = times(&timebuf);
611
612         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
613         ASSERT_EQ(0, ret);
614
615         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
616         ASSERT_EQ(0, ret);
617
618         EXPECT_EQ(parent, syscall(__NR_getppid));
619         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
620         /* times() should never return. */
621         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
622 }
623
624 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
625 {
626 #ifndef __NR_mmap2
627         int sysno = __NR_mmap;
628 #else
629         int sysno = __NR_mmap2;
630 #endif
631         struct sock_filter filter[] = {
632                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
633                         offsetof(struct seccomp_data, nr)),
634                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
635                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
636                 /* Only both with lower 32-bit for now. */
637                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
638                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
639                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
640                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
641         };
642         struct sock_fprog prog = {
643                 .len = (unsigned short)ARRAY_SIZE(filter),
644                 .filter = filter,
645         };
646         long ret;
647         pid_t parent = getppid();
648         int fd;
649         void *map1, *map2;
650         int page_size = sysconf(_SC_PAGESIZE);
651
652         ASSERT_LT(0, page_size);
653
654         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
655         ASSERT_EQ(0, ret);
656
657         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
658         ASSERT_EQ(0, ret);
659
660         fd = open("/dev/zero", O_RDONLY);
661         ASSERT_NE(-1, fd);
662
663         EXPECT_EQ(parent, syscall(__NR_getppid));
664         map1 = (void *)syscall(sysno,
665                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
666         EXPECT_NE(MAP_FAILED, map1);
667         /* mmap2() should never return. */
668         map2 = (void *)syscall(sysno,
669                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
670         EXPECT_EQ(MAP_FAILED, map2);
671
672         /* The test failed, so clean up the resources. */
673         munmap(map1, page_size);
674         munmap(map2, page_size);
675         close(fd);
676 }
677
678 /* This is a thread task to die via seccomp filter violation. */
679 void *kill_thread(void *data)
680 {
681         bool die = (bool)data;
682
683         if (die) {
684                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
685                 return (void *)SIBLING_EXIT_FAILURE;
686         }
687
688         return (void *)SIBLING_EXIT_UNKILLED;
689 }
690
691 /* Prepare a thread that will kill itself or both of us. */
692 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
693 {
694         pthread_t thread;
695         void *status;
696         /* Kill only when calling __NR_prctl. */
697         struct sock_filter filter_thread[] = {
698                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
699                         offsetof(struct seccomp_data, nr)),
700                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
701                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
702                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
703         };
704         struct sock_fprog prog_thread = {
705                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
706                 .filter = filter_thread,
707         };
708         struct sock_filter filter_process[] = {
709                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
710                         offsetof(struct seccomp_data, nr)),
711                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
712                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
713                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
714         };
715         struct sock_fprog prog_process = {
716                 .len = (unsigned short)ARRAY_SIZE(filter_process),
717                 .filter = filter_process,
718         };
719
720         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
721                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
722         }
723
724         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
725                              kill_process ? &prog_process : &prog_thread));
726
727         /*
728          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
729          * flag cannot be downgraded by a new filter.
730          */
731         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
732
733         /* Start a thread that will exit immediately. */
734         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
735         ASSERT_EQ(0, pthread_join(thread, &status));
736         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
737
738         /* Start a thread that will die immediately. */
739         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
740         ASSERT_EQ(0, pthread_join(thread, &status));
741         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
742
743         /*
744          * If we get here, only the spawned thread died. Let the parent know
745          * the whole process didn't die (i.e. this thread, the spawner,
746          * stayed running).
747          */
748         exit(42);
749 }
750
751 TEST(KILL_thread)
752 {
753         int status;
754         pid_t child_pid;
755
756         child_pid = fork();
757         ASSERT_LE(0, child_pid);
758         if (child_pid == 0) {
759                 kill_thread_or_group(_metadata, false);
760                 _exit(38);
761         }
762
763         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
764
765         /* If only the thread was killed, we'll see exit 42. */
766         ASSERT_TRUE(WIFEXITED(status));
767         ASSERT_EQ(42, WEXITSTATUS(status));
768 }
769
770 TEST(KILL_process)
771 {
772         int status;
773         pid_t child_pid;
774
775         child_pid = fork();
776         ASSERT_LE(0, child_pid);
777         if (child_pid == 0) {
778                 kill_thread_or_group(_metadata, true);
779                 _exit(38);
780         }
781
782         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
783
784         /* If the entire process was killed, we'll see SIGSYS. */
785         ASSERT_TRUE(WIFSIGNALED(status));
786         ASSERT_EQ(SIGSYS, WTERMSIG(status));
787 }
788
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
/*
 * A filter that loads from syscall_arg(6), one past the six-entry args[]
 * array, must be refused with EINVAL.
 */
TEST(arg_out_of_range)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
809
/*
 * ERRNO_FILTER(name, errno_val) - declare a filter that fails read().
 *
 * Expands to two local declarations:
 *   _read_filter_<name>[]  instructions returning
 *                          SECCOMP_RET_ERRNO | errno_val for __NR_read and
 *                          SECCOMP_RET_ALLOW for everything else;
 *   prog_<name>            the struct sock_fprog wrapping those instructions.
 *
 * The second parameter is deliberately not called "errno" (that name is a
 * macro from <errno.h>) and is parenthesized so that any expression can be
 * passed safely. No trailing semicolon: the caller supplies it.
 */
#define ERRNO_FILTER(name, errno_val)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K,					\
			 SECCOMP_RET_ERRNO | (errno_val)),		\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
822
823 /* Make sure basic errno values are correctly passed through a filter. */
824 TEST(ERRNO_valid)
825 {
826         ERRNO_FILTER(valid, E2BIG);
827         long ret;
828         pid_t parent = getppid();
829
830         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
831         ASSERT_EQ(0, ret);
832
833         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
834         ASSERT_EQ(0, ret);
835
836         EXPECT_EQ(parent, syscall(__NR_getppid));
837         EXPECT_EQ(-1, read(0, NULL, 0));
838         EXPECT_EQ(E2BIG, errno);
839 }
840
841 /* Make sure an errno of zero is correctly handled by the arch code. */
842 TEST(ERRNO_zero)
843 {
844         ERRNO_FILTER(zero, 0);
845         long ret;
846         pid_t parent = getppid();
847
848         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
849         ASSERT_EQ(0, ret);
850
851         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
852         ASSERT_EQ(0, ret);
853
854         EXPECT_EQ(parent, syscall(__NR_getppid));
855         /* "errno" of 0 is ok. */
856         EXPECT_EQ(0, read(0, NULL, 0));
857 }
858
859 /*
860  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
861  * This tests that the errno value gets capped correctly, fixed by
862  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
863  */
864 TEST(ERRNO_capped)
865 {
866         ERRNO_FILTER(capped, 4096);
867         long ret;
868         pid_t parent = getppid();
869
870         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
871         ASSERT_EQ(0, ret);
872
873         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
874         ASSERT_EQ(0, ret);
875
876         EXPECT_EQ(parent, syscall(__NR_getppid));
877         EXPECT_EQ(-1, read(0, NULL, 0));
878         EXPECT_EQ(4095, errno);
879 }
880
881 /*
882  * Filters are processed in reverse order: last applied is executed first.
883  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
884  * SECCOMP_RET_DATA mask results will follow the most recently applied
885  * matching filter return (and not the lowest or highest value).
886  */
887 TEST(ERRNO_order)
888 {
889         ERRNO_FILTER(first,  11);
890         ERRNO_FILTER(second, 13);
891         ERRNO_FILTER(third,  12);
892         long ret;
893         pid_t parent = getppid();
894
895         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
896         ASSERT_EQ(0, ret);
897
898         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
899         ASSERT_EQ(0, ret);
900
901         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
902         ASSERT_EQ(0, ret);
903
904         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
905         ASSERT_EQ(0, ret);
906
907         EXPECT_EQ(parent, syscall(__NR_getppid));
908         EXPECT_EQ(-1, read(0, NULL, 0));
909         EXPECT_EQ(12, errno);
910 }
911
/*
 * Per-test state of the TRAP fixture: the filter program that the fixture
 * setup builds and the fixture teardown frees.
 */
FIXTURE_DATA(TRAP) {
        struct sock_fprog prog;
};
915
916 FIXTURE_SETUP(TRAP)
917 {
918         struct sock_filter filter[] = {
919                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
920                         offsetof(struct seccomp_data, nr)),
921                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
922                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
923                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
924         };
925
926         memset(&self->prog, 0, sizeof(self->prog));
927         self->prog.filter = malloc(sizeof(filter));
928         ASSERT_NE(NULL, self->prog.filter);
929         memcpy(self->prog.filter, filter, sizeof(filter));
930         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
931 }
932
933 FIXTURE_TEARDOWN(TRAP)
934 {
935         if (self->prog.filter)
936                 free(self->prog.filter);
937 }
938
939 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
940 {
941         long ret;
942
943         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
944         ASSERT_EQ(0, ret);
945
946         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
947         ASSERT_EQ(0, ret);
948         syscall(__NR_getpid);
949 }
950
951 /* Ensure that SIGSYS overrides SIG_IGN */
952 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
953 {
954         long ret;
955
956         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
957         ASSERT_EQ(0, ret);
958
959         signal(SIGSYS, SIG_IGN);
960
961         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
962         ASSERT_EQ(0, ret);
963         syscall(__NR_getpid);
964 }
965
/* Last siginfo and signal number recorded by TRAP_action(). */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style signal handler: stores a copy of *info in TRAP_info and
 * the signal number in TRAP_nr. The context argument is not used.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(*info));
	TRAP_nr = nr;
}
973
974 TEST_F(TRAP, handler)
975 {
976         int ret, test;
977         struct sigaction act;
978         sigset_t mask;
979
980         memset(&act, 0, sizeof(act));
981         sigemptyset(&mask);
982         sigaddset(&mask, SIGSYS);
983
984         act.sa_sigaction = &TRAP_action;
985         act.sa_flags = SA_SIGINFO;
986         ret = sigaction(SIGSYS, &act, NULL);
987         ASSERT_EQ(0, ret) {
988                 TH_LOG("sigaction failed");
989         }
990         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
991         ASSERT_EQ(0, ret) {
992                 TH_LOG("sigprocmask failed");
993         }
994
995         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
996         ASSERT_EQ(0, ret);
997         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
998         ASSERT_EQ(0, ret);
999         TRAP_nr = 0;
1000         memset(&TRAP_info, 0, sizeof(TRAP_info));
1001         /* Expect the registers to be rolled back. (nr = error) may vary
1002          * based on arch. */
1003         ret = syscall(__NR_getpid);
1004         /* Silence gcc warning about volatile. */
1005         test = TRAP_nr;
1006         EXPECT_EQ(SIGSYS, test);
1007         struct local_sigsys {
1008                 void *_call_addr;       /* calling user insn */
1009                 int _syscall;           /* triggering system call number */
1010                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
1011         } *sigsys = (struct local_sigsys *)
1012 #ifdef si_syscall
1013                 &(TRAP_info.si_call_addr);
1014 #else
1015                 &TRAP_info.si_pid;
1016 #endif
1017         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1018         /* Make sure arch is non-zero. */
1019         EXPECT_NE(0, sigsys->_arch);
1020         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1021 }
1022
/*
 * One filter program per seccomp return action under test. Each one is
 * populated by the fixture setup and released by the fixture teardown.
 */
FIXTURE_DATA(precedence) {
	struct sock_fprog allow;
	struct sock_fprog log;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};
1031
1032 FIXTURE_SETUP(precedence)
1033 {
1034         struct sock_filter allow_insns[] = {
1035                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1036         };
1037         struct sock_filter log_insns[] = {
1038                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1039                         offsetof(struct seccomp_data, nr)),
1040                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1041                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1042                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1043         };
1044         struct sock_filter trace_insns[] = {
1045                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1046                         offsetof(struct seccomp_data, nr)),
1047                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1048                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1049                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1050         };
1051         struct sock_filter error_insns[] = {
1052                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1053                         offsetof(struct seccomp_data, nr)),
1054                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1055                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1056                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1057         };
1058         struct sock_filter trap_insns[] = {
1059                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1060                         offsetof(struct seccomp_data, nr)),
1061                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1062                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1063                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1064         };
1065         struct sock_filter kill_insns[] = {
1066                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1067                         offsetof(struct seccomp_data, nr)),
1068                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1069                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1070                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1071         };
1072
1073         memset(self, 0, sizeof(*self));
1074 #define FILTER_ALLOC(_x) \
1075         self->_x.filter = malloc(sizeof(_x##_insns)); \
1076         ASSERT_NE(NULL, self->_x.filter); \
1077         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1078         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1079         FILTER_ALLOC(allow);
1080         FILTER_ALLOC(log);
1081         FILTER_ALLOC(trace);
1082         FILTER_ALLOC(error);
1083         FILTER_ALLOC(trap);
1084         FILTER_ALLOC(kill);
1085 }
1086
1087 FIXTURE_TEARDOWN(precedence)
1088 {
1089 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1090         FILTER_FREE(allow);
1091         FILTER_FREE(log);
1092         FILTER_FREE(trace);
1093         FILTER_FREE(error);
1094         FILTER_FREE(trap);
1095         FILTER_FREE(kill);
1096 }
1097
1098 TEST_F(precedence, allow_ok)
1099 {
1100         pid_t parent, res = 0;
1101         long ret;
1102
1103         parent = getppid();
1104         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1105         ASSERT_EQ(0, ret);
1106
1107         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1108         ASSERT_EQ(0, ret);
1109         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1110         ASSERT_EQ(0, ret);
1111         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1112         ASSERT_EQ(0, ret);
1113         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1114         ASSERT_EQ(0, ret);
1115         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1116         ASSERT_EQ(0, ret);
1117         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1118         ASSERT_EQ(0, ret);
1119         /* Should work just fine. */
1120         res = syscall(__NR_getppid);
1121         EXPECT_EQ(parent, res);
1122 }
1123
1124 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1125 {
1126         pid_t parent, res = 0;
1127         long ret;
1128
1129         parent = getppid();
1130         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1131         ASSERT_EQ(0, ret);
1132
1133         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1134         ASSERT_EQ(0, ret);
1135         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1136         ASSERT_EQ(0, ret);
1137         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1138         ASSERT_EQ(0, ret);
1139         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1140         ASSERT_EQ(0, ret);
1141         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1142         ASSERT_EQ(0, ret);
1143         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1144         ASSERT_EQ(0, ret);
1145         /* Should work just fine. */
1146         res = syscall(__NR_getppid);
1147         EXPECT_EQ(parent, res);
1148         /* getpid() should never return. */
1149         res = syscall(__NR_getpid);
1150         EXPECT_EQ(0, res);
1151 }
1152
1153 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1154 {
1155         pid_t parent;
1156         long ret;
1157
1158         parent = getppid();
1159         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1160         ASSERT_EQ(0, ret);
1161
1162         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1163         ASSERT_EQ(0, ret);
1164         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1165         ASSERT_EQ(0, ret);
1166         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1167         ASSERT_EQ(0, ret);
1168         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1169         ASSERT_EQ(0, ret);
1170         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1171         ASSERT_EQ(0, ret);
1172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1173         ASSERT_EQ(0, ret);
1174         /* Should work just fine. */
1175         EXPECT_EQ(parent, syscall(__NR_getppid));
1176         /* getpid() should never return. */
1177         EXPECT_EQ(0, syscall(__NR_getpid));
1178 }
1179
1180 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1181 {
1182         pid_t parent;
1183         long ret;
1184
1185         parent = getppid();
1186         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1187         ASSERT_EQ(0, ret);
1188
1189         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1190         ASSERT_EQ(0, ret);
1191         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1192         ASSERT_EQ(0, ret);
1193         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1194         ASSERT_EQ(0, ret);
1195         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1196         ASSERT_EQ(0, ret);
1197         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1198         ASSERT_EQ(0, ret);
1199         /* Should work just fine. */
1200         EXPECT_EQ(parent, syscall(__NR_getppid));
1201         /* getpid() should never return. */
1202         EXPECT_EQ(0, syscall(__NR_getpid));
1203 }
1204
1205 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1206 {
1207         pid_t parent;
1208         long ret;
1209
1210         parent = getppid();
1211         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1212         ASSERT_EQ(0, ret);
1213
1214         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1215         ASSERT_EQ(0, ret);
1216         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1217         ASSERT_EQ(0, ret);
1218         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1219         ASSERT_EQ(0, ret);
1220         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1221         ASSERT_EQ(0, ret);
1222         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1223         ASSERT_EQ(0, ret);
1224         /* Should work just fine. */
1225         EXPECT_EQ(parent, syscall(__NR_getppid));
1226         /* getpid() should never return. */
1227         EXPECT_EQ(0, syscall(__NR_getpid));
1228 }
1229
1230 TEST_F(precedence, errno_is_third)
1231 {
1232         pid_t parent;
1233         long ret;
1234
1235         parent = getppid();
1236         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1237         ASSERT_EQ(0, ret);
1238
1239         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1240         ASSERT_EQ(0, ret);
1241         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1242         ASSERT_EQ(0, ret);
1243         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1244         ASSERT_EQ(0, ret);
1245         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1246         ASSERT_EQ(0, ret);
1247         /* Should work just fine. */
1248         EXPECT_EQ(parent, syscall(__NR_getppid));
1249         EXPECT_EQ(0, syscall(__NR_getpid));
1250 }
1251
1252 TEST_F(precedence, errno_is_third_in_any_order)
1253 {
1254         pid_t parent;
1255         long ret;
1256
1257         parent = getppid();
1258         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1259         ASSERT_EQ(0, ret);
1260
1261         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1262         ASSERT_EQ(0, ret);
1263         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1264         ASSERT_EQ(0, ret);
1265         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1266         ASSERT_EQ(0, ret);
1267         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1268         ASSERT_EQ(0, ret);
1269         /* Should work just fine. */
1270         EXPECT_EQ(parent, syscall(__NR_getppid));
1271         EXPECT_EQ(0, syscall(__NR_getpid));
1272 }
1273
1274 TEST_F(precedence, trace_is_fourth)
1275 {
1276         pid_t parent;
1277         long ret;
1278
1279         parent = getppid();
1280         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1281         ASSERT_EQ(0, ret);
1282
1283         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1284         ASSERT_EQ(0, ret);
1285         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1286         ASSERT_EQ(0, ret);
1287         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1288         ASSERT_EQ(0, ret);
1289         /* Should work just fine. */
1290         EXPECT_EQ(parent, syscall(__NR_getppid));
1291         /* No ptracer */
1292         EXPECT_EQ(-1, syscall(__NR_getpid));
1293 }
1294
1295 TEST_F(precedence, trace_is_fourth_in_any_order)
1296 {
1297         pid_t parent;
1298         long ret;
1299
1300         parent = getppid();
1301         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1302         ASSERT_EQ(0, ret);
1303
1304         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1305         ASSERT_EQ(0, ret);
1306         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1307         ASSERT_EQ(0, ret);
1308         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1309         ASSERT_EQ(0, ret);
1310         /* Should work just fine. */
1311         EXPECT_EQ(parent, syscall(__NR_getppid));
1312         /* No ptracer */
1313         EXPECT_EQ(-1, syscall(__NR_getpid));
1314 }
1315
1316 TEST_F(precedence, log_is_fifth)
1317 {
1318         pid_t mypid, parent;
1319         long ret;
1320
1321         mypid = getpid();
1322         parent = getppid();
1323         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1324         ASSERT_EQ(0, ret);
1325
1326         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1327         ASSERT_EQ(0, ret);
1328         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1329         ASSERT_EQ(0, ret);
1330         /* Should work just fine. */
1331         EXPECT_EQ(parent, syscall(__NR_getppid));
1332         /* Should also work just fine */
1333         EXPECT_EQ(mypid, syscall(__NR_getpid));
1334 }
1335
1336 TEST_F(precedence, log_is_fifth_in_any_order)
1337 {
1338         pid_t mypid, parent;
1339         long ret;
1340
1341         mypid = getpid();
1342         parent = getppid();
1343         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1344         ASSERT_EQ(0, ret);
1345
1346         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1347         ASSERT_EQ(0, ret);
1348         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1349         ASSERT_EQ(0, ret);
1350         /* Should work just fine. */
1351         EXPECT_EQ(parent, syscall(__NR_getppid));
1352         /* Should also work just fine */
1353         EXPECT_EQ(mypid, syscall(__NR_getpid));
1354 }
1355
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when bits 16 and up of a wait status equal PTRACE_EVENT_SECCOMP.
 * The argument is parenthesized so that expressions built from operators
 * binding looser than ">>" (e.g. "a & b") are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop. It is cleared from a signal handler, so it
 * is a volatile sig_atomic_t: the handler's store must be visible to the
 * loop that polls it.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. @sig is unused. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1375
/*
 * Callback invoked by start_tracer() for each stop reported for the tracee.
 * It receives the test metadata, the tracee pid, the wait status of the
 * stop, and the opaque pointer that was handed to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1378
1379 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1380             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1381 {
1382         int ret = -1;
1383         struct sigaction action = {
1384                 .sa_handler = tracer_stop,
1385         };
1386
1387         /* Allow external shutdown. */
1388         tracer_running = true;
1389         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1390
1391         errno = 0;
1392         while (ret == -1 && errno != EINVAL)
1393                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1394         ASSERT_EQ(0, ret) {
1395                 kill(tracee, SIGKILL);
1396         }
1397         /* Wait for attach stop */
1398         wait(NULL);
1399
1400         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1401                                                       PTRACE_O_TRACESYSGOOD :
1402                                                       PTRACE_O_TRACESECCOMP);
1403         ASSERT_EQ(0, ret) {
1404                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1405                 kill(tracee, SIGKILL);
1406         }
1407         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1408                      tracee, NULL, 0);
1409         ASSERT_EQ(0, ret);
1410
1411         /* Unblock the tracee */
1412         ASSERT_EQ(1, write(fd, "A", 1));
1413         ASSERT_EQ(0, close(fd));
1414
1415         /* Run until we're shut down. Must assert to stop execution. */
1416         while (tracer_running) {
1417                 int status;
1418
1419                 if (wait(&status) != tracee)
1420                         continue;
1421                 if (WIFSIGNALED(status) || WIFEXITED(status))
1422                         /* Child is dead. Time to go. */
1423                         return;
1424
1425                 /* Check if this is a seccomp event. */
1426                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1427
1428                 tracer_func(_metadata, tracee, status, args);
1429
1430                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1431                              tracee, NULL, 0);
1432                 ASSERT_EQ(0, ret);
1433         }
1434         /* Directly report the status of our test harness results. */
1435         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1436 }
1437
/* Common tracer setup/teardown functions. */

/*
 * Deliberately empty signal handler; setup_trace_fixture() installs it
 * for SIGALRM.
 */
void cont_handler(int num)
{
}
1441 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1442                           tracer_func_t func, void *args, bool ptrace_syscall)
1443 {
1444         char sync;
1445         int pipefd[2];
1446         pid_t tracer_pid;
1447         pid_t tracee = getpid();
1448
1449         /* Setup a pipe for clean synchronization. */
1450         ASSERT_EQ(0, pipe(pipefd));
1451
1452         /* Fork a child which we'll promote to tracer */
1453         tracer_pid = fork();
1454         ASSERT_LE(0, tracer_pid);
1455         signal(SIGALRM, cont_handler);
1456         if (tracer_pid == 0) {
1457                 close(pipefd[0]);
1458                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1459                              ptrace_syscall);
1460                 syscall(__NR_exit, 0);
1461         }
1462         close(pipefd[1]);
1463         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1464         read(pipefd[0], &sync, 1);
1465         close(pipefd[0]);
1466
1467         return tracer_pid;
1468 }
1469 void teardown_trace_fixture(struct __test_metadata *_metadata,
1470                             pid_t tracer)
1471 {
1472         if (tracer) {
1473                 int status;
1474                 /*
1475                  * Extract the exit code from the other process and
1476                  * adopt it for ourselves in case its asserts failed.
1477                  */
1478                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1479                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1480                 if (WEXITSTATUS(status))
1481                         _metadata->passed = 0;
1482         }
1483 }
1484
/* "poke" tracer arguments and function. */

/* Argument block for tracer_poke(). */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes 0x1001 to. */
	unsigned long poke_addr;
};
1489
1490 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1491                  void *args)
1492 {
1493         int ret;
1494         unsigned long msg;
1495         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1496
1497         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1498         EXPECT_EQ(0, ret);
1499         /* If this fails, don't try to recover. */
1500         ASSERT_EQ(0x1001, msg) {
1501                 kill(tracee, SIGKILL);
1502         }
1503         /*
1504          * Poke in the message.
1505          * Registers are not touched to try to keep this relatively arch
1506          * agnostic.
1507          */
1508         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1509         EXPECT_EQ(0, ret);
1510 }
1511
1512 FIXTURE_DATA(TRACE_poke) {
1513         struct sock_fprog prog;
1514         pid_t tracer;
1515         long poked;
1516         struct tracer_args_poke_t tracer_args;
1517 };
1518
1519 FIXTURE_SETUP(TRACE_poke)
1520 {
1521         struct sock_filter filter[] = {
1522                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1523                         offsetof(struct seccomp_data, nr)),
1524                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1525                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1526                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1527         };
1528
1529         self->poked = 0;
1530         memset(&self->prog, 0, sizeof(self->prog));
1531         self->prog.filter = malloc(sizeof(filter));
1532         ASSERT_NE(NULL, self->prog.filter);
1533         memcpy(self->prog.filter, filter, sizeof(filter));
1534         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1535
1536         /* Set up tracer args. */
1537         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1538
1539         /* Launch tracer. */
1540         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1541                                            &self->tracer_args, false);
1542 }
1543
1544 FIXTURE_TEARDOWN(TRACE_poke)
1545 {
1546         teardown_trace_fixture(_metadata, self->tracer);
1547         if (self->prog.filter)
1548                 free(self->prog.filter);
1549 }
1550
1551 TEST_F(TRACE_poke, read_has_side_effects)
1552 {
1553         ssize_t ret;
1554
1555         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1556         ASSERT_EQ(0, ret);
1557
1558         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1559         ASSERT_EQ(0, ret);
1560
1561         EXPECT_EQ(0, self->poked);
1562         ret = read(-1, NULL, 0);
1563         EXPECT_EQ(-1, ret);
1564         EXPECT_EQ(0x1001, self->poked);
1565 }
1566
1567 TEST_F(TRACE_poke, getpid_runs_normally)
1568 {
1569         long ret;
1570
1571         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1572         ASSERT_EQ(0, ret);
1573
1574         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1575         ASSERT_EQ(0, ret);
1576
1577         EXPECT_EQ(0, self->poked);
1578         EXPECT_NE(0, syscall(__NR_getpid));
1579         EXPECT_EQ(0, self->poked);
1580 }
1581
/*
 * Per-architecture register access used by get_syscall()/change_syscall():
 *   ARCH_REGS   - type of the register set fetched from the tracee
 *   SYSCALL_NUM - member of ARCH_REGS holding the syscall number
 *   SYSCALL_RET - member of ARCH_REGS holding the syscall return value
 * mips additionally defines SYSCALL_SYSCALL_NUM (used when SYSCALL_NUM is
 * __NR_O32_Linux) and SYSCALL_NUM_RET_SHARE_REG.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	a7
# define SYSCALL_RET	a0
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1623
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate @action and check its result
 * against @val. A negative @val means "expect -1 with errno == -val";
 * otherwise the result must equal @val.
 *
 * When the syscall return can't be changed, stub out the tests for it:
 * with SYSCALL_NUM_RET_SHARE_REG defined, only a -1 result is expected.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		/* Parenthesized so any expression is compared as a whole. */ \
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1639
/*
 * Architectures listed here use PTRACE_GETREGS and PTRACE_SETREGS instead
 * of the regset interface. This is useful for architectures without
 * HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1646
1647 /* Architecture-specific syscall fetching routine. */
1648 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1649 {
1650         ARCH_REGS regs;
1651 #ifdef HAVE_GETREGS
1652         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1653                 TH_LOG("PTRACE_GETREGS failed");
1654                 return -1;
1655         }
1656 #else
1657         struct iovec iov;
1658
1659         iov.iov_base = &regs;
1660         iov.iov_len = sizeof(regs);
1661         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1662                 TH_LOG("PTRACE_GETREGSET failed");
1663                 return -1;
1664         }
1665 #endif
1666
1667 #if defined(__mips__)
1668         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1669                 return regs.SYSCALL_SYSCALL_NUM;
1670 #endif
1671         return regs.SYSCALL_NUM;
1672 }
1673
1674 /* Architecture-specific syscall changing routine. */
1675 void change_syscall(struct __test_metadata *_metadata,
1676                     pid_t tracee, int syscall, int result)
1677 {
1678         int ret;
1679         ARCH_REGS regs;
1680 #ifdef HAVE_GETREGS
1681         ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1682 #else
1683         struct iovec iov;
1684         iov.iov_base = &regs;
1685         iov.iov_len = sizeof(regs);
1686         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1687 #endif
1688         EXPECT_EQ(0, ret) {}
1689
1690 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1691         defined(__s390__) || defined(__hppa__) || defined(__riscv)
1692         {
1693                 regs.SYSCALL_NUM = syscall;
1694         }
1695 #elif defined(__mips__)
1696         {
1697                 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1698                         regs.SYSCALL_SYSCALL_NUM = syscall;
1699                 else
1700                         regs.SYSCALL_NUM = syscall;
1701         }
1702
1703 #elif defined(__arm__)
1704 # ifndef PTRACE_SET_SYSCALL
1705 #  define PTRACE_SET_SYSCALL   23
1706 # endif
1707         {
1708                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1709                 EXPECT_EQ(0, ret);
1710         }
1711
1712 #elif defined(__aarch64__)
1713 # ifndef NT_ARM_SYSTEM_CALL
1714 #  define NT_ARM_SYSTEM_CALL 0x404
1715 # endif
1716         {
1717                 iov.iov_base = &syscall;
1718                 iov.iov_len = sizeof(syscall);
1719                 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1720                              &iov);
1721                 EXPECT_EQ(0, ret);
1722         }
1723
1724 #else
1725         ASSERT_EQ(1, 0) {
1726                 TH_LOG("How is the syscall changed on this architecture?");
1727         }
1728 #endif
1729
1730         /* If syscall is skipped, change return value. */
1731         if (syscall == -1)
1732 #ifdef SYSCALL_NUM_RET_SHARE_REG
1733                 TH_LOG("Can't modify syscall return on this architecture");
1734 #else
1735                 regs.SYSCALL_RET = result;
1736 #endif
1737
1738 #ifdef HAVE_GETREGS
1739         ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1740 #else
1741         iov.iov_base = &regs;
1742         iov.iov_len = sizeof(regs);
1743         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1744 #endif
1745         EXPECT_EQ(0, ret);
1746 }
1747
1748 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1749                     int status, void *args)
1750 {
1751         int ret;
1752         unsigned long msg;
1753
1754         /* Make sure we got the right message. */
1755         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1756         EXPECT_EQ(0, ret);
1757
1758         /* Validate and take action on expected syscalls. */
1759         switch (msg) {
1760         case 0x1002:
1761                 /* change getpid to getppid. */
1762                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1763                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1764                 break;
1765         case 0x1003:
1766                 /* skip gettid with valid return code. */
1767                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1768                 change_syscall(_metadata, tracee, -1, 45000);
1769                 break;
1770         case 0x1004:
1771                 /* skip openat with error. */
1772                 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1773                 change_syscall(_metadata, tracee, -1, -ESRCH);
1774                 break;
1775         case 0x1005:
1776                 /* do nothing (allow getppid) */
1777                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1778                 break;
1779         default:
1780                 EXPECT_EQ(0, msg) {
1781                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1782                         kill(tracee, SIGKILL);
1783                 }
1784         }
1785
1786 }
1787
1788 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1789                    int status, void *args)
1790 {
1791         int ret, nr;
1792         unsigned long msg;
1793         static bool entry;
1794
1795         /*
1796          * The traditional way to tell PTRACE_SYSCALL entry/exit
1797          * is by counting.
1798          */
1799         entry = !entry;
1800
1801         /* Make sure we got an appropriate message. */
1802         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1803         EXPECT_EQ(0, ret);
1804         EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1805                         : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1806
1807         if (!entry)
1808                 return;
1809
1810         nr = get_syscall(_metadata, tracee);
1811
1812         if (nr == __NR_getpid)
1813                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1814         if (nr == __NR_gettid)
1815                 change_syscall(_metadata, tracee, -1, 45000);
1816         if (nr == __NR_openat)
1817                 change_syscall(_metadata, tracee, -1, -ESRCH);
1818 }
1819
1820 FIXTURE_DATA(TRACE_syscall) {
1821         struct sock_fprog prog;
1822         pid_t tracer, mytid, mypid, parent;
1823 };
1824
1825 FIXTURE_SETUP(TRACE_syscall)
1826 {
1827         struct sock_filter filter[] = {
1828                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1829                         offsetof(struct seccomp_data, nr)),
1830                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1831                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1832                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1833                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1834                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1835                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1836                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1837                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1838                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1839         };
1840
1841         memset(&self->prog, 0, sizeof(self->prog));
1842         self->prog.filter = malloc(sizeof(filter));
1843         ASSERT_NE(NULL, self->prog.filter);
1844         memcpy(self->prog.filter, filter, sizeof(filter));
1845         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1846
1847         /* Prepare some testable syscall results. */
1848         self->mytid = syscall(__NR_gettid);
1849         ASSERT_GT(self->mytid, 0);
1850         ASSERT_NE(self->mytid, 1) {
1851                 TH_LOG("Running this test as init is not supported. :)");
1852         }
1853
1854         self->mypid = getpid();
1855         ASSERT_GT(self->mypid, 0);
1856         ASSERT_EQ(self->mytid, self->mypid);
1857
1858         self->parent = getppid();
1859         ASSERT_GT(self->parent, 0);
1860         ASSERT_NE(self->parent, self->mypid);
1861
1862         /* Launch tracer. */
1863         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1864                                            false);
1865 }
1866
1867 FIXTURE_TEARDOWN(TRACE_syscall)
1868 {
1869         teardown_trace_fixture(_metadata, self->tracer);
1870         if (self->prog.filter)
1871                 free(self->prog.filter);
1872 }
1873
1874 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1875 {
1876         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1877         teardown_trace_fixture(_metadata, self->tracer);
1878         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1879                                            true);
1880
1881         /* Tracer will redirect getpid to getppid. */
1882         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1883 }
1884
1885 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1886 {
1887         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1888         teardown_trace_fixture(_metadata, self->tracer);
1889         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1890                                            true);
1891
1892         /* Tracer should skip the open syscall, resulting in ESRCH. */
1893         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1894 }
1895
1896 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1897 {
1898         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1899         teardown_trace_fixture(_metadata, self->tracer);
1900         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1901                                            true);
1902
1903         /* Tracer should skip the gettid syscall, resulting fake pid. */
1904         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1905 }
1906
1907 TEST_F(TRACE_syscall, syscall_allowed)
1908 {
1909         long ret;
1910
1911         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1912         ASSERT_EQ(0, ret);
1913
1914         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1915         ASSERT_EQ(0, ret);
1916
1917         /* getppid works as expected (no changes). */
1918         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1919         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1920 }
1921
1922 TEST_F(TRACE_syscall, syscall_redirected)
1923 {
1924         long ret;
1925
1926         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1927         ASSERT_EQ(0, ret);
1928
1929         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1930         ASSERT_EQ(0, ret);
1931
1932         /* getpid has been redirected to getppid as expected. */
1933         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1934         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1935 }
1936
1937 TEST_F(TRACE_syscall, syscall_errno)
1938 {
1939         long ret;
1940
1941         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1942         ASSERT_EQ(0, ret);
1943
1944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1945         ASSERT_EQ(0, ret);
1946
1947         /* openat has been skipped and an errno return. */
1948         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1949 }
1950
1951 TEST_F(TRACE_syscall, syscall_faked)
1952 {
1953         long ret;
1954
1955         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1956         ASSERT_EQ(0, ret);
1957
1958         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1959         ASSERT_EQ(0, ret);
1960
1961         /* gettid has been skipped and an altered return value stored. */
1962         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1963 }
1964
1965 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1966 {
1967         struct sock_filter filter[] = {
1968                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1969                         offsetof(struct seccomp_data, nr)),
1970                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1971                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1972                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1973         };
1974         struct sock_fprog prog = {
1975                 .len = (unsigned short)ARRAY_SIZE(filter),
1976                 .filter = filter,
1977         };
1978         long ret;
1979
1980         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1981         ASSERT_EQ(0, ret);
1982
1983         /* Install fixture filter. */
1984         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1985         ASSERT_EQ(0, ret);
1986
1987         /* Install "errno on getppid" filter. */
1988         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1989         ASSERT_EQ(0, ret);
1990
1991         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1992         errno = 0;
1993         EXPECT_EQ(-1, syscall(__NR_getpid));
1994         EXPECT_EQ(EPERM, errno);
1995 }
1996
1997 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1998 {
1999         struct sock_filter filter[] = {
2000                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2001                         offsetof(struct seccomp_data, nr)),
2002                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2003                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2004                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2005         };
2006         struct sock_fprog prog = {
2007                 .len = (unsigned short)ARRAY_SIZE(filter),
2008                 .filter = filter,
2009         };
2010         long ret;
2011
2012         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2013         ASSERT_EQ(0, ret);
2014
2015         /* Install fixture filter. */
2016         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2017         ASSERT_EQ(0, ret);
2018
2019         /* Install "death on getppid" filter. */
2020         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2021         ASSERT_EQ(0, ret);
2022
2023         /* Tracer will redirect getpid to getppid, and we should die. */
2024         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2025 }
2026
2027 TEST_F(TRACE_syscall, skip_after_ptrace)
2028 {
2029         struct sock_filter filter[] = {
2030                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2031                         offsetof(struct seccomp_data, nr)),
2032                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2033                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2034                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2035         };
2036         struct sock_fprog prog = {
2037                 .len = (unsigned short)ARRAY_SIZE(filter),
2038                 .filter = filter,
2039         };
2040         long ret;
2041
2042         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2043         teardown_trace_fixture(_metadata, self->tracer);
2044         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2045                                            true);
2046
2047         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2048         ASSERT_EQ(0, ret);
2049
2050         /* Install "errno on getppid" filter. */
2051         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2052         ASSERT_EQ(0, ret);
2053
2054         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
2055         EXPECT_EQ(-1, syscall(__NR_getpid));
2056         EXPECT_EQ(EPERM, errno);
2057 }
2058
2059 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2060 {
2061         struct sock_filter filter[] = {
2062                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2063                         offsetof(struct seccomp_data, nr)),
2064                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2065                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2066                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2067         };
2068         struct sock_fprog prog = {
2069                 .len = (unsigned short)ARRAY_SIZE(filter),
2070                 .filter = filter,
2071         };
2072         long ret;
2073
2074         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2075         teardown_trace_fixture(_metadata, self->tracer);
2076         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2077                                            true);
2078
2079         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2080         ASSERT_EQ(0, ret);
2081
2082         /* Install "death on getppid" filter. */
2083         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2084         ASSERT_EQ(0, ret);
2085
2086         /* Tracer will redirect getpid to getppid, and we should die. */
2087         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2088 }
2089
2090 TEST(seccomp_syscall)
2091 {
2092         struct sock_filter filter[] = {
2093                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2094         };
2095         struct sock_fprog prog = {
2096                 .len = (unsigned short)ARRAY_SIZE(filter),
2097                 .filter = filter,
2098         };
2099         long ret;
2100
2101         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2102         ASSERT_EQ(0, ret) {
2103                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2104         }
2105
2106         /* Reject insane operation. */
2107         ret = seccomp(-1, 0, &prog);
2108         ASSERT_NE(ENOSYS, errno) {
2109                 TH_LOG("Kernel does not support seccomp syscall!");
2110         }
2111         EXPECT_EQ(EINVAL, errno) {
2112                 TH_LOG("Did not reject crazy op value!");
2113         }
2114
2115         /* Reject strict with flags or pointer. */
2116         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2117         EXPECT_EQ(EINVAL, errno) {
2118                 TH_LOG("Did not reject mode strict with flags!");
2119         }
2120         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2121         EXPECT_EQ(EINVAL, errno) {
2122                 TH_LOG("Did not reject mode strict with uargs!");
2123         }
2124
2125         /* Reject insane args for filter. */
2126         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2127         EXPECT_EQ(EINVAL, errno) {
2128                 TH_LOG("Did not reject crazy filter flags!");
2129         }
2130         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2131         EXPECT_EQ(EFAULT, errno) {
2132                 TH_LOG("Did not reject NULL filter!");
2133         }
2134
2135         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2136         EXPECT_EQ(0, errno) {
2137                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2138                         strerror(errno));
2139         }
2140 }
2141
2142 TEST(seccomp_syscall_mode_lock)
2143 {
2144         struct sock_filter filter[] = {
2145                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2146         };
2147         struct sock_fprog prog = {
2148                 .len = (unsigned short)ARRAY_SIZE(filter),
2149                 .filter = filter,
2150         };
2151         long ret;
2152
2153         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2154         ASSERT_EQ(0, ret) {
2155                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2156         }
2157
2158         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2159         ASSERT_NE(ENOSYS, errno) {
2160                 TH_LOG("Kernel does not support seccomp syscall!");
2161         }
2162         EXPECT_EQ(0, ret) {
2163                 TH_LOG("Could not install filter!");
2164         }
2165
2166         /* Make sure neither entry point will switch to strict. */
2167         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2168         EXPECT_EQ(EINVAL, errno) {
2169                 TH_LOG("Switched to mode strict!");
2170         }
2171
2172         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2173         EXPECT_EQ(EINVAL, errno) {
2174                 TH_LOG("Switched to mode strict!");
2175         }
2176 }
2177
2178 /*
2179  * Test detection of known and unknown filter flags. Userspace needs to be able
2180  * to check if a filter flag is supported by the current kernel and a good way
2181  * of doing that is by attempting to enter filter mode, with the flag bit in
2182  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2183  * that the flag is valid and EINVAL indicates that the flag is invalid.
2184  */
2185 TEST(detect_seccomp_filter_flags)
2186 {
2187         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2188                                  SECCOMP_FILTER_FLAG_LOG,
2189                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2190                                  SECCOMP_FILTER_FLAG_NEW_LISTENER };
2191         unsigned int exclusive[] = {
2192                                 SECCOMP_FILTER_FLAG_TSYNC,
2193                                 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2194         unsigned int flag, all_flags, exclusive_mask;
2195         int i;
2196         long ret;
2197
2198         /* Test detection of individual known-good filter flags */
2199         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2200                 int bits = 0;
2201
2202                 flag = flags[i];
2203                 /* Make sure the flag is a single bit! */
2204                 while (flag) {
2205                         if (flag & 0x1)
2206                                 bits ++;
2207                         flag >>= 1;
2208                 }
2209                 ASSERT_EQ(1, bits);
2210                 flag = flags[i];
2211
2212                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2213                 ASSERT_NE(ENOSYS, errno) {
2214                         TH_LOG("Kernel does not support seccomp syscall!");
2215                 }
2216                 EXPECT_EQ(-1, ret);
2217                 EXPECT_EQ(EFAULT, errno) {
2218                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2219                                flag);
2220                 }
2221
2222                 all_flags |= flag;
2223         }
2224
2225         /*
2226          * Test detection of all known-good filter flags combined. But
2227          * for the exclusive flags we need to mask them out and try them
2228          * individually for the "all flags" testing.
2229          */
2230         exclusive_mask = 0;
2231         for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2232                 exclusive_mask |= exclusive[i];
2233         for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2234                 flag = all_flags & ~exclusive_mask;
2235                 flag |= exclusive[i];
2236
2237                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2238                 EXPECT_EQ(-1, ret);
2239                 EXPECT_EQ(EFAULT, errno) {
2240                         TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2241                                flag);
2242                 }
2243         }
2244
2245         /* Test detection of an unknown filter flags, without exclusives. */
2246         flag = -1;
2247         flag &= ~exclusive_mask;
2248         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2249         EXPECT_EQ(-1, ret);
2250         EXPECT_EQ(EINVAL, errno) {
2251                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2252                        flag);
2253         }
2254
2255         /*
2256          * Test detection of an unknown filter flag that may simply need to be
2257          * added to this test
2258          */
2259         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2260         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2261         EXPECT_EQ(-1, ret);
2262         EXPECT_EQ(EINVAL, errno) {
2263                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2264                        flag);
2265         }
2266 }
2267
2268 TEST(TSYNC_first)
2269 {
2270         struct sock_filter filter[] = {
2271                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2272         };
2273         struct sock_fprog prog = {
2274                 .len = (unsigned short)ARRAY_SIZE(filter),
2275                 .filter = filter,
2276         };
2277         long ret;
2278
2279         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2280         ASSERT_EQ(0, ret) {
2281                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2282         }
2283
2284         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2285                       &prog);
2286         ASSERT_NE(ENOSYS, errno) {
2287                 TH_LOG("Kernel does not support seccomp syscall!");
2288         }
2289         EXPECT_EQ(0, ret) {
2290                 TH_LOG("Could not install initial filter with TSYNC!");
2291         }
2292 }
2293
/* Number of sibling threads each TSYNC test starts. */
#define TSYNC_SIBLINGS 2

/* Per-thread state handed to tsync_sibling() as its start argument. */
struct tsync_sibling {
	pthread_t		tid;		/* filled in by pthread_create(); zeroed after a successful join */
	pid_t			system_tid;	/* gettid result, stored by the thread itself */
	sem_t			*started;	/* posted once the thread has finished its start-up step */
	pthread_cond_t		*cond;		/* the thread waits here for the parent's broadcast */
	pthread_mutex_t		*mutex;		/* protects the wait on @cond */
	int			diverge;	/* non-zero: install @prog via prctl() before waiting */
	int			num_waits;	/* wake-ups to consume before proceeding */
	struct sock_fprog	*prog;		/* filter used when @diverge is set */
	struct __test_metadata	*metadata;	/* harness state of the owning test */
};
2306
/*
 * PTHREAD_JOIN - join a sibling and remember that it has been joined.
 * @tid:    lvalue holding the pthread_t to join
 * @status: passed through to pthread_join()
 *
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * A failed join is logged and leaves @tid untouched. @tid is evaluated
 * more than once, so it must be free of side effects.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2323
2324 FIXTURE_DATA(TSYNC) {
2325         struct sock_fprog root_prog, apply_prog;
2326         struct tsync_sibling sibling[TSYNC_SIBLINGS];
2327         sem_t started;
2328         pthread_cond_t cond;
2329         pthread_mutex_t mutex;
2330         int sibling_count;
2331 };
2332
2333 FIXTURE_SETUP(TSYNC)
2334 {
2335         struct sock_filter root_filter[] = {
2336                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2337         };
2338         struct sock_filter apply_filter[] = {
2339                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2340                         offsetof(struct seccomp_data, nr)),
2341                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2342                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2343                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2344         };
2345
2346         memset(&self->root_prog, 0, sizeof(self->root_prog));
2347         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2348         memset(&self->sibling, 0, sizeof(self->sibling));
2349         self->root_prog.filter = malloc(sizeof(root_filter));
2350         ASSERT_NE(NULL, self->root_prog.filter);
2351         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2352         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2353
2354         self->apply_prog.filter = malloc(sizeof(apply_filter));
2355         ASSERT_NE(NULL, self->apply_prog.filter);
2356         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2357         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2358
2359         self->sibling_count = 0;
2360         pthread_mutex_init(&self->mutex, NULL);
2361         pthread_cond_init(&self->cond, NULL);
2362         sem_init(&self->started, 0, 0);
2363         self->sibling[0].tid = 0;
2364         self->sibling[0].cond = &self->cond;
2365         self->sibling[0].started = &self->started;
2366         self->sibling[0].mutex = &self->mutex;
2367         self->sibling[0].diverge = 0;
2368         self->sibling[0].num_waits = 1;
2369         self->sibling[0].prog = &self->root_prog;
2370         self->sibling[0].metadata = _metadata;
2371         self->sibling[1].tid = 0;
2372         self->sibling[1].cond = &self->cond;
2373         self->sibling[1].started = &self->started;
2374         self->sibling[1].mutex = &self->mutex;
2375         self->sibling[1].diverge = 0;
2376         self->sibling[1].prog = &self->root_prog;
2377         self->sibling[1].num_waits = 1;
2378         self->sibling[1].metadata = _metadata;
2379 }
2380
2381 FIXTURE_TEARDOWN(TSYNC)
2382 {
2383         int sib = 0;
2384
2385         if (self->root_prog.filter)
2386                 free(self->root_prog.filter);
2387         if (self->apply_prog.filter)
2388                 free(self->apply_prog.filter);
2389
2390         for ( ; sib < self->sibling_count; ++sib) {
2391                 struct tsync_sibling *s = &self->sibling[sib];
2392
2393                 if (!s->tid)
2394                         continue;
2395                 /*
2396                  * If a thread is still running, it may be stuck, so hit
2397                  * it over the head really hard.
2398                  */
2399                 pthread_kill(s->tid, 9);
2400         }
2401         pthread_mutex_destroy(&self->mutex);
2402         pthread_cond_destroy(&self->cond);
2403         sem_destroy(&self->started);
2404 }
2405
2406 void *tsync_sibling(void *data)
2407 {
2408         long ret = 0;
2409         struct tsync_sibling *me = data;
2410
2411         me->system_tid = syscall(__NR_gettid);
2412
2413         pthread_mutex_lock(me->mutex);
2414         if (me->diverge) {
2415                 /* Just re-apply the root prog to fork the tree */
2416                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2417                                 me->prog, 0, 0);
2418         }
2419         sem_post(me->started);
2420         /* Return outside of started so parent notices failures. */
2421         if (ret) {
2422                 pthread_mutex_unlock(me->mutex);
2423                 return (void *)SIBLING_EXIT_FAILURE;
2424         }
2425         do {
2426                 pthread_cond_wait(me->cond, me->mutex);
2427                 me->num_waits = me->num_waits - 1;
2428         } while (me->num_waits);
2429         pthread_mutex_unlock(me->mutex);
2430
2431         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2432         if (!ret)
2433                 return (void *)SIBLING_EXIT_NEWPRIVS;
2434         read(0, NULL, 0);
2435         return (void *)SIBLING_EXIT_UNKILLED;
2436 }
2437
2438 void tsync_start_sibling(struct tsync_sibling *sibling)
2439 {
2440         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2441 }
2442
2443 TEST_F(TSYNC, siblings_fail_prctl)
2444 {
2445         long ret;
2446         void *status;
2447         struct sock_filter filter[] = {
2448                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2449                         offsetof(struct seccomp_data, nr)),
2450                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2451                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2452                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2453         };
2454         struct sock_fprog prog = {
2455                 .len = (unsigned short)ARRAY_SIZE(filter),
2456                 .filter = filter,
2457         };
2458
2459         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2460                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2461         }
2462
2463         /* Check prctl failure detection by requesting sib 0 diverge. */
2464         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2465         ASSERT_NE(ENOSYS, errno) {
2466                 TH_LOG("Kernel does not support seccomp syscall!");
2467         }
2468         ASSERT_EQ(0, ret) {
2469                 TH_LOG("setting filter failed");
2470         }
2471
2472         self->sibling[0].diverge = 1;
2473         tsync_start_sibling(&self->sibling[0]);
2474         tsync_start_sibling(&self->sibling[1]);
2475
2476         while (self->sibling_count < TSYNC_SIBLINGS) {
2477                 sem_wait(&self->started);
2478                 self->sibling_count++;
2479         }
2480
2481         /* Signal the threads to clean up*/
2482         pthread_mutex_lock(&self->mutex);
2483         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2484                 TH_LOG("cond broadcast non-zero");
2485         }
2486         pthread_mutex_unlock(&self->mutex);
2487
2488         /* Ensure diverging sibling failed to call prctl. */
2489         PTHREAD_JOIN(self->sibling[0].tid, &status);
2490         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2491         PTHREAD_JOIN(self->sibling[1].tid, &status);
2492         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2493 }
2494
2495 TEST_F(TSYNC, two_siblings_with_ancestor)
2496 {
2497         long ret;
2498         void *status;
2499
2500         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2501                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2502         }
2503
2504         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2505         ASSERT_NE(ENOSYS, errno) {
2506                 TH_LOG("Kernel does not support seccomp syscall!");
2507         }
2508         ASSERT_EQ(0, ret) {
2509                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2510         }
2511         tsync_start_sibling(&self->sibling[0]);
2512         tsync_start_sibling(&self->sibling[1]);
2513
2514         while (self->sibling_count < TSYNC_SIBLINGS) {
2515                 sem_wait(&self->started);
2516                 self->sibling_count++;
2517         }
2518
2519         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2520                       &self->apply_prog);
2521         ASSERT_EQ(0, ret) {
2522                 TH_LOG("Could install filter on all threads!");
2523         }
2524         /* Tell the siblings to test the policy */
2525         pthread_mutex_lock(&self->mutex);
2526         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2527                 TH_LOG("cond broadcast non-zero");
2528         }
2529         pthread_mutex_unlock(&self->mutex);
2530         /* Ensure they are both killed and don't exit cleanly. */
2531         PTHREAD_JOIN(self->sibling[0].tid, &status);
2532         EXPECT_EQ(0x0, (long)status);
2533         PTHREAD_JOIN(self->sibling[1].tid, &status);
2534         EXPECT_EQ(0x0, (long)status);
2535 }
2536
2537 TEST_F(TSYNC, two_sibling_want_nnp)
2538 {
2539         void *status;
2540
2541         /* start siblings before any prctl() operations */
2542         tsync_start_sibling(&self->sibling[0]);
2543         tsync_start_sibling(&self->sibling[1]);
2544         while (self->sibling_count < TSYNC_SIBLINGS) {
2545                 sem_wait(&self->started);
2546                 self->sibling_count++;
2547         }
2548
2549         /* Tell the siblings to test no policy */
2550         pthread_mutex_lock(&self->mutex);
2551         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2552                 TH_LOG("cond broadcast non-zero");
2553         }
2554         pthread_mutex_unlock(&self->mutex);
2555
2556         /* Ensure they are both upset about lacking nnp. */
2557         PTHREAD_JOIN(self->sibling[0].tid, &status);
2558         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2559         PTHREAD_JOIN(self->sibling[1].tid, &status);
2560         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2561 }
2562
2563 TEST_F(TSYNC, two_siblings_with_no_filter)
2564 {
2565         long ret;
2566         void *status;
2567
2568         /* start siblings before any prctl() operations */
2569         tsync_start_sibling(&self->sibling[0]);
2570         tsync_start_sibling(&self->sibling[1]);
2571         while (self->sibling_count < TSYNC_SIBLINGS) {
2572                 sem_wait(&self->started);
2573                 self->sibling_count++;
2574         }
2575
2576         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2577                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2578         }
2579
2580         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2581                       &self->apply_prog);
2582         ASSERT_NE(ENOSYS, errno) {
2583                 TH_LOG("Kernel does not support seccomp syscall!");
2584         }
2585         ASSERT_EQ(0, ret) {
2586                 TH_LOG("Could install filter on all threads!");
2587         }
2588
2589         /* Tell the siblings to test the policy */
2590         pthread_mutex_lock(&self->mutex);
2591         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2592                 TH_LOG("cond broadcast non-zero");
2593         }
2594         pthread_mutex_unlock(&self->mutex);
2595
2596         /* Ensure they are both killed and don't exit cleanly. */
2597         PTHREAD_JOIN(self->sibling[0].tid, &status);
2598         EXPECT_EQ(0x0, (long)status);
2599         PTHREAD_JOIN(self->sibling[1].tid, &status);
2600         EXPECT_EQ(0x0, (long)status);
2601 }
2602
2603 TEST_F(TSYNC, two_siblings_with_one_divergence)
2604 {
2605         long ret;
2606         void *status;
2607
2608         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2609                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2610         }
2611
2612         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2613         ASSERT_NE(ENOSYS, errno) {
2614                 TH_LOG("Kernel does not support seccomp syscall!");
2615         }
2616         ASSERT_EQ(0, ret) {
2617                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2618         }
2619         self->sibling[0].diverge = 1;
2620         tsync_start_sibling(&self->sibling[0]);
2621         tsync_start_sibling(&self->sibling[1]);
2622
2623         while (self->sibling_count < TSYNC_SIBLINGS) {
2624                 sem_wait(&self->started);
2625                 self->sibling_count++;
2626         }
2627
2628         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2629                       &self->apply_prog);
2630         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2631                 TH_LOG("Did not fail on diverged sibling.");
2632         }
2633
2634         /* Wake the threads */
2635         pthread_mutex_lock(&self->mutex);
2636         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2637                 TH_LOG("cond broadcast non-zero");
2638         }
2639         pthread_mutex_unlock(&self->mutex);
2640
2641         /* Ensure they are both unkilled. */
2642         PTHREAD_JOIN(self->sibling[0].tid, &status);
2643         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2644         PTHREAD_JOIN(self->sibling[1].tid, &status);
2645         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2646 }
2647
2648 TEST_F(TSYNC, two_siblings_not_under_filter)
2649 {
2650         long ret, sib;
2651         void *status;
2652         struct timespec delay = { .tv_nsec = 100000000 };
2653
2654         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2655                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2656         }
2657
2658         /*
2659          * Sibling 0 will have its own seccomp policy
2660          * and Sibling 1 will not be under seccomp at
2661          * all. Sibling 1 will enter seccomp and 0
2662          * will cause failure.
2663          */
2664         self->sibling[0].diverge = 1;
2665         tsync_start_sibling(&self->sibling[0]);
2666         tsync_start_sibling(&self->sibling[1]);
2667
2668         while (self->sibling_count < TSYNC_SIBLINGS) {
2669                 sem_wait(&self->started);
2670                 self->sibling_count++;
2671         }
2672
2673         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2674         ASSERT_NE(ENOSYS, errno) {
2675                 TH_LOG("Kernel does not support seccomp syscall!");
2676         }
2677         ASSERT_EQ(0, ret) {
2678                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2679         }
2680
2681         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2682                       &self->apply_prog);
2683         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2684                 TH_LOG("Did not fail on diverged sibling.");
2685         }
2686         sib = 1;
2687         if (ret == self->sibling[0].system_tid)
2688                 sib = 0;
2689
2690         pthread_mutex_lock(&self->mutex);
2691
2692         /* Increment the other siblings num_waits so we can clean up
2693          * the one we just saw.
2694          */
2695         self->sibling[!sib].num_waits += 1;
2696
2697         /* Signal the thread to clean up*/
2698         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2699                 TH_LOG("cond broadcast non-zero");
2700         }
2701         pthread_mutex_unlock(&self->mutex);
2702         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2703         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2704         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2705         while (!kill(self->sibling[sib].system_tid, 0))
2706                 nanosleep(&delay, NULL);
2707         /* Switch to the remaining sibling */
2708         sib = !sib;
2709
2710         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2711                       &self->apply_prog);
2712         ASSERT_EQ(0, ret) {
2713                 TH_LOG("Expected the remaining sibling to sync");
2714         };
2715
2716         pthread_mutex_lock(&self->mutex);
2717
2718         /* If remaining sibling didn't have a chance to wake up during
2719          * the first broadcast, manually reduce the num_waits now.
2720          */
2721         if (self->sibling[sib].num_waits > 1)
2722                 self->sibling[sib].num_waits = 1;
2723         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2724                 TH_LOG("cond broadcast non-zero");
2725         }
2726         pthread_mutex_unlock(&self->mutex);
2727         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2728         EXPECT_EQ(0, (long)status);
2729         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2730         while (!kill(self->sibling[sib].system_tid, 0))
2731                 nanosleep(&delay, NULL);
2732
2733         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2734                       &self->apply_prog);
2735         ASSERT_EQ(0, ret);  /* just us chickens */
2736 }
2737
2738 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2739 TEST(syscall_restart)
2740 {
2741         long ret;
2742         unsigned long msg;
2743         pid_t child_pid;
2744         int pipefd[2];
2745         int status;
2746         siginfo_t info = { };
2747         struct sock_filter filter[] = {
2748                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2749                          offsetof(struct seccomp_data, nr)),
2750
2751 #ifdef __NR_sigreturn
2752                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2753 #endif
2754                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2755                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2756                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2757                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2758                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2759
2760                 /* Allow __NR_write for easy logging. */
2761                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2762                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2763                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2764                 /* The nanosleep jump target. */
2765                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2766                 /* The restart_syscall jump target. */
2767                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2768         };
2769         struct sock_fprog prog = {
2770                 .len = (unsigned short)ARRAY_SIZE(filter),
2771                 .filter = filter,
2772         };
2773 #if defined(__arm__)
2774         struct utsname utsbuf;
2775 #endif
2776
2777         ASSERT_EQ(0, pipe(pipefd));
2778
2779         child_pid = fork();
2780         ASSERT_LE(0, child_pid);
2781         if (child_pid == 0) {
2782                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2783                 char buf = ' ';
2784                 struct timespec timeout = { };
2785
2786                 /* Attach parent as tracer and stop. */
2787                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2788                 EXPECT_EQ(0, raise(SIGSTOP));
2789
2790                 EXPECT_EQ(0, close(pipefd[1]));
2791
2792                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2793                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2794                 }
2795
2796                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2797                 EXPECT_EQ(0, ret) {
2798                         TH_LOG("Failed to install filter!");
2799                 }
2800
2801                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2802                         TH_LOG("Failed to read() sync from parent");
2803                 }
2804                 EXPECT_EQ('.', buf) {
2805                         TH_LOG("Failed to get sync data from read()");
2806                 }
2807
2808                 /* Start nanosleep to be interrupted. */
2809                 timeout.tv_sec = 1;
2810                 errno = 0;
2811                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2812                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2813                 }
2814
2815                 /* Read final sync from parent. */
2816                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2817                         TH_LOG("Failed final read() from parent");
2818                 }
2819                 EXPECT_EQ('!', buf) {
2820                         TH_LOG("Failed to get final data from read()");
2821                 }
2822
2823                 /* Directly report the status of our test harness results. */
2824                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2825                                                      : EXIT_FAILURE);
2826         }
2827         EXPECT_EQ(0, close(pipefd[0]));
2828
2829         /* Attach to child, setup options, and release. */
2830         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2831         ASSERT_EQ(true, WIFSTOPPED(status));
2832         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2833                             PTRACE_O_TRACESECCOMP));
2834         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2835         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2836
2837         /* Wait for nanosleep() to start. */
2838         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2839         ASSERT_EQ(true, WIFSTOPPED(status));
2840         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2841         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2842         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2843         ASSERT_EQ(0x100, msg);
2844         EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2845
2846         /* Might as well check siginfo for sanity while we're here. */
2847         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2848         ASSERT_EQ(SIGTRAP, info.si_signo);
2849         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2850         EXPECT_EQ(0, info.si_errno);
2851         EXPECT_EQ(getuid(), info.si_uid);
2852         /* Verify signal delivery came from child (seccomp-triggered). */
2853         EXPECT_EQ(child_pid, info.si_pid);
2854
2855         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2856         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2857         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2858         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2859         ASSERT_EQ(true, WIFSTOPPED(status));
2860         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2861         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2862         /*
2863          * There is no siginfo on SIGSTOP any more, so we can't verify
2864          * signal delivery came from parent now (getpid() == info.si_pid).
2865          * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2866          * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2867          */
2868         EXPECT_EQ(SIGSTOP, info.si_signo);
2869
2870         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2871         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2872         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2873         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2874         ASSERT_EQ(true, WIFSTOPPED(status));
2875         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2876         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2877
2878         /* Wait for restart_syscall() to start. */
2879         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2880         ASSERT_EQ(true, WIFSTOPPED(status));
2881         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2882         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2883         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2884
2885         ASSERT_EQ(0x200, msg);
2886         ret = get_syscall(_metadata, child_pid);
2887 #if defined(__arm__)
2888         /*
2889          * FIXME:
2890          * - native ARM registers do NOT expose true syscall.
2891          * - compat ARM registers on ARM64 DO expose true syscall.
2892          */
2893         ASSERT_EQ(0, uname(&utsbuf));
2894         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2895                 EXPECT_EQ(__NR_nanosleep, ret);
2896         } else
2897 #endif
2898         {
2899                 EXPECT_EQ(__NR_restart_syscall, ret);
2900         }
2901
2902         /* Write again to end test. */
2903         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2904         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2905         EXPECT_EQ(0, close(pipefd[1]));
2906
2907         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2908         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2909                 _metadata->passed = 0;
2910 }
2911
2912 TEST_SIGNAL(filter_flag_log, SIGSYS)
2913 {
2914         struct sock_filter allow_filter[] = {
2915                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2916         };
2917         struct sock_filter kill_filter[] = {
2918                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2919                         offsetof(struct seccomp_data, nr)),
2920                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2921                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2922                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2923         };
2924         struct sock_fprog allow_prog = {
2925                 .len = (unsigned short)ARRAY_SIZE(allow_filter),
2926                 .filter = allow_filter,
2927         };
2928         struct sock_fprog kill_prog = {
2929                 .len = (unsigned short)ARRAY_SIZE(kill_filter),
2930                 .filter = kill_filter,
2931         };
2932         long ret;
2933         pid_t parent = getppid();
2934
2935         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2936         ASSERT_EQ(0, ret);
2937
2938         /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2939         ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2940                       &allow_prog);
2941         ASSERT_NE(ENOSYS, errno) {
2942                 TH_LOG("Kernel does not support seccomp syscall!");
2943         }
2944         EXPECT_NE(0, ret) {
2945                 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2946         }
2947         EXPECT_EQ(EINVAL, errno) {
2948                 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2949         }
2950
2951         /* Verify that a simple, permissive filter can be added with no flags */
2952         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2953         EXPECT_EQ(0, ret);
2954
2955         /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2956         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2957                       &allow_prog);
2958         ASSERT_NE(EINVAL, errno) {
2959                 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2960         }
2961         EXPECT_EQ(0, ret);
2962
2963         /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2964         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2965                       &kill_prog);
2966         EXPECT_EQ(0, ret);
2967
2968         EXPECT_EQ(parent, syscall(__NR_getppid));
2969         /* getpid() should never return. */
2970         EXPECT_EQ(0, syscall(__NR_getpid));
2971 }
2972
2973 TEST(get_action_avail)
2974 {
2975         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2976                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2977                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2978         __u32 unknown_action = 0x10000000U;
2979         int i;
2980         long ret;
2981
2982         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2983         ASSERT_NE(ENOSYS, errno) {
2984                 TH_LOG("Kernel does not support seccomp syscall!");
2985         }
2986         ASSERT_NE(EINVAL, errno) {
2987                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2988         }
2989         EXPECT_EQ(ret, 0);
2990
2991         for (i = 0; i < ARRAY_SIZE(actions); i++) {
2992                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2993                 EXPECT_EQ(ret, 0) {
2994                         TH_LOG("Expected action (0x%X) not available!",
2995                                actions[i]);
2996                 }
2997         }
2998
2999         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
3000         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3001         EXPECT_EQ(ret, -1);
3002         EXPECT_EQ(errno, EOPNOTSUPP);
3003 }
3004
3005 TEST(get_metadata)
3006 {
3007         pid_t pid;
3008         int pipefd[2];
3009         char buf;
3010         struct seccomp_metadata md;
3011         long ret;
3012
3013         /* Only real root can get metadata. */
3014         if (geteuid()) {
3015                 XFAIL(return, "get_metadata requires real root");
3016                 return;
3017         }
3018
3019         ASSERT_EQ(0, pipe(pipefd));
3020
3021         pid = fork();
3022         ASSERT_GE(pid, 0);
3023         if (pid == 0) {
3024                 struct sock_filter filter[] = {
3025                         BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3026                 };
3027                 struct sock_fprog prog = {
3028                         .len = (unsigned short)ARRAY_SIZE(filter),
3029                         .filter = filter,
3030                 };
3031
3032                 /* one with log, one without */
3033                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3034                                      SECCOMP_FILTER_FLAG_LOG, &prog));
3035                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3036
3037                 EXPECT_EQ(0, close(pipefd[0]));
3038                 ASSERT_EQ(1, write(pipefd[1], "1", 1));
3039                 ASSERT_EQ(0, close(pipefd[1]));
3040
3041                 while (1)
3042                         sleep(100);
3043         }
3044
3045         ASSERT_EQ(0, close(pipefd[1]));
3046         ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3047
3048         ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3049         ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3050
3051         /* Past here must not use ASSERT or child process is never killed. */
3052
3053         md.filter_off = 0;
3054         errno = 0;
3055         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3056         EXPECT_EQ(sizeof(md), ret) {
3057                 if (errno == EINVAL)
3058                         XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3059         }
3060
3061         EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3062         EXPECT_EQ(md.filter_off, 0);
3063
3064         md.filter_off = 1;
3065         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3066         EXPECT_EQ(sizeof(md), ret);
3067         EXPECT_EQ(md.flags, 0);
3068         EXPECT_EQ(md.filter_off, 1);
3069
3070 skip:
3071         ASSERT_EQ(0, kill(pid, SIGKILL));
3072 }
3073
3074 static int user_trap_syscall(int nr, unsigned int flags)
3075 {
3076         struct sock_filter filter[] = {
3077                 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3078                         offsetof(struct seccomp_data, nr)),
3079                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3080                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3081                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3082         };
3083
3084         struct sock_fprog prog = {
3085                 .len = (unsigned short)ARRAY_SIZE(filter),
3086                 .filter = filter,
3087         };
3088
3089         return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3090 }
3091
3092 #define USER_NOTIF_MAGIC INT_MAX
3093 TEST(user_notification_basic)
3094 {
3095         pid_t pid;
3096         long ret;
3097         int status, listener;
3098         struct seccomp_notif req = {};
3099         struct seccomp_notif_resp resp = {};
3100         struct pollfd pollfd;
3101
3102         struct sock_filter filter[] = {
3103                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3104         };
3105         struct sock_fprog prog = {
3106                 .len = (unsigned short)ARRAY_SIZE(filter),
3107                 .filter = filter,
3108         };
3109
3110         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3111         ASSERT_EQ(0, ret) {
3112                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3113         }
3114
3115         pid = fork();
3116         ASSERT_GE(pid, 0);
3117
3118         /* Check that we get -ENOSYS with no listener attached */
3119         if (pid == 0) {
3120                 if (user_trap_syscall(__NR_getppid, 0) < 0)
3121                         exit(1);
3122                 ret = syscall(__NR_getppid);
3123                 exit(ret >= 0 || errno != ENOSYS);
3124         }
3125
3126         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3127         EXPECT_EQ(true, WIFEXITED(status));
3128         EXPECT_EQ(0, WEXITSTATUS(status));
3129
3130         /* Add some no-op filters for grins. */
3131         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3132         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3133         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3134         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3135
3136         /* Check that the basic notification machinery works */
3137         listener = user_trap_syscall(__NR_getppid,
3138                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3139         ASSERT_GE(listener, 0);
3140
3141         /* Installing a second listener in the chain should EBUSY */
3142         EXPECT_EQ(user_trap_syscall(__NR_getppid,
3143                                     SECCOMP_FILTER_FLAG_NEW_LISTENER),
3144                   -1);
3145         EXPECT_EQ(errno, EBUSY);
3146
3147         pid = fork();
3148         ASSERT_GE(pid, 0);
3149
3150         if (pid == 0) {
3151                 ret = syscall(__NR_getppid);
3152                 exit(ret != USER_NOTIF_MAGIC);
3153         }
3154
3155         pollfd.fd = listener;
3156         pollfd.events = POLLIN | POLLOUT;
3157
3158         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3159         EXPECT_EQ(pollfd.revents, POLLIN);
3160
3161         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3162
3163         pollfd.fd = listener;
3164         pollfd.events = POLLIN | POLLOUT;
3165
3166         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3167         EXPECT_EQ(pollfd.revents, POLLOUT);
3168
3169         EXPECT_EQ(req.data.nr,  __NR_getppid);
3170
3171         resp.id = req.id;
3172         resp.error = 0;
3173         resp.val = USER_NOTIF_MAGIC;
3174
3175         /* check that we make sure flags == 0 */
3176         resp.flags = 1;
3177         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3178         EXPECT_EQ(errno, EINVAL);
3179
3180         resp.flags = 0;
3181         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3182
3183         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3184         EXPECT_EQ(true, WIFEXITED(status));
3185         EXPECT_EQ(0, WEXITSTATUS(status));
3186 }
3187
3188 TEST(user_notification_kill_in_middle)
3189 {
3190         pid_t pid;
3191         long ret;
3192         int listener;
3193         struct seccomp_notif req = {};
3194         struct seccomp_notif_resp resp = {};
3195
3196         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3197         ASSERT_EQ(0, ret) {
3198                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3199         }
3200
3201         listener = user_trap_syscall(__NR_getppid,
3202                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3203         ASSERT_GE(listener, 0);
3204
3205         /*
3206          * Check that nothing bad happens when we kill the task in the middle
3207          * of a syscall.
3208          */
3209         pid = fork();
3210         ASSERT_GE(pid, 0);
3211
3212         if (pid == 0) {
3213                 ret = syscall(__NR_getppid);
3214                 exit(ret != USER_NOTIF_MAGIC);
3215         }
3216
3217         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3218         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3219
3220         EXPECT_EQ(kill(pid, SIGKILL), 0);
3221         EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3222
3223         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3224
3225         resp.id = req.id;
3226         ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3227         EXPECT_EQ(ret, -1);
3228         EXPECT_EQ(errno, ENOENT);
3229 }
3230
/* Descriptor the signal handler writes to; -1 until a test assigns it. */
static int handled = -1;

/*
 * Signal handler: writes the single byte "c" to the descriptor stored in
 * 'handled' and reports through perror() when that byte was not written.
 * The signal number itself is not used.
 */
static void signal_handler(int signal)
{
        ssize_t written = write(handled, "c", 1);

        if (written == 1)
                return;

        perror("write from signal");
}
3238
3239 TEST(user_notification_signal)
3240 {
3241         pid_t pid;
3242         long ret;
3243         int status, listener, sk_pair[2];
3244         struct seccomp_notif req = {};
3245         struct seccomp_notif_resp resp = {};
3246         char c;
3247
3248         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3249         ASSERT_EQ(0, ret) {
3250                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3251         }
3252
3253         ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3254
3255         listener = user_trap_syscall(__NR_gettid,
3256                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3257         ASSERT_GE(listener, 0);
3258
3259         pid = fork();
3260         ASSERT_GE(pid, 0);
3261
3262         if (pid == 0) {
3263                 close(sk_pair[0]);
3264                 handled = sk_pair[1];
3265                 if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3266                         perror("signal");
3267                         exit(1);
3268                 }
3269                 /*
3270                  * ERESTARTSYS behavior is a bit hard to test, because we need
3271                  * to rely on a signal that has not yet been handled. Let's at
3272                  * least check that the error code gets propagated through, and
3273                  * hope that it doesn't break when there is actually a signal :)
3274                  */
3275                 ret = syscall(__NR_gettid);
3276                 exit(!(ret == -1 && errno == 512));
3277         }
3278
3279         close(sk_pair[1]);
3280
3281         memset(&req, 0, sizeof(req));
3282         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3283
3284         EXPECT_EQ(kill(pid, SIGUSR1), 0);
3285
3286         /*
3287          * Make sure the signal really is delivered, which means we're not
3288          * stuck in the user notification code any more and the notification
3289          * should be dead.
3290          */
3291         EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3292
3293         resp.id = req.id;
3294         resp.error = -EPERM;
3295         resp.val = 0;
3296
3297         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3298         EXPECT_EQ(errno, ENOENT);
3299
3300         memset(&req, 0, sizeof(req));
3301         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3302
3303         resp.id = req.id;
3304         resp.error = -512; /* -ERESTARTSYS */
3305         resp.val = 0;
3306
3307         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3308
3309         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3310         EXPECT_EQ(true, WIFEXITED(status));
3311         EXPECT_EQ(0, WEXITSTATUS(status));
3312 }
3313
3314 TEST(user_notification_closed_listener)
3315 {
3316         pid_t pid;
3317         long ret;
3318         int status, listener;
3319
3320         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3321         ASSERT_EQ(0, ret) {
3322                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3323         }
3324
3325         listener = user_trap_syscall(__NR_getppid,
3326                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3327         ASSERT_GE(listener, 0);
3328
3329         /*
3330          * Check that we get an ENOSYS when the listener is closed.
3331          */
3332         pid = fork();
3333         ASSERT_GE(pid, 0);
3334         if (pid == 0) {
3335                 close(listener);
3336                 ret = syscall(__NR_getppid);
3337                 exit(ret != -1 && errno != ENOSYS);
3338         }
3339
3340         close(listener);
3341
3342         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3343         EXPECT_EQ(true, WIFEXITED(status));
3344         EXPECT_EQ(0, WEXITSTATUS(status));
3345 }
3346
3347 /*
3348  * Check that a pid in a child namespace still shows up as valid in ours.
3349  */
3350 TEST(user_notification_child_pid_ns)
3351 {
3352         pid_t pid;
3353         int status, listener;
3354         struct seccomp_notif req = {};
3355         struct seccomp_notif_resp resp = {};
3356
3357         ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3358
3359         listener = user_trap_syscall(__NR_getppid,
3360                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3361         ASSERT_GE(listener, 0);
3362
3363         pid = fork();
3364         ASSERT_GE(pid, 0);
3365
3366         if (pid == 0)
3367                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3368
3369         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3370         EXPECT_EQ(req.pid, pid);
3371
3372         resp.id = req.id;
3373         resp.error = 0;
3374         resp.val = USER_NOTIF_MAGIC;
3375
3376         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3377
3378         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3379         EXPECT_EQ(true, WIFEXITED(status));
3380         EXPECT_EQ(0, WEXITSTATUS(status));
3381         close(listener);
3382 }
3383
3384 /*
3385  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3386  * invalid.
3387  */
3388 TEST(user_notification_sibling_pid_ns)
3389 {
3390         pid_t pid, pid2;
3391         int status, listener;
3392         struct seccomp_notif req = {};
3393         struct seccomp_notif_resp resp = {};
3394
3395         ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3396                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3397         }
3398
3399         listener = user_trap_syscall(__NR_getppid,
3400                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3401         ASSERT_GE(listener, 0);
3402
3403         pid = fork();
3404         ASSERT_GE(pid, 0);
3405
3406         if (pid == 0) {
3407                 ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3408
3409                 pid2 = fork();
3410                 ASSERT_GE(pid2, 0);
3411
3412                 if (pid2 == 0)
3413                         exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3414
3415                 EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3416                 EXPECT_EQ(true, WIFEXITED(status));
3417                 EXPECT_EQ(0, WEXITSTATUS(status));
3418                 exit(WEXITSTATUS(status));
3419         }
3420
3421         /* Create the sibling ns, and sibling in it. */
3422         ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3423         ASSERT_EQ(errno, 0);
3424
3425         pid2 = fork();
3426         ASSERT_GE(pid2, 0);
3427
3428         if (pid2 == 0) {
3429                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3430                 /*
3431                  * The pid should be 0, i.e. the task is in some namespace that
3432                  * we can't "see".
3433                  */
3434                 EXPECT_EQ(req.pid, 0);
3435
3436                 resp.id = req.id;
3437                 resp.error = 0;
3438                 resp.val = USER_NOTIF_MAGIC;
3439
3440                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3441                 exit(0);
3442         }
3443
3444         close(listener);
3445
3446         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3447         EXPECT_EQ(true, WIFEXITED(status));
3448         EXPECT_EQ(0, WEXITSTATUS(status));
3449
3450         EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3451         EXPECT_EQ(true, WIFEXITED(status));
3452         EXPECT_EQ(0, WEXITSTATUS(status));
3453 }
3454
3455 TEST(user_notification_fault_recv)
3456 {
3457         pid_t pid;
3458         int status, listener;
3459         struct seccomp_notif req = {};
3460         struct seccomp_notif_resp resp = {};
3461
3462         ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3463
3464         listener = user_trap_syscall(__NR_getppid,
3465                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3466         ASSERT_GE(listener, 0);
3467
3468         pid = fork();
3469         ASSERT_GE(pid, 0);
3470
3471         if (pid == 0)
3472                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3473
3474         /* Do a bad recv() */
3475         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3476         EXPECT_EQ(errno, EFAULT);
3477
3478         /* We should still be able to receive this notification, though. */
3479         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3480         EXPECT_EQ(req.pid, pid);
3481
3482         resp.id = req.id;
3483         resp.error = 0;
3484         resp.val = USER_NOTIF_MAGIC;
3485
3486         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3487
3488         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3489         EXPECT_EQ(true, WIFEXITED(status));
3490         EXPECT_EQ(0, WEXITSTATUS(status));
3491 }
3492
3493 TEST(seccomp_get_notif_sizes)
3494 {
3495         struct seccomp_notif_sizes sizes;
3496
3497         ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3498         EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3499         EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3500 }
3501
3502 static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
3503 {
3504 #ifdef __NR_kcmp
3505         return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
3506 #else
3507         errno = ENOSYS;
3508         return -1;
3509 #endif
3510 }
3511
3512 TEST(user_notification_continue)
3513 {
3514         pid_t pid;
3515         long ret;
3516         int status, listener;
3517         struct seccomp_notif req = {};
3518         struct seccomp_notif_resp resp = {};
3519         struct pollfd pollfd;
3520
3521         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3522         ASSERT_EQ(0, ret) {
3523                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3524         }
3525
3526         listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3527         ASSERT_GE(listener, 0);
3528
3529         pid = fork();
3530         ASSERT_GE(pid, 0);
3531
3532         if (pid == 0) {
3533                 int dup_fd, pipe_fds[2];
3534                 pid_t self;
3535
3536                 ret = pipe(pipe_fds);
3537                 if (ret < 0)
3538                         exit(1);
3539
3540                 dup_fd = dup(pipe_fds[0]);
3541                 if (dup_fd < 0)
3542                         exit(1);
3543
3544                 self = getpid();
3545
3546                 ret = filecmp(self, self, pipe_fds[0], dup_fd);
3547                 if (ret)
3548                         exit(2);
3549
3550                 exit(0);
3551         }
3552
3553         pollfd.fd = listener;
3554         pollfd.events = POLLIN | POLLOUT;
3555
3556         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3557         EXPECT_EQ(pollfd.revents, POLLIN);
3558
3559         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3560
3561         pollfd.fd = listener;
3562         pollfd.events = POLLIN | POLLOUT;
3563
3564         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3565         EXPECT_EQ(pollfd.revents, POLLOUT);
3566
3567         EXPECT_EQ(req.data.nr, __NR_dup);
3568
3569         resp.id = req.id;
3570         resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3571
3572         /*
3573          * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3574          * args be set to 0.
3575          */
3576         resp.error = 0;
3577         resp.val = USER_NOTIF_MAGIC;
3578         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3579         EXPECT_EQ(errno, EINVAL);
3580
3581         resp.error = USER_NOTIF_MAGIC;
3582         resp.val = 0;
3583         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3584         EXPECT_EQ(errno, EINVAL);
3585
3586         resp.error = 0;
3587         resp.val = 0;
3588         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3589                 if (errno == EINVAL)
3590                         XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3591         }
3592
3593 skip:
3594         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3595         EXPECT_EQ(true, WIFEXITED(status));
3596         EXPECT_EQ(0, WEXITSTATUS(status)) {
3597                 if (WEXITSTATUS(status) == 2) {
3598                         XFAIL(return, "Kernel does not support kcmp() syscall");
3599                         return;
3600                 }
3601         }
3602 }
3603
3604 /*
3605  * TODO:
3606  * - add microbenchmarks
3607  * - expand NNP testing
3608  * - better arch-specific TRACE and TRAP handlers.
3609  * - endianness checking when appropriate
3610  * - 64-bit arg prodding
3611  * - arch value testing (x86 modes especially)
3612  * - verify that FILTER_FLAG_LOG filters generate log messages
3613  * - verify that RET_LOG generates log messages
3614  * - ...
3615  */
3616
/*
 * NOTE(review): presumably supplied by ../kselftest_harness.h and expected to
 * emit the program entry point that runs the TEST() cases defined in this
 * file - confirm against the harness header.
 */
TEST_HARNESS_MAIN