selftests: add seccomp suite
[linux-2.6-microblaze.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
8 #include <asm/siginfo.h>
9 #define __have_siginfo_t 1
10 #define __have_sigval_t 1
11 #define __have_sigevent_t 1
12
13 #include <errno.h>
14 #include <linux/filter.h>
15 #include <sys/prctl.h>
16 #include <sys/ptrace.h>
17 #include <sys/user.h>
18 #include <linux/prctl.h>
19 #include <linux/ptrace.h>
20 #include <linux/seccomp.h>
21 #include <poll.h>
22 #include <pthread.h>
23 #include <semaphore.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #include <linux/elf.h>
29 #include <sys/uio.h>
30
31 #define _GNU_SOURCE
32 #include <unistd.h>
33 #include <sys/syscall.h>
34
35 #include "test_harness.h"
36
/*
 * Fallback definitions, used only when the installed headers do not already
 * provide the corresponding name.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* Both NO_NEW_PRIVS options are keyed off the SET variant. */
#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER 2
#endif

/*
 * The filter return values, their masks and struct seccomp_data are supplied
 * together, keyed off SECCOMP_RET_KILL.
 */
#ifndef SECCOMP_RET_KILL
# define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
# define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */

/* Masks for the return value sections. */
# define SECCOMP_RET_ACTION      0x7fff0000U
# define SECCOMP_RET_DATA        0x0000ffffU

struct seccomp_data {
        int nr;
        __u32 arch;
        __u64 instruction_pointer;
        __u64 args[6];
};
#endif

/* Byte offset of syscall argument _n inside struct seccomp_data. */
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))

#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
90
91 TEST(mode_strict_support)
92 {
93         long ret;
94
95         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
96         ASSERT_EQ(0, ret) {
97                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
98         }
99         syscall(__NR_exit, 1);
100 }
101
102 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
103 {
104         long ret;
105
106         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
107         ASSERT_EQ(0, ret) {
108                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
109         }
110         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
111                 NULL, NULL, NULL);
112         EXPECT_FALSE(true) {
113                 TH_LOG("Unreachable!");
114         }
115 }
116
117 /* Note! This doesn't test no new privs behavior */
118 TEST(no_new_privs_support)
119 {
120         long ret;
121
122         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
123         EXPECT_EQ(0, ret) {
124                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
125         }
126 }
127
128 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
129 TEST(mode_filter_support)
130 {
131         long ret;
132
133         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
134         ASSERT_EQ(0, ret) {
135                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
136         }
137         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
138         EXPECT_EQ(-1, ret);
139         EXPECT_EQ(EFAULT, errno) {
140                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
141         }
142 }
143
144 TEST(mode_filter_without_nnp)
145 {
146         struct sock_filter filter[] = {
147                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
148         };
149         struct sock_fprog prog = {
150                 .len = (unsigned short)ARRAY_SIZE(filter),
151                 .filter = filter,
152         };
153         long ret;
154
155         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
156         ASSERT_LE(0, ret) {
157                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
158         }
159         errno = 0;
160         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
161         /* Succeeds with CAP_SYS_ADMIN, fails without */
162         /* TODO(wad) check caps not euid */
163         if (geteuid()) {
164                 EXPECT_EQ(-1, ret);
165                 EXPECT_EQ(EACCES, errno);
166         } else {
167                 EXPECT_EQ(0, ret);
168         }
169 }
170
171 #define MAX_INSNS_PER_PATH 32768
172
173 TEST(filter_size_limits)
174 {
175         int i;
176         int count = BPF_MAXINSNS + 1;
177         struct sock_filter allow[] = {
178                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
179         };
180         struct sock_filter *filter;
181         struct sock_fprog prog = { };
182         long ret;
183
184         filter = calloc(count, sizeof(*filter));
185         ASSERT_NE(NULL, filter);
186
187         for (i = 0; i < count; i++)
188                 filter[i] = allow[0];
189
190         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
191         ASSERT_EQ(0, ret);
192
193         prog.filter = filter;
194         prog.len = count;
195
196         /* Too many filter instructions in a single filter. */
197         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
198         ASSERT_NE(0, ret) {
199                 TH_LOG("Installing %d insn filter was allowed", prog.len);
200         }
201
202         /* One less is okay, though. */
203         prog.len -= 1;
204         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
205         ASSERT_EQ(0, ret) {
206                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
207         }
208 }
209
210 TEST(filter_chain_limits)
211 {
212         int i;
213         int count = BPF_MAXINSNS;
214         struct sock_filter allow[] = {
215                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
216         };
217         struct sock_filter *filter;
218         struct sock_fprog prog = { };
219         long ret;
220
221         filter = calloc(count, sizeof(*filter));
222         ASSERT_NE(NULL, filter);
223
224         for (i = 0; i < count; i++)
225                 filter[i] = allow[0];
226
227         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
228         ASSERT_EQ(0, ret);
229
230         prog.filter = filter;
231         prog.len = 1;
232
233         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
234         ASSERT_EQ(0, ret);
235
236         prog.len = count;
237
238         /* Too many total filter instructions. */
239         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
240                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
241                 if (ret != 0)
242                         break;
243         }
244         ASSERT_NE(0, ret) {
245                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
246                        i, count, i * (count + 4));
247         }
248 }
249
250 TEST(mode_filter_cannot_move_to_strict)
251 {
252         struct sock_filter filter[] = {
253                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
254         };
255         struct sock_fprog prog = {
256                 .len = (unsigned short)ARRAY_SIZE(filter),
257                 .filter = filter,
258         };
259         long ret;
260
261         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
262         ASSERT_EQ(0, ret);
263
264         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
265         ASSERT_EQ(0, ret);
266
267         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
268         EXPECT_EQ(-1, ret);
269         EXPECT_EQ(EINVAL, errno);
270 }
271
272
273 TEST(mode_filter_get_seccomp)
274 {
275         struct sock_filter filter[] = {
276                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
277         };
278         struct sock_fprog prog = {
279                 .len = (unsigned short)ARRAY_SIZE(filter),
280                 .filter = filter,
281         };
282         long ret;
283
284         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
285         ASSERT_EQ(0, ret);
286
287         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
288         EXPECT_EQ(0, ret);
289
290         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
291         ASSERT_EQ(0, ret);
292
293         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
294         EXPECT_EQ(2, ret);
295 }
296
297
298 TEST(ALLOW_all)
299 {
300         struct sock_filter filter[] = {
301                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
302         };
303         struct sock_fprog prog = {
304                 .len = (unsigned short)ARRAY_SIZE(filter),
305                 .filter = filter,
306         };
307         long ret;
308
309         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
310         ASSERT_EQ(0, ret);
311
312         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
313         ASSERT_EQ(0, ret);
314 }
315
316 TEST(empty_prog)
317 {
318         struct sock_filter filter[] = {
319         };
320         struct sock_fprog prog = {
321                 .len = (unsigned short)ARRAY_SIZE(filter),
322                 .filter = filter,
323         };
324         long ret;
325
326         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
327         ASSERT_EQ(0, ret);
328
329         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
330         EXPECT_EQ(-1, ret);
331         EXPECT_EQ(EINVAL, errno);
332 }
333
334 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
335 {
336         struct sock_filter filter[] = {
337                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
338         };
339         struct sock_fprog prog = {
340                 .len = (unsigned short)ARRAY_SIZE(filter),
341                 .filter = filter,
342         };
343         long ret;
344
345         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
346         ASSERT_EQ(0, ret);
347
348         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
349         ASSERT_EQ(0, ret);
350         EXPECT_EQ(0, syscall(__NR_getpid)) {
351                 TH_LOG("getpid() shouldn't ever return");
352         }
353 }
354
355 /* return code >= 0x80000000 is unused. */
356 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
357 {
358         struct sock_filter filter[] = {
359                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
360         };
361         struct sock_fprog prog = {
362                 .len = (unsigned short)ARRAY_SIZE(filter),
363                 .filter = filter,
364         };
365         long ret;
366
367         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
368         ASSERT_EQ(0, ret);
369
370         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
371         ASSERT_EQ(0, ret);
372         EXPECT_EQ(0, syscall(__NR_getpid)) {
373                 TH_LOG("getpid() shouldn't ever return");
374         }
375 }
376
377 TEST_SIGNAL(KILL_all, SIGSYS)
378 {
379         struct sock_filter filter[] = {
380                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
381         };
382         struct sock_fprog prog = {
383                 .len = (unsigned short)ARRAY_SIZE(filter),
384                 .filter = filter,
385         };
386         long ret;
387
388         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
389         ASSERT_EQ(0, ret);
390
391         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
392         ASSERT_EQ(0, ret);
393 }
394
395 TEST_SIGNAL(KILL_one, SIGSYS)
396 {
397         struct sock_filter filter[] = {
398                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
399                         offsetof(struct seccomp_data, nr)),
400                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
401                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
402                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
403         };
404         struct sock_fprog prog = {
405                 .len = (unsigned short)ARRAY_SIZE(filter),
406                 .filter = filter,
407         };
408         long ret;
409         pid_t parent = getppid();
410
411         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
412         ASSERT_EQ(0, ret);
413
414         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
415         ASSERT_EQ(0, ret);
416
417         EXPECT_EQ(parent, syscall(__NR_getppid));
418         /* getpid() should never return. */
419         EXPECT_EQ(0, syscall(__NR_getpid));
420 }
421
422 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
423 {
424         struct sock_filter filter[] = {
425                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
426                         offsetof(struct seccomp_data, nr)),
427                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
428                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
429                 /* Only both with lower 32-bit for now. */
430                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
431                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
432                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
433                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
434         };
435         struct sock_fprog prog = {
436                 .len = (unsigned short)ARRAY_SIZE(filter),
437                 .filter = filter,
438         };
439         long ret;
440         pid_t parent = getppid();
441         pid_t pid = getpid();
442
443         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
444         ASSERT_EQ(0, ret);
445
446         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
447         ASSERT_EQ(0, ret);
448
449         EXPECT_EQ(parent, syscall(__NR_getppid));
450         EXPECT_EQ(pid, syscall(__NR_getpid));
451         /* getpid() should never return. */
452         EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
453 }
454
455 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
456 {
457         struct sock_filter filter[] = {
458                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
459                         offsetof(struct seccomp_data, nr)),
460                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
461                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
462                 /* Only both with lower 32-bit for now. */
463                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
464                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
465                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
466                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
467         };
468         struct sock_fprog prog = {
469                 .len = (unsigned short)ARRAY_SIZE(filter),
470                 .filter = filter,
471         };
472         long ret;
473         pid_t parent = getppid();
474         pid_t pid = getpid();
475
476         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
477         ASSERT_EQ(0, ret);
478
479         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
480         ASSERT_EQ(0, ret);
481
482         EXPECT_EQ(parent, syscall(__NR_getppid));
483         EXPECT_EQ(pid, syscall(__NR_getpid));
484         /* getpid() should never return. */
485         EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
486 }
487
488 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
489 TEST(arg_out_of_range)
490 {
491         struct sock_filter filter[] = {
492                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
493                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
494         };
495         struct sock_fprog prog = {
496                 .len = (unsigned short)ARRAY_SIZE(filter),
497                 .filter = filter,
498         };
499         long ret;
500
501         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
502         ASSERT_EQ(0, ret);
503
504         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
505         EXPECT_EQ(-1, ret);
506         EXPECT_EQ(EINVAL, errno);
507 }
508
509 TEST(ERRNO_valid)
510 {
511         struct sock_filter filter[] = {
512                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
513                         offsetof(struct seccomp_data, nr)),
514                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
515                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
516                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
517         };
518         struct sock_fprog prog = {
519                 .len = (unsigned short)ARRAY_SIZE(filter),
520                 .filter = filter,
521         };
522         long ret;
523         pid_t parent = getppid();
524
525         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
526         ASSERT_EQ(0, ret);
527
528         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
529         ASSERT_EQ(0, ret);
530
531         EXPECT_EQ(parent, syscall(__NR_getppid));
532         EXPECT_EQ(-1, read(0, NULL, 0));
533         EXPECT_EQ(E2BIG, errno);
534 }
535
536 TEST(ERRNO_zero)
537 {
538         struct sock_filter filter[] = {
539                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
540                         offsetof(struct seccomp_data, nr)),
541                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
542                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
543                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
544         };
545         struct sock_fprog prog = {
546                 .len = (unsigned short)ARRAY_SIZE(filter),
547                 .filter = filter,
548         };
549         long ret;
550         pid_t parent = getppid();
551
552         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
553         ASSERT_EQ(0, ret);
554
555         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
556         ASSERT_EQ(0, ret);
557
558         EXPECT_EQ(parent, syscall(__NR_getppid));
559         /* "errno" of 0 is ok. */
560         EXPECT_EQ(0, read(0, NULL, 0));
561 }
562
563 TEST(ERRNO_capped)
564 {
565         struct sock_filter filter[] = {
566                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
567                         offsetof(struct seccomp_data, nr)),
568                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
569                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
570                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
571         };
572         struct sock_fprog prog = {
573                 .len = (unsigned short)ARRAY_SIZE(filter),
574                 .filter = filter,
575         };
576         long ret;
577         pid_t parent = getppid();
578
579         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
580         ASSERT_EQ(0, ret);
581
582         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
583         ASSERT_EQ(0, ret);
584
585         EXPECT_EQ(parent, syscall(__NR_getppid));
586         EXPECT_EQ(-1, read(0, NULL, 0));
587         EXPECT_EQ(4095, errno);
588 }
589
590 FIXTURE_DATA(TRAP) {
591         struct sock_fprog prog;
592 };
593
594 FIXTURE_SETUP(TRAP)
595 {
596         struct sock_filter filter[] = {
597                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
598                         offsetof(struct seccomp_data, nr)),
599                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
600                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
601                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
602         };
603
604         memset(&self->prog, 0, sizeof(self->prog));
605         self->prog.filter = malloc(sizeof(filter));
606         ASSERT_NE(NULL, self->prog.filter);
607         memcpy(self->prog.filter, filter, sizeof(filter));
608         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
609 }
610
611 FIXTURE_TEARDOWN(TRAP)
612 {
613         if (self->prog.filter)
614                 free(self->prog.filter);
615 }
616
617 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
618 {
619         long ret;
620
621         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
622         ASSERT_EQ(0, ret);
623
624         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
625         ASSERT_EQ(0, ret);
626         syscall(__NR_getpid);
627 }
628
629 /* Ensure that SIGSYS overrides SIG_IGN */
630 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
631 {
632         long ret;
633
634         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
635         ASSERT_EQ(0, ret);
636
637         signal(SIGSYS, SIG_IGN);
638
639         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
640         ASSERT_EQ(0, ret);
641         syscall(__NR_getpid);
642 }
643
644 static struct siginfo TRAP_info;
645 static volatile int TRAP_nr;
646 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
647 {
648         memcpy(&TRAP_info, info, sizeof(TRAP_info));
649         TRAP_nr = nr;
650 }
651
652 TEST_F(TRAP, handler)
653 {
654         int ret, test;
655         struct sigaction act;
656         sigset_t mask;
657
658         memset(&act, 0, sizeof(act));
659         sigemptyset(&mask);
660         sigaddset(&mask, SIGSYS);
661
662         act.sa_sigaction = &TRAP_action;
663         act.sa_flags = SA_SIGINFO;
664         ret = sigaction(SIGSYS, &act, NULL);
665         ASSERT_EQ(0, ret) {
666                 TH_LOG("sigaction failed");
667         }
668         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
669         ASSERT_EQ(0, ret) {
670                 TH_LOG("sigprocmask failed");
671         }
672
673         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
674         ASSERT_EQ(0, ret);
675         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
676         ASSERT_EQ(0, ret);
677         TRAP_nr = 0;
678         memset(&TRAP_info, 0, sizeof(TRAP_info));
679         /* Expect the registers to be rolled back. (nr = error) may vary
680          * based on arch. */
681         ret = syscall(__NR_getpid);
682         /* Silence gcc warning about volatile. */
683         test = TRAP_nr;
684         EXPECT_EQ(SIGSYS, test);
685         struct local_sigsys {
686                 void *_call_addr;       /* calling user insn */
687                 int _syscall;           /* triggering system call number */
688                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
689         } *sigsys = (struct local_sigsys *)
690 #ifdef si_syscall
691                 &(TRAP_info.si_call_addr);
692 #else
693                 &TRAP_info.si_pid;
694 #endif
695         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
696         /* Make sure arch is non-zero. */
697         EXPECT_NE(0, sigsys->_arch);
698         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
699 }
700
701 FIXTURE_DATA(precedence) {
702         struct sock_fprog allow;
703         struct sock_fprog trace;
704         struct sock_fprog error;
705         struct sock_fprog trap;
706         struct sock_fprog kill;
707 };
708
709 FIXTURE_SETUP(precedence)
710 {
711         struct sock_filter allow_insns[] = {
712                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
713         };
714         struct sock_filter trace_insns[] = {
715                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
716                         offsetof(struct seccomp_data, nr)),
717                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
718                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
719                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
720         };
721         struct sock_filter error_insns[] = {
722                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
723                         offsetof(struct seccomp_data, nr)),
724                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
725                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
726                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
727         };
728         struct sock_filter trap_insns[] = {
729                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
730                         offsetof(struct seccomp_data, nr)),
731                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
732                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
733                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
734         };
735         struct sock_filter kill_insns[] = {
736                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
737                         offsetof(struct seccomp_data, nr)),
738                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
739                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
740                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
741         };
742
743         memset(self, 0, sizeof(*self));
744 #define FILTER_ALLOC(_x) \
745         self->_x.filter = malloc(sizeof(_x##_insns)); \
746         ASSERT_NE(NULL, self->_x.filter); \
747         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
748         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
749         FILTER_ALLOC(allow);
750         FILTER_ALLOC(trace);
751         FILTER_ALLOC(error);
752         FILTER_ALLOC(trap);
753         FILTER_ALLOC(kill);
754 }
755
756 FIXTURE_TEARDOWN(precedence)
757 {
758 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
759         FILTER_FREE(allow);
760         FILTER_FREE(trace);
761         FILTER_FREE(error);
762         FILTER_FREE(trap);
763         FILTER_FREE(kill);
764 }
765
766 TEST_F(precedence, allow_ok)
767 {
768         pid_t parent, res = 0;
769         long ret;
770
771         parent = getppid();
772         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
773         ASSERT_EQ(0, ret);
774
775         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
776         ASSERT_EQ(0, ret);
777         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
778         ASSERT_EQ(0, ret);
779         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
780         ASSERT_EQ(0, ret);
781         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
782         ASSERT_EQ(0, ret);
783         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
784         ASSERT_EQ(0, ret);
785         /* Should work just fine. */
786         res = syscall(__NR_getppid);
787         EXPECT_EQ(parent, res);
788 }
789
790 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
791 {
792         pid_t parent, res = 0;
793         long ret;
794
795         parent = getppid();
796         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
797         ASSERT_EQ(0, ret);
798
799         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
800         ASSERT_EQ(0, ret);
801         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
802         ASSERT_EQ(0, ret);
803         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
804         ASSERT_EQ(0, ret);
805         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
806         ASSERT_EQ(0, ret);
807         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
808         ASSERT_EQ(0, ret);
809         /* Should work just fine. */
810         res = syscall(__NR_getppid);
811         EXPECT_EQ(parent, res);
812         /* getpid() should never return. */
813         res = syscall(__NR_getpid);
814         EXPECT_EQ(0, res);
815 }
816
817 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
818 {
819         pid_t parent;
820         long ret;
821
822         parent = getppid();
823         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
824         ASSERT_EQ(0, ret);
825
826         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
827         ASSERT_EQ(0, ret);
828         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
829         ASSERT_EQ(0, ret);
830         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
831         ASSERT_EQ(0, ret);
832         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
833         ASSERT_EQ(0, ret);
834         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
835         ASSERT_EQ(0, ret);
836         /* Should work just fine. */
837         EXPECT_EQ(parent, syscall(__NR_getppid));
838         /* getpid() should never return. */
839         EXPECT_EQ(0, syscall(__NR_getpid));
840 }
841
842 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
843 {
844         pid_t parent;
845         long ret;
846
847         parent = getppid();
848         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
849         ASSERT_EQ(0, ret);
850
851         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
852         ASSERT_EQ(0, ret);
853         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
854         ASSERT_EQ(0, ret);
855         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
856         ASSERT_EQ(0, ret);
857         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
858         ASSERT_EQ(0, ret);
859         /* Should work just fine. */
860         EXPECT_EQ(parent, syscall(__NR_getppid));
861         /* getpid() should never return. */
862         EXPECT_EQ(0, syscall(__NR_getpid));
863 }
864
865 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
866 {
867         pid_t parent;
868         long ret;
869
870         parent = getppid();
871         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
872         ASSERT_EQ(0, ret);
873
874         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
875         ASSERT_EQ(0, ret);
876         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
877         ASSERT_EQ(0, ret);
878         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
879         ASSERT_EQ(0, ret);
880         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
881         ASSERT_EQ(0, ret);
882         /* Should work just fine. */
883         EXPECT_EQ(parent, syscall(__NR_getppid));
884         /* getpid() should never return. */
885         EXPECT_EQ(0, syscall(__NR_getpid));
886 }
887
888 TEST_F(precedence, errno_is_third)
889 {
890         pid_t parent;
891         long ret;
892
893         parent = getppid();
894         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
895         ASSERT_EQ(0, ret);
896
897         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
898         ASSERT_EQ(0, ret);
899         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
900         ASSERT_EQ(0, ret);
901         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
902         ASSERT_EQ(0, ret);
903         /* Should work just fine. */
904         EXPECT_EQ(parent, syscall(__NR_getppid));
905         EXPECT_EQ(0, syscall(__NR_getpid));
906 }
907
908 TEST_F(precedence, errno_is_third_in_any_order)
909 {
910         pid_t parent;
911         long ret;
912
913         parent = getppid();
914         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
915         ASSERT_EQ(0, ret);
916
917         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
918         ASSERT_EQ(0, ret);
919         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
920         ASSERT_EQ(0, ret);
921         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
922         ASSERT_EQ(0, ret);
923         /* Should work just fine. */
924         EXPECT_EQ(parent, syscall(__NR_getppid));
925         EXPECT_EQ(0, syscall(__NR_getpid));
926 }
927
928 TEST_F(precedence, trace_is_fourth)
929 {
930         pid_t parent;
931         long ret;
932
933         parent = getppid();
934         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
935         ASSERT_EQ(0, ret);
936
937         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
938         ASSERT_EQ(0, ret);
939         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
940         ASSERT_EQ(0, ret);
941         /* Should work just fine. */
942         EXPECT_EQ(parent, syscall(__NR_getppid));
943         /* No ptracer */
944         EXPECT_EQ(-1, syscall(__NR_getpid));
945 }
946
947 TEST_F(precedence, trace_is_fourth_in_any_order)
948 {
949         pid_t parent;
950         long ret;
951
952         parent = getppid();
953         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
954         ASSERT_EQ(0, ret);
955
956         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
957         ASSERT_EQ(0, ret);
958         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
959         ASSERT_EQ(0, ret);
960         /* Should work just fine. */
961         EXPECT_EQ(parent, syscall(__NR_getppid));
962         /* No ptracer */
963         EXPECT_EQ(-1, syscall(__NR_getpid));
964 }
965
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of a wait status above the low 16 equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that a compound
 * expression (e.g. "a | b") is shifted as a whole rather than only its
 * last operand.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Loop flag for tracer(): set to true before its event loop starts and
 * cleared by tracer_stop(). Because it is written from a signal handler
 * it is declared volatile sig_atomic_t rather than plain bool.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler that asks the tracer loop to finish. */
void tracer_stop(int sig)
{
        (void)sig;      /* the signal number itself is not needed */
        tracer_running = 0;
}
985
/*
 * Callback invoked by tracer() for each seccomp ptrace event. It receives
 * the harness metadata, the tracee's pid, the wait status that reported
 * the event, and the opaque argument pointer that was given to tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
                           int status, void *args);
988
989 void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
990             tracer_func_t tracer_func, void *args)
991 {
992         int ret = -1;
993         struct sigaction action = {
994                 .sa_handler = tracer_stop,
995         };
996
997         /* Allow external shutdown. */
998         tracer_running = true;
999         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1000
1001         errno = 0;
1002         while (ret == -1 && errno != EINVAL)
1003                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1004         ASSERT_EQ(0, ret) {
1005                 kill(tracee, SIGKILL);
1006         }
1007         /* Wait for attach stop */
1008         wait(NULL);
1009
1010         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1011         ASSERT_EQ(0, ret) {
1012                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1013                 kill(tracee, SIGKILL);
1014         }
1015         ptrace(PTRACE_CONT, tracee, NULL, 0);
1016
1017         /* Unblock the tracee */
1018         ASSERT_EQ(1, write(fd, "A", 1));
1019         ASSERT_EQ(0, close(fd));
1020
1021         /* Run until we're shut down. Must assert to stop execution. */
1022         while (tracer_running) {
1023                 int status;
1024
1025                 if (wait(&status) != tracee)
1026                         continue;
1027                 if (WIFSIGNALED(status) || WIFEXITED(status))
1028                         /* Child is dead. Time to go. */
1029                         return;
1030
1031                 /* Make sure this is a seccomp event. */
1032                 ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1033
1034                 tracer_func(_metadata, tracee, status, args);
1035
1036                 ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1037                 ASSERT_EQ(0, ret);
1038         }
1039         /* Directly report the status of our test harness results. */
1040         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1041 }
1042
1043 /* Common tracer setup/teardown functions. */
/* Deliberately empty signal handler; the signal number is ignored. */
void cont_handler(int num)
{
}
1046 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1047                           tracer_func_t func, void *args)
1048 {
1049         char sync;
1050         int pipefd[2];
1051         pid_t tracer_pid;
1052         pid_t tracee = getpid();
1053
1054         /* Setup a pipe for clean synchronization. */
1055         ASSERT_EQ(0, pipe(pipefd));
1056
1057         /* Fork a child which we'll promote to tracer */
1058         tracer_pid = fork();
1059         ASSERT_LE(0, tracer_pid);
1060         signal(SIGALRM, cont_handler);
1061         if (tracer_pid == 0) {
1062                 close(pipefd[0]);
1063                 tracer(_metadata, pipefd[1], tracee, func, args);
1064                 syscall(__NR_exit, 0);
1065         }
1066         close(pipefd[1]);
1067         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1068         read(pipefd[0], &sync, 1);
1069         close(pipefd[0]);
1070
1071         return tracer_pid;
1072 }
1073 void teardown_trace_fixture(struct __test_metadata *_metadata,
1074                             pid_t tracer)
1075 {
1076         if (tracer) {
1077                 int status;
1078                 /*
1079                  * Extract the exit code from the other process and
1080                  * adopt it for ourselves in case its asserts failed.
1081                  */
1082                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1083                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1084                 if (WEXITSTATUS(status))
1085                         _metadata->passed = 0;
1086         }
1087 }
1088
1089 /* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
        /* Address passed to PTRACE_POKEDATA by tracer_poke(). */
        unsigned long poke_addr;
};
1093
1094 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1095                  void *args)
1096 {
1097         int ret;
1098         unsigned long msg;
1099         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1100
1101         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1102         EXPECT_EQ(0, ret);
1103         /* If this fails, don't try to recover. */
1104         ASSERT_EQ(0x1001, msg) {
1105                 kill(tracee, SIGKILL);
1106         }
1107         /*
1108          * Poke in the message.
1109          * Registers are not touched to try to keep this relatively arch
1110          * agnostic.
1111          */
1112         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1113         EXPECT_EQ(0, ret);
1114 }
1115
1116 FIXTURE_DATA(TRACE_poke) {
1117         struct sock_fprog prog;
1118         pid_t tracer;
1119         long poked;
1120         struct tracer_args_poke_t tracer_args;
1121 };
1122
1123 FIXTURE_SETUP(TRACE_poke)
1124 {
1125         struct sock_filter filter[] = {
1126                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1127                         offsetof(struct seccomp_data, nr)),
1128                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1129                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1130                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1131         };
1132
1133         self->poked = 0;
1134         memset(&self->prog, 0, sizeof(self->prog));
1135         self->prog.filter = malloc(sizeof(filter));
1136         ASSERT_NE(NULL, self->prog.filter);
1137         memcpy(self->prog.filter, filter, sizeof(filter));
1138         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1139
1140         /* Set up tracer args. */
1141         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1142
1143         /* Launch tracer. */
1144         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1145                                            &self->tracer_args);
1146 }
1147
1148 FIXTURE_TEARDOWN(TRACE_poke)
1149 {
1150         teardown_trace_fixture(_metadata, self->tracer);
1151         if (self->prog.filter)
1152                 free(self->prog.filter);
1153 }
1154
1155 TEST_F(TRACE_poke, read_has_side_effects)
1156 {
1157         ssize_t ret;
1158
1159         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1160         ASSERT_EQ(0, ret);
1161
1162         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1163         ASSERT_EQ(0, ret);
1164
1165         EXPECT_EQ(0, self->poked);
1166         ret = read(-1, NULL, 0);
1167         EXPECT_EQ(-1, ret);
1168         EXPECT_EQ(0x1001, self->poked);
1169 }
1170
1171 TEST_F(TRACE_poke, getpid_runs_normally)
1172 {
1173         long ret;
1174
1175         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1176         ASSERT_EQ(0, ret);
1177
1178         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1179         ASSERT_EQ(0, ret);
1180
1181         EXPECT_EQ(0, self->poked);
1182         EXPECT_NE(0, syscall(__NR_getpid));
1183         EXPECT_EQ(0, self->poked);
1184 }
1185
/*
 * Per-architecture names used by get_syscall() and change_syscall():
 *   ARCH_REGS   - type of the register set structure
 *   SYSCALL_NUM - member that get_syscall() returns and change_syscall()
 *                 overwrites on non-arm builds
 *   SYSCALL_RET - member change_syscall() sets to 1 when skipping a syscall
 */
#if defined(__x86_64__)
#define ARCH_REGS       struct user_regs_struct
#define SYSCALL_NUM     orig_rax
#define SYSCALL_RET     rax
#elif defined(__i386__)
#define ARCH_REGS       struct user_regs_struct
#define SYSCALL_NUM     orig_eax
#define SYSCALL_RET     eax
#elif defined(__arm__)
#define ARCH_REGS       struct pt_regs
#define SYSCALL_NUM     ARM_r7
#define SYSCALL_RET     ARM_r0
#elif defined(__aarch64__)
#define ARCH_REGS       struct user_pt_regs
#define SYSCALL_NUM     regs[8]
#define SYSCALL_RET     regs[0]
#else
#error "Do not know how to find your architecture's registers and syscalls"
#endif
1205
1206 /* Architecture-specific syscall fetching routine. */
1207 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1208 {
1209         struct iovec iov;
1210         ARCH_REGS regs;
1211
1212         iov.iov_base = &regs;
1213         iov.iov_len = sizeof(regs);
1214         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1215                 TH_LOG("PTRACE_GETREGSET failed");
1216                 return -1;
1217         }
1218
1219         return regs.SYSCALL_NUM;
1220 }
1221
1222 /* Architecture-specific syscall changing routine. */
1223 void change_syscall(struct __test_metadata *_metadata,
1224                     pid_t tracee, int syscall)
1225 {
1226         struct iovec iov;
1227         int ret;
1228         ARCH_REGS regs;
1229
1230         iov.iov_base = &regs;
1231         iov.iov_len = sizeof(regs);
1232         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1233         EXPECT_EQ(0, ret);
1234
1235 #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
1236         {
1237                 regs.SYSCALL_NUM = syscall;
1238         }
1239
1240 #elif defined(__arm__)
1241 # ifndef PTRACE_SET_SYSCALL
1242 #  define PTRACE_SET_SYSCALL   23
1243 # endif
1244         {
1245                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1246                 EXPECT_EQ(0, ret);
1247         }
1248
1249 #else
1250         ASSERT_EQ(1, 0) {
1251                 TH_LOG("How is the syscall changed on this architecture?");
1252         }
1253 #endif
1254
1255         /* If syscall is skipped, change return value. */
1256         if (syscall == -1)
1257                 regs.SYSCALL_RET = 1;
1258
1259         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1260         EXPECT_EQ(0, ret);
1261 }
1262
1263 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1264                     int status, void *args)
1265 {
1266         int ret;
1267         unsigned long msg;
1268
1269         /* Make sure we got the right message. */
1270         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1271         EXPECT_EQ(0, ret);
1272
1273         switch (msg) {
1274         case 0x1002:
1275                 /* change getpid to getppid. */
1276                 change_syscall(_metadata, tracee, __NR_getppid);
1277                 break;
1278         case 0x1003:
1279                 /* skip gettid. */
1280                 change_syscall(_metadata, tracee, -1);
1281                 break;
1282         case 0x1004:
1283                 /* do nothing (allow getppid) */
1284                 break;
1285         default:
1286                 EXPECT_EQ(0, msg) {
1287                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1288                         kill(tracee, SIGKILL);
1289                 }
1290         }
1291
1292 }
1293
1294 FIXTURE_DATA(TRACE_syscall) {
1295         struct sock_fprog prog;
1296         pid_t tracer, mytid, mypid, parent;
1297 };
1298
1299 FIXTURE_SETUP(TRACE_syscall)
1300 {
1301         struct sock_filter filter[] = {
1302                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1303                         offsetof(struct seccomp_data, nr)),
1304                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1305                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1306                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1307                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1308                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1309                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1310                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1311         };
1312
1313         memset(&self->prog, 0, sizeof(self->prog));
1314         self->prog.filter = malloc(sizeof(filter));
1315         ASSERT_NE(NULL, self->prog.filter);
1316         memcpy(self->prog.filter, filter, sizeof(filter));
1317         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1318
1319         /* Prepare some testable syscall results. */
1320         self->mytid = syscall(__NR_gettid);
1321         ASSERT_GT(self->mytid, 0);
1322         ASSERT_NE(self->mytid, 1) {
1323                 TH_LOG("Running this test as init is not supported. :)");
1324         }
1325
1326         self->mypid = getpid();
1327         ASSERT_GT(self->mypid, 0);
1328         ASSERT_EQ(self->mytid, self->mypid);
1329
1330         self->parent = getppid();
1331         ASSERT_GT(self->parent, 0);
1332         ASSERT_NE(self->parent, self->mypid);
1333
1334         /* Launch tracer. */
1335         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1336 }
1337
1338 FIXTURE_TEARDOWN(TRACE_syscall)
1339 {
1340         teardown_trace_fixture(_metadata, self->tracer);
1341         if (self->prog.filter)
1342                 free(self->prog.filter);
1343 }
1344
1345 TEST_F(TRACE_syscall, syscall_allowed)
1346 {
1347         long ret;
1348
1349         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1350         ASSERT_EQ(0, ret);
1351
1352         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1353         ASSERT_EQ(0, ret);
1354
1355         /* getppid works as expected (no changes). */
1356         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1357         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1358 }
1359
1360 TEST_F(TRACE_syscall, syscall_redirected)
1361 {
1362         long ret;
1363
1364         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1365         ASSERT_EQ(0, ret);
1366
1367         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1368         ASSERT_EQ(0, ret);
1369
1370         /* getpid has been redirected to getppid as expected. */
1371         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1372         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1373 }
1374
1375 TEST_F(TRACE_syscall, syscall_dropped)
1376 {
1377         long ret;
1378
1379         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1380         ASSERT_EQ(0, ret);
1381
1382         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1383         ASSERT_EQ(0, ret);
1384
1385         /* gettid has been skipped and an altered return value stored. */
1386         EXPECT_EQ(1, syscall(__NR_gettid));
1387         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1388 }
1389
/*
 * Fallback syscall number for seccomp() when the installed headers do not
 * define it. Unknown architectures get a warning and a placeholder value.
 */
#ifndef __NR_seccomp
#if defined(__i386__)
#define __NR_seccomp 354
#elif defined(__x86_64__)
#define __NR_seccomp 317
#elif defined(__arm__)
#define __NR_seccomp 383
#elif defined(__aarch64__)
#define __NR_seccomp 277
#else
#warning "seccomp syscall number unknown for this architecture"
#define __NR_seccomp 0xffff
#endif
#endif

/* Fallback operation codes for the seccomp() syscall. */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Fallback flag value used with SECCOMP_SET_MODE_FILTER. */
#ifndef SECCOMP_FLAG_FILTER_TSYNC
#define SECCOMP_FLAG_FILTER_TSYNC 1
#endif
1416
#ifndef seccomp
/*
 * Wrapper that issues the seccomp syscall via syscall(). errno is cleared
 * before the call so callers can inspect it afterwards; the raw syscall()
 * result is returned (narrowed to int).
 */
int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
{
        long rc;

        errno = 0;
        rc = syscall(__NR_seccomp, op, flags, filter);
        return rc;
}
#endif
1424
1425 TEST(seccomp_syscall)
1426 {
1427         struct sock_filter filter[] = {
1428                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1429         };
1430         struct sock_fprog prog = {
1431                 .len = (unsigned short)ARRAY_SIZE(filter),
1432                 .filter = filter,
1433         };
1434         long ret;
1435
1436         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1437         ASSERT_EQ(0, ret) {
1438                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1439         }
1440
1441         /* Reject insane operation. */
1442         ret = seccomp(-1, 0, &prog);
1443         EXPECT_EQ(EINVAL, errno) {
1444                 TH_LOG("Did not reject crazy op value!");
1445         }
1446
1447         /* Reject strict with flags or pointer. */
1448         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1449         EXPECT_EQ(EINVAL, errno) {
1450                 TH_LOG("Did not reject mode strict with flags!");
1451         }
1452         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1453         EXPECT_EQ(EINVAL, errno) {
1454                 TH_LOG("Did not reject mode strict with uargs!");
1455         }
1456
1457         /* Reject insane args for filter. */
1458         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1459         EXPECT_EQ(EINVAL, errno) {
1460                 TH_LOG("Did not reject crazy filter flags!");
1461         }
1462         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1463         EXPECT_EQ(EFAULT, errno) {
1464                 TH_LOG("Did not reject NULL filter!");
1465         }
1466
1467         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1468         EXPECT_EQ(0, errno) {
1469                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1470                         strerror(errno));
1471         }
1472 }
1473
1474 TEST(seccomp_syscall_mode_lock)
1475 {
1476         struct sock_filter filter[] = {
1477                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1478         };
1479         struct sock_fprog prog = {
1480                 .len = (unsigned short)ARRAY_SIZE(filter),
1481                 .filter = filter,
1482         };
1483         long ret;
1484
1485         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1486         ASSERT_EQ(0, ret) {
1487                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1488         }
1489
1490         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1491         EXPECT_EQ(0, ret) {
1492                 TH_LOG("Could not install filter!");
1493         }
1494
1495         /* Make sure neither entry point will switch to strict. */
1496         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1497         EXPECT_EQ(EINVAL, errno) {
1498                 TH_LOG("Switched to mode strict!");
1499         }
1500
1501         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1502         EXPECT_EQ(EINVAL, errno) {
1503                 TH_LOG("Switched to mode strict!");
1504         }
1505 }
1506
1507 TEST(TSYNC_first)
1508 {
1509         struct sock_filter filter[] = {
1510                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1511         };
1512         struct sock_fprog prog = {
1513                 .len = (unsigned short)ARRAY_SIZE(filter),
1514                 .filter = filter,
1515         };
1516         long ret;
1517
1518         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1519         ASSERT_EQ(0, ret) {
1520                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1521         }
1522
1523         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1524                       &prog);
1525         EXPECT_EQ(0, ret) {
1526                 TH_LOG("Could not install initial filter with TSYNC!");
1527         }
1528 }
1529
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2

/* State shared between a TSYNC test and one of its sibling threads. */
struct tsync_sibling {
        /* pthread handle filled in by pthread_create(). */
        pthread_t tid;
        /* Kernel thread id, stored by the sibling itself via gettid. */
        pid_t system_tid;
        /* Posted by the sibling once it has finished its startup step. */
        sem_t *started;
        /* Condition variable (and its mutex) the sibling waits on. */
        pthread_cond_t *cond;
        pthread_mutex_t *mutex;
        /* Non-zero: the sibling installs its own copy of prog at startup. */
        int diverge;
        /* How many condition wake-ups the sibling consumes before going on. */
        int num_waits;
        /* Filter program used when diverge is set. */
        struct sock_fprog *prog;
        /* Harness metadata of the owning test. */
        struct __test_metadata *metadata;
};
1542
1543 FIXTURE_DATA(TSYNC) {
1544         struct sock_fprog root_prog, apply_prog;
1545         struct tsync_sibling sibling[TSYNC_SIBLINGS];
1546         sem_t started;
1547         pthread_cond_t cond;
1548         pthread_mutex_t mutex;
1549         int sibling_count;
1550 };
1551
1552 FIXTURE_SETUP(TSYNC)
1553 {
1554         struct sock_filter root_filter[] = {
1555                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1556         };
1557         struct sock_filter apply_filter[] = {
1558                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1559                         offsetof(struct seccomp_data, nr)),
1560                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1561                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1562                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1563         };
1564
1565         memset(&self->root_prog, 0, sizeof(self->root_prog));
1566         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1567         memset(&self->sibling, 0, sizeof(self->sibling));
1568         self->root_prog.filter = malloc(sizeof(root_filter));
1569         ASSERT_NE(NULL, self->root_prog.filter);
1570         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1571         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1572
1573         self->apply_prog.filter = malloc(sizeof(apply_filter));
1574         ASSERT_NE(NULL, self->apply_prog.filter);
1575         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1576         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1577
1578         self->sibling_count = 0;
1579         pthread_mutex_init(&self->mutex, NULL);
1580         pthread_cond_init(&self->cond, NULL);
1581         sem_init(&self->started, 0, 0);
1582         self->sibling[0].tid = 0;
1583         self->sibling[0].cond = &self->cond;
1584         self->sibling[0].started = &self->started;
1585         self->sibling[0].mutex = &self->mutex;
1586         self->sibling[0].diverge = 0;
1587         self->sibling[0].num_waits = 1;
1588         self->sibling[0].prog = &self->root_prog;
1589         self->sibling[0].metadata = _metadata;
1590         self->sibling[1].tid = 0;
1591         self->sibling[1].cond = &self->cond;
1592         self->sibling[1].started = &self->started;
1593         self->sibling[1].mutex = &self->mutex;
1594         self->sibling[1].diverge = 0;
1595         self->sibling[1].prog = &self->root_prog;
1596         self->sibling[1].num_waits = 1;
1597         self->sibling[1].metadata = _metadata;
1598 }
1599
1600 FIXTURE_TEARDOWN(TSYNC)
1601 {
1602         int sib = 0;
1603
1604         if (self->root_prog.filter)
1605                 free(self->root_prog.filter);
1606         if (self->apply_prog.filter)
1607                 free(self->apply_prog.filter);
1608
1609         for ( ; sib < self->sibling_count; ++sib) {
1610                 struct tsync_sibling *s = &self->sibling[sib];
1611                 void *status;
1612
1613                 if (!s->tid)
1614                         continue;
1615                 if (pthread_kill(s->tid, 0)) {
1616                         pthread_cancel(s->tid);
1617                         pthread_join(s->tid, &status);
1618                 }
1619         }
1620         pthread_mutex_destroy(&self->mutex);
1621         pthread_cond_destroy(&self->cond);
1622         sem_destroy(&self->started);
1623 }
1624
1625 void *tsync_sibling(void *data)
1626 {
1627         long ret = 0;
1628         struct tsync_sibling *me = data;
1629
1630         me->system_tid = syscall(__NR_gettid);
1631
1632         pthread_mutex_lock(me->mutex);
1633         if (me->diverge) {
1634                 /* Just re-apply the root prog to fork the tree */
1635                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1636                                 me->prog, 0, 0);
1637         }
1638         sem_post(me->started);
1639         /* Return outside of started so parent notices failures. */
1640         if (ret) {
1641                 pthread_mutex_unlock(me->mutex);
1642                 return (void *)SIBLING_EXIT_FAILURE;
1643         }
1644         do {
1645                 pthread_cond_wait(me->cond, me->mutex);
1646                 me->num_waits = me->num_waits - 1;
1647         } while (me->num_waits);
1648         pthread_mutex_unlock(me->mutex);
1649
1650         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1651         if (!ret)
1652                 return (void *)SIBLING_EXIT_NEWPRIVS;
1653         read(0, NULL, 0);
1654         return (void *)SIBLING_EXIT_UNKILLED;
1655 }
1656
1657 void tsync_start_sibling(struct tsync_sibling *sibling)
1658 {
1659         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1660 }
1661
1662 TEST_F(TSYNC, siblings_fail_prctl)
1663 {
1664         long ret;
1665         void *status;
1666         struct sock_filter filter[] = {
1667                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1668                         offsetof(struct seccomp_data, nr)),
1669                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1670                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1671                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1672         };
1673         struct sock_fprog prog = {
1674                 .len = (unsigned short)ARRAY_SIZE(filter),
1675                 .filter = filter,
1676         };
1677
1678         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1679                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1680         }
1681
1682         /* Check prctl failure detection by requesting sib 0 diverge. */
1683         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1684         ASSERT_EQ(0, ret) {
1685                 TH_LOG("setting filter failed");
1686         }
1687
1688         self->sibling[0].diverge = 1;
1689         tsync_start_sibling(&self->sibling[0]);
1690         tsync_start_sibling(&self->sibling[1]);
1691
1692         while (self->sibling_count < TSYNC_SIBLINGS) {
1693                 sem_wait(&self->started);
1694                 self->sibling_count++;
1695         }
1696
1697         /* Signal the threads to clean up*/
1698         pthread_mutex_lock(&self->mutex);
1699         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1700                 TH_LOG("cond broadcast non-zero");
1701         }
1702         pthread_mutex_unlock(&self->mutex);
1703
1704         /* Ensure diverging sibling failed to call prctl. */
1705         pthread_join(self->sibling[0].tid, &status);
1706         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1707         pthread_join(self->sibling[1].tid, &status);
1708         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1709 }
1710
1711 TEST_F(TSYNC, two_siblings_with_ancestor)
1712 {
1713         long ret;
1714         void *status;
1715
1716         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1717                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1718         }
1719
1720         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1721         ASSERT_EQ(0, ret) {
1722                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1723         }
1724         tsync_start_sibling(&self->sibling[0]);
1725         tsync_start_sibling(&self->sibling[1]);
1726
1727         while (self->sibling_count < TSYNC_SIBLINGS) {
1728                 sem_wait(&self->started);
1729                 self->sibling_count++;
1730         }
1731
1732         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1733                       &self->apply_prog);
1734         ASSERT_EQ(0, ret) {
1735                 TH_LOG("Could install filter on all threads!");
1736         }
1737         /* Tell the siblings to test the policy */
1738         pthread_mutex_lock(&self->mutex);
1739         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1740                 TH_LOG("cond broadcast non-zero");
1741         }
1742         pthread_mutex_unlock(&self->mutex);
1743         /* Ensure they are both killed and don't exit cleanly. */
1744         pthread_join(self->sibling[0].tid, &status);
1745         EXPECT_EQ(0x0, (long)status);
1746         pthread_join(self->sibling[1].tid, &status);
1747         EXPECT_EQ(0x0, (long)status);
1748 }
1749
1750 TEST_F(TSYNC, two_sibling_want_nnp)
1751 {
1752         void *status;
1753
1754         /* start siblings before any prctl() operations */
1755         tsync_start_sibling(&self->sibling[0]);
1756         tsync_start_sibling(&self->sibling[1]);
1757         while (self->sibling_count < TSYNC_SIBLINGS) {
1758                 sem_wait(&self->started);
1759                 self->sibling_count++;
1760         }
1761
1762         /* Tell the siblings to test no policy */
1763         pthread_mutex_lock(&self->mutex);
1764         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1765                 TH_LOG("cond broadcast non-zero");
1766         }
1767         pthread_mutex_unlock(&self->mutex);
1768
1769         /* Ensure they are both upset about lacking nnp. */
1770         pthread_join(self->sibling[0].tid, &status);
1771         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1772         pthread_join(self->sibling[1].tid, &status);
1773         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1774 }
1775
1776 TEST_F(TSYNC, two_siblings_with_no_filter)
1777 {
1778         long ret;
1779         void *status;
1780
1781         /* start siblings before any prctl() operations */
1782         tsync_start_sibling(&self->sibling[0]);
1783         tsync_start_sibling(&self->sibling[1]);
1784         while (self->sibling_count < TSYNC_SIBLINGS) {
1785                 sem_wait(&self->started);
1786                 self->sibling_count++;
1787         }
1788
1789         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1790                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1791         }
1792
1793         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1794                       &self->apply_prog);
1795         ASSERT_EQ(0, ret) {
1796                 TH_LOG("Could install filter on all threads!");
1797         }
1798
1799         /* Tell the siblings to test the policy */
1800         pthread_mutex_lock(&self->mutex);
1801         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1802                 TH_LOG("cond broadcast non-zero");
1803         }
1804         pthread_mutex_unlock(&self->mutex);
1805
1806         /* Ensure they are both killed and don't exit cleanly. */
1807         pthread_join(self->sibling[0].tid, &status);
1808         EXPECT_EQ(0x0, (long)status);
1809         pthread_join(self->sibling[1].tid, &status);
1810         EXPECT_EQ(0x0, (long)status);
1811 }
1812
1813 TEST_F(TSYNC, two_siblings_with_one_divergence)
1814 {
1815         long ret;
1816         void *status;
1817
1818         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1819                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1820         }
1821
1822         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1823         ASSERT_EQ(0, ret) {
1824                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1825         }
1826         self->sibling[0].diverge = 1;
1827         tsync_start_sibling(&self->sibling[0]);
1828         tsync_start_sibling(&self->sibling[1]);
1829
1830         while (self->sibling_count < TSYNC_SIBLINGS) {
1831                 sem_wait(&self->started);
1832                 self->sibling_count++;
1833         }
1834
1835         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1836                       &self->apply_prog);
1837         ASSERT_EQ(self->sibling[0].system_tid, ret) {
1838                 TH_LOG("Did not fail on diverged sibling.");
1839         }
1840
1841         /* Wake the threads */
1842         pthread_mutex_lock(&self->mutex);
1843         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1844                 TH_LOG("cond broadcast non-zero");
1845         }
1846         pthread_mutex_unlock(&self->mutex);
1847
1848         /* Ensure they are both unkilled. */
1849         pthread_join(self->sibling[0].tid, &status);
1850         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1851         pthread_join(self->sibling[1].tid, &status);
1852         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1853 }
1854
1855 TEST_F(TSYNC, two_siblings_not_under_filter)
1856 {
1857         long ret, sib;
1858         void *status;
1859
1860         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1861                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1862         }
1863
1864         /*
1865          * Sibling 0 will have its own seccomp policy
1866          * and Sibling 1 will not be under seccomp at
1867          * all. Sibling 1 will enter seccomp and 0
1868          * will cause failure.
1869          */
1870         self->sibling[0].diverge = 1;
1871         tsync_start_sibling(&self->sibling[0]);
1872         tsync_start_sibling(&self->sibling[1]);
1873
1874         while (self->sibling_count < TSYNC_SIBLINGS) {
1875                 sem_wait(&self->started);
1876                 self->sibling_count++;
1877         }
1878
1879         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1880         ASSERT_EQ(0, ret) {
1881                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1882         }
1883
1884         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1885                       &self->apply_prog);
1886         ASSERT_EQ(ret, self->sibling[0].system_tid) {
1887                 TH_LOG("Did not fail on diverged sibling.");
1888         }
1889         sib = 1;
1890         if (ret == self->sibling[0].system_tid)
1891                 sib = 0;
1892
1893         pthread_mutex_lock(&self->mutex);
1894
1895         /* Increment the other siblings num_waits so we can clean up
1896          * the one we just saw.
1897          */
1898         self->sibling[!sib].num_waits += 1;
1899
1900         /* Signal the thread to clean up*/
1901         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1902                 TH_LOG("cond broadcast non-zero");
1903         }
1904         pthread_mutex_unlock(&self->mutex);
1905         pthread_join(self->sibling[sib].tid, &status);
1906         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1907         /* Poll for actual task death. pthread_join doesn't guarantee it. */
1908         while (!kill(self->sibling[sib].system_tid, 0))
1909                 sleep(0.1);
1910         /* Switch to the remaining sibling */
1911         sib = !sib;
1912
1913         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1914                       &self->apply_prog);
1915         ASSERT_EQ(0, ret) {
1916                 TH_LOG("Expected the remaining sibling to sync");
1917         };
1918
1919         pthread_mutex_lock(&self->mutex);
1920
1921         /* If remaining sibling didn't have a chance to wake up during
1922          * the first broadcast, manually reduce the num_waits now.
1923          */
1924         if (self->sibling[sib].num_waits > 1)
1925                 self->sibling[sib].num_waits = 1;
1926         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1927                 TH_LOG("cond broadcast non-zero");
1928         }
1929         pthread_mutex_unlock(&self->mutex);
1930         pthread_join(self->sibling[sib].tid, &status);
1931         EXPECT_EQ(0, (long)status);
1932         /* Poll for actual task death. pthread_join doesn't guarantee it. */
1933         while (!kill(self->sibling[sib].system_tid, 0))
1934                 sleep(0.1);
1935
1936         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1937                       &self->apply_prog);
1938         ASSERT_EQ(0, ret);  /* just us chickens */
1939 }
1940
1941 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
1942 TEST(syscall_restart)
1943 {
1944         long ret;
1945         unsigned long msg;
1946         pid_t child_pid;
1947         int pipefd[2];
1948         int status;
1949         siginfo_t info = { };
1950         struct sock_filter filter[] = {
1951                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1952                          offsetof(struct seccomp_data, nr)),
1953
1954 #ifdef __NR_sigreturn
1955                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
1956 #endif
1957                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
1958                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
1959                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
1960                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
1961                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
1962
1963                 /* Allow __NR_write for easy logging. */
1964                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
1965                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1966                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1967                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
1968                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
1969         };
1970         struct sock_fprog prog = {
1971                 .len = (unsigned short)ARRAY_SIZE(filter),
1972                 .filter = filter,
1973         };
1974
1975         ASSERT_EQ(0, pipe(pipefd));
1976
1977         child_pid = fork();
1978         ASSERT_LE(0, child_pid);
1979         if (child_pid == 0) {
1980                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
1981                 char buf = ' ';
1982                 struct pollfd fds = {
1983                         .fd = pipefd[0],
1984                         .events = POLLIN,
1985                 };
1986
1987                 /* Attach parent as tracer and stop. */
1988                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
1989                 EXPECT_EQ(0, raise(SIGSTOP));
1990
1991                 EXPECT_EQ(0, close(pipefd[1]));
1992
1993                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1994                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1995                 }
1996
1997                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1998                 EXPECT_EQ(0, ret) {
1999                         TH_LOG("Failed to install filter!");
2000                 }
2001
2002                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2003                         TH_LOG("Failed to read() sync from parent");
2004                 }
2005                 EXPECT_EQ('.', buf) {
2006                         TH_LOG("Failed to get sync data from read()");
2007                 }
2008
2009                 /* Start poll to be interrupted. */
2010                 errno = 0;
2011                 EXPECT_EQ(1, poll(&fds, 1, -1)) {
2012                         TH_LOG("Call to poll() failed (errno %d)", errno);
2013                 }
2014
2015                 /* Read final sync from parent. */
2016                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2017                         TH_LOG("Failed final read() from parent");
2018                 }
2019                 EXPECT_EQ('!', buf) {
2020                         TH_LOG("Failed to get final data from read()");
2021                 }
2022
2023                 /* Directly report the status of our test harness results. */
2024                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2025                                                      : EXIT_FAILURE);
2026         }
2027         EXPECT_EQ(0, close(pipefd[0]));
2028
2029         /* Attach to child, setup options, and release. */
2030         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2031         ASSERT_EQ(true, WIFSTOPPED(status));
2032         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2033                             PTRACE_O_TRACESECCOMP));
2034         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2035         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2036
2037         /* Wait for poll() to start. */
2038         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2039         ASSERT_EQ(true, WIFSTOPPED(status));
2040         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2041         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2042         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2043         ASSERT_EQ(0x100, msg);
2044         EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
2045
2046         /* Might as well check siginfo for sanity while we're here. */
2047         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2048         ASSERT_EQ(SIGTRAP, info.si_signo);
2049         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2050         EXPECT_EQ(0, info.si_errno);
2051         EXPECT_EQ(getuid(), info.si_uid);
2052         /* Verify signal delivery came from child (seccomp-triggered). */
2053         EXPECT_EQ(child_pid, info.si_pid);
2054
2055         /* Interrupt poll with SIGSTOP (which we'll need to handle). */
2056         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2057         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2058         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2059         ASSERT_EQ(true, WIFSTOPPED(status));
2060         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2061         /* Verify signal delivery came from parent now. */
2062         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2063         EXPECT_EQ(getpid(), info.si_pid);
2064
2065         /* Restart poll with SIGCONT, which triggers restart_syscall. */
2066         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2067         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2068         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2069         ASSERT_EQ(true, WIFSTOPPED(status));
2070         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2071         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2072
2073         /* Wait for restart_syscall() to start. */
2074         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2075         ASSERT_EQ(true, WIFSTOPPED(status));
2076         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2077         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2078         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2079         ASSERT_EQ(0x200, msg);
2080         ret = get_syscall(_metadata, child_pid);
2081 #if defined(__arm__)
2082         /* FIXME: ARM does not expose true syscall in registers. */
2083         EXPECT_EQ(__NR_poll, ret);
2084 #else
2085         EXPECT_EQ(__NR_restart_syscall, ret);
2086 #endif
2087
2088         /* Write again to end poll. */
2089         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2090         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2091         EXPECT_EQ(0, close(pipefd[1]));
2092
2093         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2094         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2095                 _metadata->passed = 0;
2096 }
2097
2098 /*
2099  * TODO:
2100  * - add microbenchmarks
2101  * - expand NNP testing
2102  * - better arch-specific TRACE and TRAP handlers.
2103  * - endianness checking when appropriate
2104  * - 64-bit arg prodding
2105  * - arch value testing (x86 modes especially)
2106  * - ...
2107  */
2108
/* Macro from test_harness.h; presumably expands to the program entry point that runs the tests above (confirm in the harness). */
TEST_HARNESS_MAIN