2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #warning "Sandbox disabled! Please report issues upstream instead of disabling the sandbox."
24 sandbox_server_process(void)
30 sandbox_executor_process(void)
32 log_notice(NULL, "Sandbox disabled! "
33 "Please report issues upstream instead of disabling the sandbox.");
37 sandbox_logger_process(void)
42 #elif defined(__FreeBSD__)
44 #include <sys/capsicum.h>
47 sandbox_server_process(void)
49 if (cap_enter() == -1)
54 sandbox_executor_process(void)
57 * We cannot capsicum the executor process because it needs to
58 * fork(2)+execve(2) cgi scripts
64 sandbox_logger_process(void)
66 if (cap_enter() == -1)
70 #elif defined(__linux__)
72 #include <sys/prctl.h>
73 #include <sys/syscall.h>
74 #include <sys/syscall.h>
75 #include <sys/types.h>
77 #include <linux/audit.h>
78 #include <linux/filter.h>
79 #include <linux/seccomp.h>
88 # include "landlock_shim.h"
91 /* uncomment to enable debugging. ONLY FOR DEVELOPMENT */
92 /* #define SC_DEBUG */
95 # define SC_FAIL SECCOMP_RET_TRAP
97 # define SC_FAIL SECCOMP_RET_KILL
100 #if (BYTE_ORDER == LITTLE_ENDIAN)
102 # define SC_ARG_HI sizeof(uint32_t)
103 #elif (BYTE_ORDER == BIG_ENDIAN)
104 # define SC_ARG_LO sizeof(uint32_t)
107 # error "Uknown endian"
110 /*
 * make the filter more readable
 *
 * SC_ALLOW(nr) expands to two BPF instructions: compare the value
 * currently in the accumulator against __NR_<nr> (the name is pasted
 * onto the __NR_ prefix, so pass the bare syscall name) and, when
 * equal, return SECCOMP_RET_ALLOW; otherwise skip the return and fall
 * through to whatever instruction follows.
 */
111 #define SC_ALLOW(nr) \
112 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_##nr, 0, 1), \
113 BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
116 * SC_ALLOW_ARG and the SECCOMP_AUDIT_ARCH below are courtesy of
117 * https://roy.marples.name/git/dhcpcd/blob/HEAD:/src/privsep-linux.c
/*
 * SC_ALLOW_ARG(_nr, _arg, _val): allow the syscall whose number is
 * _nr only when its argument at index _arg equals _val.  Unlike
 * SC_ALLOW, _nr is used as-is (no __NR_ pasting), so the caller
 * passes the full constant.
 *
 * The argument is compared as two 32-bit words, loaded from
 * args[_arg] at byte offsets SC_ARG_LO and SC_ARG_HI.  If the syscall
 * number does not match, all the remaining instructions of the
 * sequence are skipped.  If either word of the argument differs, the
 * jump lands on the final load, which puts the syscall number back
 * into the accumulator (it was overwritten by the argument loads) so
 * that the comparisons following this macro keep working.
 */
119 #define SC_ALLOW_ARG(_nr, _arg, _val) \
120 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (_nr), 0, 6), \
121 BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
122 offsetof(struct seccomp_data, args[(_arg)]) + SC_ARG_LO), \
123 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
124 ((_val) & 0xffffffff), 0, 3), \
125 BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
126 offsetof(struct seccomp_data, args[(_arg)]) + SC_ARG_HI), \
127 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
128 (((uint32_t)((uint64_t)(_val) >> 32)) & 0xffffffff), 0, 1), \
129 BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), \
130 BPF_STMT(BPF_LD + BPF_W + BPF_ABS, \
131 offsetof(struct seccomp_data, nr))
134 * I personally find this quite nutty. Why can a system header not
135 * define a default for this?
137 #if defined(__i386__)
138 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_I386
139 #elif defined(__x86_64__)
140 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_X86_64
141 #elif defined(__arc__)
143 # if (BYTE_ORDER == LITTLE_ENDIAN)
144 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARCOMPACT
146 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARCOMPACTBE
148 # elif defined(__HS__)
149 # if (BYTE_ORDER == LITTLE_ENDIAN)
150 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARCV2
152 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARCV2BE
155 # error "Platform does not support seccomp filter yet"
157 #elif defined(__arm__)
161 # if (BYTE_ORDER == LITTLE_ENDIAN)
162 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARM
164 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ARMEB
166 #elif defined(__aarch64__)
167 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_AARCH64
168 #elif defined(__alpha__)
169 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_ALPHA
170 #elif defined(__hppa__)
171 # if defined(__LP64__)
172 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_PARISC64
174 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_PARISC
176 #elif defined(__ia64__)
177 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_IA64
178 #elif defined(__microblaze__)
179 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_MICROBLAZE
180 #elif defined(__m68k__)
181 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_M68K
182 #elif defined(__mips__)
183 # if defined(__MIPSEL__)
184 # if defined(__LP64__)
185 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_MIPSEL64
187 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_MIPSEL
189 # elif defined(__LP64__)
190 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_MIPS64
192 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_MIPS
194 #elif defined(__nds32__)
195 # if (BYTE_ORDER == LITTLE_ENDIAN)
196 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_NDS32
198 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_NDS32BE
200 #elif defined(__nios2__)
201 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_NIOS2
202 #elif defined(__or1k__)
203 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_OPENRISC
204 #elif defined(__powerpc64__)
205 # if (BYTE_ORDER == LITTLE_ENDIAN)
206 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_PPC64LE
208 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_PPC64
210 #elif defined(__powerpc__)
211 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_PPC
212 #elif defined(__riscv)
213 # if defined(__LP64__)
214 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_RISCV64
216 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_RISCV32
218 #elif defined(__s390x__)
219 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_S390X
220 #elif defined(__s390__)
221 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_S390
222 #elif defined(__sh__)
223 # if defined(__LP64__)
224 # if (BYTE_ORDER == LITTLE_ENDIAN)
225 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SHEL64
227 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SH64
230 # if (BYTE_ORDER == LITTLE_ENDIAN)
231 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SHEL
233 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SH
236 #elif defined(__sparc__)
237 # if defined(__arch64__)
238 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SPARC64
240 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_SPARC
242 #elif defined(__xtensa__)
243 # define SECCOMP_AUDIT_ARCH AUDIT_ARCH_XTENSA
245 # error "Platform does not support seccomp filter yet"
248 static struct sock_filter filter[] = {
249 /* load the *current* architecture */
250 BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
251 (offsetof(struct seccomp_data, arch))),
252 /* ensure it's the same that we've been compiled on */
253 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
254 SECCOMP_AUDIT_ARCH, 1, 0),
255 /* if not, kill the program */
256 BPF_STMT(BPF_RET | BPF_K, SC_FAIL),
258 /* load the syscall number */
259 BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
260 (offsetof(struct seccomp_data, nr))),
271 #ifdef __NR_clock_gettime
272 SC_ALLOW(clock_gettime),
274 #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
275 SECCOMP_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
277 #ifdef __NR_clock_gettime64
278 SC_ALLOW(clock_gettime64),
283 #ifdef __NR_epoll_ctl
286 #ifdef __NR_epoll_pwait
287 SC_ALLOW(epoll_pwait),
289 #ifdef __NR_epoll_wait
290 SC_ALLOW(epoll_wait),
295 #ifdef __NR_exit_group
296 SC_ALLOW(exit_group),
310 #ifdef __NR_getdents64
311 SC_ALLOW(getdents64),
316 #ifdef __NR_getrandom
319 #ifdef __NR_gettimeofday
320 SC_ALLOW(gettimeofday),
323 /* allow ioctl only on fd 1, glibc doing stuff? */
324 SC_ALLOW_ARG(__NR_ioctl, 0, 1),
341 #ifdef __NR_newfstatat
342 SC_ALLOW(newfstatat),
350 #ifdef __NR_prlimit64
362 #ifdef __NR_rt_sigaction
363 SC_ALLOW(rt_sigaction),
365 #ifdef __NR_rt_sigreturn
366 SC_ALLOW(rt_sigreturn),
381 /* disallow everything else */
382 BPF_STMT(BPF_RET | BPF_K, SC_FAIL),
391 sandbox_seccomp_violation(int signum, siginfo_t *info, void *ctx)
396 fprintf(stderr, "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)\n",
397 __func__, info->si_arch, info->si_syscall, info->si_call_addr);
402 sandbox_seccomp_catch_sigsys(void)
404 struct sigaction act;
407 memset(&act, 0, sizeof(act));
409 sigaddset(&mask, SIGSYS);
411 act.sa_sigaction = &sandbox_seccomp_violation;
412 act.sa_flags = SA_SIGINFO;
413 if (sigaction(SIGSYS, &act, NULL) == -1)
414 fatal("%s: sigaction(SIGSYS): %s",
415 __func__, strerror(errno));
417 if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
418 fatal("%s: sigprocmask(SIGSYS): %s\n",
419 __func__, strerror(errno));
421 #endif /* SC_DEBUG */
425 gmid_create_landlock_rs(struct landlock_ruleset_attr *attr, size_t len,
430 fd = landlock_create_ruleset(attr, len, 0);
434 fatal("%s: failed to create ruleset. "
435 "Landlock doesn't seem to be supported by the "
436 "current kernel.", __func__);
438 log_warn(NULL, "%s: failed to create ruleset. "
439 "Landlock seems to be currently disabled; "
440 "continuing without it.", __func__);
443 fatal("%s: failed to create ruleset: %s",
444 __func__, strerror(errno));
452 server_landlock(void)
459 * These are all the actions that we want to either allow or
460 * disallow. Things like LANDLOCK_ACCESS_FS_EXECUTE are
461 * omitted because they are already handled by seccomp.
463 struct landlock_ruleset_attr ruleset_attr = {
464 .handled_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE |
465 LANDLOCK_ACCESS_FS_READ_FILE |
466 LANDLOCK_ACCESS_FS_READ_DIR |
467 LANDLOCK_ACCESS_FS_MAKE_CHAR |
468 LANDLOCK_ACCESS_FS_MAKE_DIR |
469 LANDLOCK_ACCESS_FS_MAKE_REG |
470 LANDLOCK_ACCESS_FS_MAKE_SOCK |
471 LANDLOCK_ACCESS_FS_MAKE_FIFO |
472 LANDLOCK_ACCESS_FS_MAKE_BLOCK |
473 LANDLOCK_ACCESS_FS_MAKE_SYM,
477 * These are all the actions allowed for the root directories
478 * of the vhosts. All the other rules mentioned in
479 * ruleset_attr and omitted here are implicitly disallowed.
481 struct landlock_path_beneath_attr path_beneath = {
482 .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
483 LANDLOCK_ACCESS_FS_READ_DIR,
486 fd = gmid_create_landlock_rs(&ruleset_attr, sizeof(ruleset_attr), 0);
490 TAILQ_FOREACH(h, &hosts, vhosts) {
491 TAILQ_FOREACH(l, &h->locations, locations) {
495 path_beneath.parent_fd = open(l->dir, O_PATH);
496 if (path_beneath.parent_fd == -1)
497 fatal("%s: can't open %s for landlock: %s",
498 __func__, l->dir, strerror(errno));
500 err = landlock_add_rule(fd, LANDLOCK_RULE_PATH_BENEATH,
503 fatal("%s: landlock_add_rule(%s) failed: %s",
504 __func__, l->dir, strerror(errno));
506 close(path_beneath.parent_fd);
514 logger_landlock(void)
519 * These are all the possible actions. The logger receives
520 * file descriptors so it doesn't need *ANY* fs access. It's
521 * easier to remove FS access than come up with a seccomp
524 struct landlock_ruleset_attr ruleset_attr = {
525 .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE |
526 LANDLOCK_ACCESS_FS_WRITE_FILE |
527 LANDLOCK_ACCESS_FS_READ_FILE |
528 LANDLOCK_ACCESS_FS_READ_DIR |
529 LANDLOCK_ACCESS_FS_REMOVE_DIR |
530 LANDLOCK_ACCESS_FS_REMOVE_FILE |
531 LANDLOCK_ACCESS_FS_MAKE_CHAR |
532 LANDLOCK_ACCESS_FS_MAKE_DIR |
533 LANDLOCK_ACCESS_FS_MAKE_REG |
534 LANDLOCK_ACCESS_FS_MAKE_SOCK |
535 LANDLOCK_ACCESS_FS_MAKE_FIFO |
536 LANDLOCK_ACCESS_FS_MAKE_BLOCK |
537 LANDLOCK_ACCESS_FS_MAKE_SYM,
541 * Disallow every action.
543 struct landlock_path_beneath_attr path_beneath = {
547 fd = gmid_create_landlock_rs(&ruleset_attr, sizeof(ruleset_attr), 0);
551 path_beneath.parent_fd = open("/", O_PATH);
552 if (path_beneath.parent_fd == -1)
553 fatal("%s: can't open / for landlock: %s",
554 __func__, strerror(errno));
556 err = landlock_add_rule(fd, LANDLOCK_RULE_PATH_BENEATH,
559 fatal("%s: landlock_add_rule(/) failed: %s",
560 __func__, strerror(errno));
561 close(path_beneath.parent_fd);
568 sandbox_server_process(void)
571 struct sock_fprog prog = {
572 .len = (unsigned short) (sizeof(filter) / sizeof(filter[0])),
577 sandbox_seccomp_catch_sigsys();
581 log_warn(NULL, "loading landlock...");
582 fd = server_landlock();
584 (void)fd; /* avoid unused var warning */
587 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
588 fatal("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
589 __func__, strerror(errno));
593 if (landlock_restrict_self(fd, 0))
594 fatal("%s: landlock_restrict_self: %s",
595 __func__, strerror(errno));
600 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == -1)
601 fatal("%s: prctl(PR_SET_SECCOMP): %s\n",
602 __func__, strerror(errno));
606 sandbox_executor_process(void)
609 * We cannot use seccomp for the executor process because we
610 * don't know what the child will do. Also, our filter will
611 * be inherited so the child cannot set its own seccomp
618 sandbox_logger_process(void)
621 * Here we could use a seccomp filter to allow only recvfd,
622 * write/writev and memory allocations, but syslog is a beast
623 * and I don't know what syscalls it could end up doing.
624 * Landlock is a simpler beast, use it to disallow any file
631 if ((fd = logger_landlock()) == -1)
634 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
635 fatal("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
636 __func__, strerror(errno));
638 if (landlock_restrict_self(fd, 0))
639 fatal("%s: landlock_restrict_self: %s"
640 __func__, strerror(errno));
648 #elif defined(__OpenBSD__)
653 sandbox_server_process(void)
658 TAILQ_FOREACH(h, &hosts, vhosts) {
659 TAILQ_FOREACH(l, &h->locations, locations) {
663 if (unveil(l->dir, "r") == -1)
664 fatal("unveil %s for domain %s",
670 if (pledge("stdio recvfd rpath inet", NULL) == -1)
675 sandbox_executor_process(void)
682 TAILQ_FOREACH(h, &hosts, vhosts) {
683 TAILQ_FOREACH(l, &h->locations, locations) {
687 /* r so we can chdir into the directory */
688 if (unveil(l->dir, "rx") == -1)
689 fatal("unveil %s for domain %s",
694 for (i = 0; i < FCGI_MAX; i++) {
696 if (f->path != NULL) {
697 if (unveil(f->path, "rw") == -1)
698 fatal("unveil %s", f->path);
701 if (f->prog != NULL) {
702 if (unveil(f->prog, "rx") == -1)
703 fatal("unveil %s", f->prog);
708 * rpath: to chdir into the correct directory
710 * dns inet unix: FastCGI
712 if (pledge("stdio rpath sendfd proc exec dns inet unix", NULL))
717 sandbox_logger_process(void)
719 if (pledge("stdio recvfd", NULL) == -1)
725 #warning "No sandbox method known for this OS"
728 sandbox_server_process(void)
734 sandbox_executor_process(void)
736 log_notice(NULL, "no sandbox method known for this OS");
740 sandbox_logger_process(void)