diff --git a/arch.h b/arch.h
new file mode 100644
index 0000000..c9ccbe4
--- /dev/null
+++ b/arch.h
@@ -0,0 +1,21 @@
+/* Copyright © 2024 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */
+
+#ifndef ARCH_H_
+# define ARCH_H_
+
+# include "config.h"
+
+# define ARCH_STR_(x) #x
+# define ARCH_STR(x) ARCH_STR_(x)
+
+/* *INDENT-OFF* - formatters try to add spaces here */
+# define ARCH_HEADER_BASE arch/ARCH
+/* *INDENT-ON* */
+
+# include ARCH_STR(ARCH_HEADER_BASE/syscall.h)
+
+#endif /* !ARCH_H_ */
diff --git a/arch/x86/gen-syscall.bash b/arch/x86/gen-syscall.bash
new file mode 100755
index 0000000..3502ed9
--- /dev/null
+++ b/arch/x86/gen-syscall.bash
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+# This script generates the classic BPF program to intercept system calls
+# in x86 userspace.
+
+# From asm/unistd_64.h
+declare -A x86_64_syscalls=(
+	["mknod"]="133"
+	["mknodat"]="259"
+)
+
+# From asm/unistd_32.h
+declare -A i386_syscalls=(
+	["mknod"]="14"
+	["mknodat"]="297"
+)
+
+prelude=(
+	# Check that we're running on x86_64 or i386
+	'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch)))'
+	'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_I386, $(($i386_offset-2)), 0)'
+	'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0)'
+	'BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS)'
+
+	# The x32 ABI (not to be confused with the i386 ABI!) uses the
+	# same system call numbers as x86_64, but set bit 30. Clear it so we share
+	# the same table.
+	'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr)))'
+	'BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, X32_SYSCALL_BIT, 0, 1)'
+	'BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, X32_SYSCALL_BIT)'
+)
+
+syscall_jump=(
+	'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, $nr, 0, 1)'
+	'BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)'
+)
+# Index of the first i386 instruction: the prelude, one jump pair per x86_64 syscall and one RET ALLOW precede it.
+i386_offset=$((${#prelude[@]} + ${#syscall_jump[@]}*${#x86_64_syscalls[@]} + 1))
+
+# NOTE: indentation is done with tabs. Do not use spaces, do not remove tabs,
+# lest you break all HEREDOCs.
+
+gen_source() { # Print the C source of the filter on stdout (redirected to arch/x86/syscall.c below).
+	cat <<-EOF
+	/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+	
+	#include <stddef.h>
+	#include <linux/audit.h>
+	#include <linux/bpf_common.h>
+	#include <linux/filter.h>
+	#include <linux/seccomp.h>
+	
+	/* For the x32 ABI, all system call numbers have bit 30 set */
+	#define X32_SYSCALL_BIT 0x40000000
+	
+	const struct sock_filter syscall_filter[] = {
+	EOF
+	# Architecture check and x32 normalisation; eval expands the $((...)) embedded in the statements.
+	for stmt in "${prelude[@]}"; do
+		eval "echo $'\t'\"$stmt\","
+	done
+	# x86_64 table: one match/notify pair per system call (eval expands $nr).
+	for syscall in "${!x86_64_syscalls[@]}"; do
+		nr=${x86_64_syscalls[$syscall]}
+		for stmt in "${syscall_jump[@]}"; do
+			eval "echo $'\t'\"$stmt\","
+		done
+	done
+	# Default verdict for x86_64, then the i386 entry point, which reloads the syscall number.
+	echo $'\t''BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),'
+	echo $'\t''BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),'
+	# i386 table, same shape as the x86_64 one.
+	for syscall in "${!i386_syscalls[@]}"; do
+		nr=${i386_syscalls[$syscall]}
+		for stmt in "${syscall_jump[@]}"; do
+			eval "echo $'\t'\"$stmt\","
+		done
+	done
+	# Default verdict for i386.
+	echo $'\t''BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),'
+
+	cat <<-EOF
+	};
+	
+	const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);
+
+	/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+	EOF
+}
+
+gen_header() { # Print the matching C header on stdout (redirected to arch/x86/syscall.h below).
+	cat <<-EOF
+	/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+
+	extern const struct sock_filter syscall_filter[];
+	extern const size_t syscall_filter_length;
+
+	EOF
+	# x86_64 numbers are exported as BST_NR_<name>, i386 ones as BST_NR_<name>_32.
+	for syscall in "${!x86_64_syscalls[@]}"; do
+		echo "#define BST_NR_${syscall} ${x86_64_syscalls[$syscall]}"
+	done
+
+	for syscall in "${!i386_syscalls[@]}"; do
+		echo "#define BST_NR_${syscall}_32 ${i386_syscalls[$syscall]}"
+	done
+	# Largest number in each table, exported as BST_NR_MAX and BST_NR_MAX32.
+	max=0
+	for syscall in "${!x86_64_syscalls[@]}"; do
+		(( ${x86_64_syscalls[$syscall]} > max )) && max=${x86_64_syscalls[$syscall]}
+	done
+
+	max32=0
+	for syscall in "${!i386_syscalls[@]}"; do
+		(( ${i386_syscalls[$syscall]} > max32 )) && max32=${i386_syscalls[$syscall]}
+	done
+
+	cat <<-EOF
+
+	#define BST_SECCOMP_32 1
+
+	#define BST_NR_MAX $max
+	#define BST_NR_MAX32 $max32
+
+	/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+	EOF
+}
+
+gen_source > arch/x86/syscall.c
+gen_header > arch/x86/syscall.h
diff --git a/arch/x86/syscall.c b/arch/x86/syscall.c
new file mode 100644
index 0000000..fc426c9
--- /dev/null
+++ b/arch/x86/syscall.c
@@ -0,0 +1,35 @@
+/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+
+#include <stddef.h>
+#include <linux/audit.h>
+#include <linux/bpf_common.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+/* For the x32 ABI, all system call numbers have bit 30 set */
+#define X32_SYSCALL_BIT 0x40000000
+
+const struct sock_filter syscall_filter[] = {
+	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_I386, 10, 0),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
+	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
+	BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, X32_SYSCALL_BIT, 0, 1),
+	BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, X32_SYSCALL_BIT),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 133, 0, 1),
+	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 259, 0, 1),
+	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
+	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 14, 0, 1),
+	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 297, 0, 1),
+	BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
+	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+};
+
+const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);
+
+/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
diff --git a/arch/x86/syscall.h b/arch/x86/syscall.h
new file mode 100644
index 0000000..8ed4914
--- /dev/null
+++ b/arch/x86/syscall.h
@@ -0,0 +1,16 @@
+/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
+
+extern const struct sock_filter syscall_filter[];
+extern const size_t syscall_filter_length;
+
+#define BST_NR_mknod 133
+#define BST_NR_mknodat 259
+#define BST_NR_mknod_32 14
+#define BST_NR_mknodat_32 297
+
+#define BST_SECCOMP_32 1
+
+#define BST_NR_MAX 259
+#define BST_NR_MAX32 297
+
+/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
diff --git a/arch/x86_64 b/arch/x86_64
new file mode 120000
index 0000000..f4bad79
--- /dev/null
+++ b/arch/x86_64
@@ -0,0 +1 @@
+x86
\ No newline at end of file
diff --git a/capable.h b/capable.h
index 92e62e5..7a3d598 100644
--- a/capable.h
+++ b/capable.h
@@ -20,6 +20,7 @@
 # define BST_CAP_SETUID         ((uint64_t) 1 << CAP_SETUID)
 # define BST_CAP_SETGID         ((uint64_t) 1 << CAP_SETGID)
 # define BST_CAP_SYS_CHROOT     ((uint64_t) 1 << CAP_SYS_CHROOT)
+# define BST_CAP_MKNOD          ((uint64_t) 1 << CAP_MKNOD)
 
 extern int deny_new_capabilities;
 
diff --git a/config.h.in b/config.h.in
index 14292b9..c596351 100644
--- a/config.h.in
+++ b/config.h.in
@@ -12,8 +12,13 @@
 # define LIBEXECDIR "@libexecdir@"
 # define VERSION "@version@"
 
+#mesondefine ARCH
+#mesondefine ARCH_X86
+#mesondefine ARCH_X86_64
+
+#mesondefine HAVE_SECCOMP_UNOTIFY
+#mesondefine HAVE_SYSTEMD
 #mesondefine HAVE_SYS_mount_setattr
 #mesondefine HAVE_close_range
-#mesondefine HAVE_SYSTEMD
 
 #endif /* !CONFIG_H_ */
diff --git a/enter.c b/enter.c
index 2c8357e..64e4106 100644
--- a/enter.c
+++ b/enter.c
@@ -27,8 +27,10 @@
 #include "bst_limits.h"
 #include "capable.h"
 #include "compat.h"
+#include "config.h"
 #include "enter.h"
 #include "errutil.h"
+#include "fd.h"
 #include "mount.h"
 #include "net.h"
 #include "ns.h"
@@ -40,6 +42,10 @@
 #include "util.h"
 #include "fd.h"
 
+#ifdef HAVE_SECCOMP_UNOTIFY
+# include "sec.h"
+#endif
+
 static inline size_t append_argv(char **argv, size_t argc, char *arg)
 {
 	if (argc >= ARG_MAX) {
@@ -456,6 +462,14 @@ int enter(struct entry_settings *opts)
 	}
 	ns_enter_postfork(namespaces, ns_len);
 
+#ifdef HAVE_SECCOMP_UNOTIFY
+		int seccomp_fd = sec_seccomp_install_filter();
+		if (seccomp_fd != -1) {
+			send_fd(outer_helper.fd, seccomp_fd);
+			close(seccomp_fd);
+		}
+#endif
+
 	outer_helper_close(&outer_helper);
 
 	int rtnl = init_rtnetlink_socket();
diff --git a/meson.build b/meson.build
index ae645e5..51984f8 100644
--- a/meson.build
+++ b/meson.build
@@ -51,18 +51,26 @@ if get_option('optimization') != '0'
 		language: ['c'])
 endif
 
+arch = host_machine.cpu_family()
+
 config = configuration_data()
 config.set('package', meson.project_name())
 config.set('bindir', bindir)
 config.set('libexecdir', libexecdir)
 config.set('version', version)
 
+config.set('ARCH', arch)
+config.set('ARCH_@0@'.format(arch.to_upper()), 1)
+
 config.set('HAVE_SYS_mount_setattr', cc.has_header_symbol('syscall.h', 'SYS_mount_setattr'))
 config.set('HAVE_close_range', cc.has_function('close_range'))
 
 libdbus = dependency('dbus-1', required: false)
 config.set('HAVE_SYSTEMD', libdbus.found())
 
+has_seccomp_unotify = cc.has_header_symbol('linux/seccomp.h', 'SECCOMP_FILTER_FLAG_NEW_LISTENER')
+config.set('HAVE_SECCOMP_UNOTIFY', has_seccomp_unotify)
+
 configure_file(input: 'config.h.in', output: 'config.h', configuration: config)
 
 bst_init_sources = [
@@ -113,6 +121,14 @@ if libdbus.found()
 	bst_sources += ['cgroup_systemd.c']
 endif
 
+if has_seccomp_unotify
+	bst_sources += [
+		'arch/@0@/syscall.c'.format(arch),
+		'proc.c',
+		'sec.c',
+	]
+endif
+
 executable('bst', bst_sources, install: true, dependencies: [libdbus])
 
 if not get_option('no-setcap-or-suid')
@@ -125,6 +141,7 @@ if not get_option('no-setcap-or-suid')
 			'cap_sys_admin',
 			'cap_sys_chroot',
 			'cap_sys_ptrace',
+			'cap_mknod',
 		],
 		'bst-unpersist': [
 			'cap_sys_admin',
diff --git a/outer.c b/outer.c
index 5dc402a..860f95e 100644
--- a/outer.c
+++ b/outer.c
@@ -23,6 +23,7 @@
 #include "capable.h"
 #include "cgroup.h"
 #include "compat.h"
+#include "config.h"
 #include "enter.h"
 #include "fd.h"
 #include "outer.h"
@@ -31,6 +32,10 @@
 #include "userns.h"
 #include "util.h"
 
+#ifdef HAVE_SECCOMP_UNOTIFY
+# include "sec.h"
+#endif
+
 enum {
 	/* This should be enough for defining our mappings. If we assign
 	   340 mappings, and since each line would contain at most
@@ -404,7 +409,13 @@ void outer_helper_spawn(struct outer_helper *helper)
 	ssize_t count = write(fd, &ok, sizeof (ok));
 	assert((ssize_t)(sizeof (ok)) == count);
 
+#ifdef HAVE_SECCOMP_UNOTIFY
+	int seccomp_fd = recv_fd(fd);
+	sec_seccomp_supervisor(seccomp_fd);
+	__builtin_unreachable();
+#else
 	_exit(0);
+#endif
 }
 
 void outer_helper_sendpid(const struct outer_helper *helper, pid_t pid)
diff --git a/proc.c b/proc.c
new file mode 100644
index 0000000..af7f922
--- /dev/null
+++ b/proc.c
@@ -0,0 +1,47 @@
+/* Copyright © 2022 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+/* Reads the "status" file of the process directory open at procfd and stores
+   the fields we care about (currently only the umask) into *out. Fields that
+   are not present in the file are left zeroed.
+
+   Returns 0 on success, or -1 with errno set if the file cannot be opened. */
+int proc_read_status(int procfd, struct proc_status *out)
+{
+	memset(out, 0, sizeof (*out));
+
+	int statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
+	if (statusfd == -1) {
+		return -1;
+	}
+
+	FILE *f = fdopen(statusfd, "r");
+	if (f == NULL) {
+		/* No stream took ownership of the descriptor: release it ourselves,
+		   keeping the errno from fdopen for the caller's diagnostics. */
+		int saved_errno = errno;
+		close(statusfd);
+		errno = saved_errno;
+		return -1;
+	}
+
+	char line[4096];
+	while (fgets(line, sizeof (line), f)) {
+		/* Lines that don't match the pattern leave out->umask untouched. */
+		sscanf(line, "Umask:\t%o\n", &out->umask);
+	}
+
+	fclose(f);
+	return 0;
+}
diff --git a/proc.h b/proc.h
new file mode 100644
index 0000000..c204e6b
--- /dev/null
+++ b/proc.h
@@ -0,0 +1,21 @@
+/* Copyright © 2022 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */
+
+#ifndef PROC_H_
+# define PROC_H_
+
+# include <sys/types.h>
+
+/* Subset of /proc/<pid>/status that proc_read_status() extracts. */
+struct proc_status {
+	mode_t umask; /* value of the "Umask:" line; 0 if the line is absent */
+};
+
+/* Fills *out from the "status" file under the directory open at procfd.
+   Returns 0 on success, -1 on failure. */
+int proc_read_status(int procfd, struct proc_status *out);
+
+#endif /* !PROC_H_ */
diff --git a/sec.c b/sec.c
new file mode 100644
index 0000000..2d394ca
--- /dev/null
+++ b/sec.c
@@ -0,0 +1,477 @@
+/* Copyright © 2024 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "arch.h"
+#include "capable.h"
+#include "proc.h"
+#include "sec.h"
+#include "util.h"
+
+typedef int syscall_handler_func(int, int, struct seccomp_notif *);
+
+enum {
+	SYSCALL_HANDLED,
+	SYSCALL_CONTINUE,
+};
+
+static int self_mnt_nsfd(void)
+{
+	/* Opened on first use, then reused; err() exits if the open fails. */
+	static int cached_fd = -1;
+	if (cached_fd != -1) {
+		return cached_fd;
+	}
+	cached_fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+	if (cached_fd == -1) {
+		err(1, "open /proc/self/ns/mnt");
+	}
+	return cached_fd;
+}
+
+static int check_seccomp_cookie(int seccomp_fd, __u64 *id)
+{
+	return ioctl(seccomp_fd, SECCOMP_IOCTL_NOTIF_ID_VALID, id); /* callers treat -1 as: *id is no longer pending */
+}
+
+static int resolve_dirfd(int procfd, int dirfd)
+{
+	/* Re-opens the target's dirfd through procfd ("cwd" for AT_FDCWD, else
+	   "fd/<n>"). Returns our own O_PATH descriptor, or -EINVAL on failure. */
+	char fdpath[PATH_MAX+1] = "cwd";
+	if (dirfd != AT_FDCWD) {
+		int len = snprintf(fdpath, sizeof (fdpath), "fd/%d", dirfd);
+		if (len < 0 || (size_t) len >= sizeof (fdpath)) {
+			warnx("fd/%d takes more than PATH_MAX bytes.", dirfd);
+			return -EINVAL;
+		}
+	}
+
+	make_capable(BST_CAP_SYS_PTRACE | BST_CAP_DAC_OVERRIDE);
+	int realdirfd = openat(procfd, fdpath, O_PATH | O_CLOEXEC);
+	int saved_errno = errno; /* reset_capabilities() might overwrite it */
+	reset_capabilities();
+	if (realdirfd == -1) {
+		errno = saved_errno;
+		warn("open");
+		return -EINVAL;
+	}
+	return realdirfd;
+}
+
+struct arg_buf {
+	uintptr_t addr; /* address in the target's memory; 0 ends an array of these */
+	size_t size;    /* bytes to transfer (overwritten with the amount read for inputs) */
+	void *buf;      /* local buffer to fill or to copy from */
+};
+
+typedef int runproc_func(int procfd, void *cookie); /* callback run by run_in_process_context() */
+
+static int run_in_process_context(int seccomp_fd, int procfd,
+		struct seccomp_notif *req,
+		struct arg_buf *in,
+		struct arg_buf *out,
+		void *cookie,
+		runproc_func *fn)
+{
+	/* Reads the `in` buffers from the target's memory, runs fn(procfd, cookie)
+	   inside its mount namespace, then writes the `out` buffers back. Arrays
+	   end at the first addr == 0 entry. Returns fn's result or a -errno. */
+	int rc = 0;
+
+	make_capable(BST_CAP_SYS_PTRACE | BST_CAP_DAC_OVERRIDE);
+	int selfmnt = self_mnt_nsfd();
+	int memfd = openat(procfd, "mem", O_RDWR | O_CLOEXEC);
+	int mntns = openat(procfd, "ns/mnt", O_RDONLY | O_CLOEXEC);
+	reset_capabilities();
+
+	if (memfd == -1) {
+		warn("open /proc/<pid>/mem");
+		rc = -EINVAL;
+		goto error_close;
+	}
+	if (mntns == -1) {
+		warn("open /proc/<pid>/ns/mnt");
+		rc = -EINVAL;
+		goto error_close;
+	}
+
+	for (struct arg_buf *a = in; a && a->addr; a++) {
+		size_t total = 0;
+		while (total < a->size) {
+			ssize_t nread = pread(memfd, (char *) a->buf + total, a->size - total, a->addr + total);
+			if (nread == -1 && total == 0) {
+				warn("pread %lx:%zu", a->addr, a->size);
+				rc = -EFAULT;
+				goto error_close;
+			}
+			if (nread <= 0) {
+				/* EOF, or the rest of the range is unreadable: keep what we got. */
+				break;
+			}
+			total += nread;
+		}
+		a->size = total;
+	}
+
+	/* Check again that the process is alive and blocked on the syscall. This
+	   handles cases where the syscall got interrupted by a signal handler
+	   and the program state changed before we read the pathname or other
+	   information from proc. */
+	if (check_seccomp_cookie(seccomp_fd, &req->id) == -1) {
+		rc = -errno;
+		goto error_close;
+	}
+
+	make_capable(BST_CAP_SYS_ADMIN | BST_CAP_SYS_CHROOT);
+	int rc2 = setns(mntns, CLONE_NEWNS);
+	reset_capabilities();
+
+	if (rc2 == -1) {
+		warn("setns");
+		rc = -EOPNOTSUPP;
+		goto error;
+	}
+
+	/* Callbacks report failure as -errno, so test the sign rather than -1. */
+	rc = fn(procfd, cookie);
+	if (rc < 0) {
+		goto error;
+	}
+
+	for (struct arg_buf *a = out; a && a->addr; a++) {
+		for (size_t done = 0; done < a->size; ) {
+			ssize_t nwrite = pwrite(memfd, (const char *) a->buf + done, a->size - done, a->addr + done);
+			if (nwrite == -1) {
+				warn("pwrite %lx:%zu", a->addr, a->size);
+				rc = -EFAULT;
+				goto error;
+			}
+			done += nwrite;
+		}
+	}
+
+error:
+	make_capable(BST_CAP_SYS_ADMIN | BST_CAP_SYS_CHROOT);
+	rc2 = setns(selfmnt, CLONE_NEWNS);
+	reset_capabilities();
+	if (rc2 == -1) {
+		err(1, "setns");
+	}
+
+error_close:
+	close(mntns);
+	close(memfd);
+	return rc;
+}
+
+struct mknodat_args {
+	int dirfd;               /* directory descriptor valid in the supervisor */
+	mode_t mode;
+	dev_t dev;
+	char pathname[PATH_MAX]; /* path copied out of the target's memory */
+};
+
+static int sec__mknodat_callback(int procfd, void *cookie)
+{
+	/* Creates the node described by cookie (a struct mknodat_args) while
+	   temporarily adopting the umask read from procfd's status file.
+	   Returns 0 on success or a negative errno value. */
+
+	struct mknodat_args *params = cookie;
+	struct proc_status target;
+
+	if (proc_read_status(procfd, &target) == -1) {
+		warn("proc_read_status /proc/<pid>/status");
+		return -EINVAL;
+	}
+
+	mode_t saved_umask = umask(target.umask);
+
+	make_capable(BST_CAP_MKNOD);
+	int created = mknodat(params->dirfd, params->pathname, params->mode, params->dev);
+	int result = (created == -1) ? -errno : 0;
+	reset_capabilities();
+
+	if (saved_umask != (mode_t) -1) {
+		umask(saved_umask);
+	}
+
+	return result;
+}
+
+static int sec__mknodat_impl(int seccomp_fd, int procfd,
+		struct seccomp_notif *req,
+		int dirfd,
+		uintptr_t pathnameaddr,
+		mode_t mode,
+		dev_t dev)
+{
+	/* Creates the node on the target's behalf when it is one of the
+	   allow-listed devices below; returns SYSCALL_CONTINUE otherwise so the
+	   target's own syscall proceeds. Failures are negative errno values. */
+	if (!S_ISCHR(mode) && !S_ISBLK(mode)) {
+		/* Fallthrough for non-privileged operations -- the caller already
+		   has the rights to do this themselves. */
+		return SYSCALL_CONTINUE;
+	}
+
+	/* Is this one of the safe devices? */
+	struct devtype {
+		mode_t type;
+		dev_t  dev;
+	};
+
+	const struct devtype safe_devices[] = {
+		{ .type = S_IFCHR, .dev = makedev(0, 0) }, // whiteout device
+		{ .type = S_IFCHR, .dev = makedev(1, 3) }, // null device
+		{ .type = S_IFCHR, .dev = makedev(1, 5) }, // zero device
+		{ .type = S_IFCHR, .dev = makedev(1, 7) }, // full device
+		{ .type = S_IFCHR, .dev = makedev(1, 8) }, // random device
+		{ .type = S_IFCHR, .dev = makedev(1, 9) }, // urandom device
+		{ .type = S_IFCHR, .dev = makedev(5, 0) }, // tty device
+	};
+
+	int is_safe = 0;
+	for (size_t i = 0; i < lengthof(safe_devices) && !is_safe; i++) {
+		is_safe = (mode & S_IFMT) == safe_devices[i].type && dev == safe_devices[i].dev;
+	}
+	if (!is_safe) {
+		return SYSCALL_CONTINUE;
+	}
+
+	/* The device is safe to create -- perform shenanigans */
+	int realdirfd = resolve_dirfd(procfd, dirfd);
+	if (realdirfd < 0) {
+		return realdirfd;
+	}
+
+	struct mknodat_args args = {
+		.dirfd = realdirfd,
+		.dev = dev,
+		.mode = mode,
+	};
+
+	struct arg_buf in[] = {
+		{
+			.addr = pathnameaddr,
+			.buf  = &args.pathname[0],
+			.size = PATH_MAX-1, /* last byte stays 0: pathname is always NUL-terminated */
+		},
+		{
+			.addr = 0,
+		},
+	};
+
+	int rc = run_in_process_context(seccomp_fd, procfd, req, in, NULL, &args, sec__mknodat_callback);
+	close(realdirfd);
+	return rc;
+}
+
+static int sec__mknod(int seccomp_fd, int procfd, struct seccomp_notif *req)
+{
+	/* mknod(pathname, mode, dev) is handled as mknodat() relative to AT_FDCWD. */
+	const __u64 *argv = req->data.args;
+
+	return sec__mknodat_impl(seccomp_fd, procfd, req, AT_FDCWD,
+			(uintptr_t) argv[0], (mode_t) argv[1], (dev_t) argv[2]);
+}
+
+static int sec__mknodat(int seccomp_fd, int procfd, struct seccomp_notif *req)
+{
+	/* mknodat(dirfd, pathname, mode, dev): forward the raw syscall arguments. */
+	const __u64 *argv = req->data.args;
+	int dirfd = (int) argv[0];
+
+	return sec__mknodat_impl(seccomp_fd, procfd, req, dirfd,
+			(uintptr_t) argv[1], (mode_t) argv[2], (dev_t) argv[3]);
+}
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	return syscall(__NR_seccomp, op, flags, args); /* raw system call; callers check for -1 */
+}
+
+int sec_seccomp_install_filter(void)
+{
+	/* Installs syscall_filter and returns the listener fd given by the kernel. */
+	struct sock_fprog prog = {
+		.len    = syscall_filter_length,
+		.filter = (struct sock_filter *)syscall_filter,
+	};
+	int listener = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog);
+	if (listener != -1) {
+		return listener;
+	}
+	if (errno != EBUSY) {
+		err(1, "seccomp SECCOMP_SET_MODE_FILTER");
+	}
+	// We're likely running bst in bst; ignore the error, and return
+	// a useless file descriptor to pass to the seccomp supervisor
+	return epoll_create1(EPOLL_CLOEXEC);
+}
+
+static void sec_seccomp_dispatch_syscall(int seccomp_fd,
+		struct seccomp_notif *req,
+		struct seccomp_notif_resp *resp)
+{
+	/* Answers one notification: runs the handler registered for the system
+	   call, or lets the kernel continue it unchanged when there is none. */
+	static syscall_handler_func *const syscall_table[BST_NR_MAX+1] = {
+#ifdef BST_NR_mknod
+		[BST_NR_mknod]   = sec__mknod,
+#endif
+		[BST_NR_mknodat] = sec__mknodat,
+	};
+
+#ifdef BST_SECCOMP_32
+	static syscall_handler_func *const syscall_table_32[BST_NR_MAX32+1] = {
+#ifdef BST_NR_mknod_32
+		[BST_NR_mknod_32]   = sec__mknod,
+#endif
+		[BST_NR_mknodat_32] = sec__mknodat,
+	};
+#endif
+
+	resp->id = req->id;
+	syscall_handler_func *const *table = syscall_table;
+	size_t nr_syscall = lengthof(syscall_table);
+#ifdef ARCH_X86_64
+#ifdef BST_SECCOMP_32
+	if (req->data.arch == AUDIT_ARCH_I386) {
+		table = syscall_table_32;
+		nr_syscall = lengthof(syscall_table_32);
+	}
+#endif
+	if (req->data.arch == AUDIT_ARCH_X86_64) {
+		/* x32 system calls are the same as x86_64, except they have bit 30
+		 * set; we're not making any difference here, so reset it */
+		req->data.nr &= ~0x40000000;
+	}
+#endif
+
+	if (req->data.nr <= 0 || (size_t) req->data.nr >= nr_syscall) {
+		goto fallthrough;
+	}
+	syscall_handler_func *fn = table[(size_t) req->data.nr];
+	if (!fn) {
+		goto fallthrough;
+	}
+	char procpath[PATH_MAX+1];
+	if ((size_t) snprintf(procpath, sizeof (procpath), "/proc/%d", req->pid) >= sizeof (procpath)) {
+		errx(1, "/proc/%d takes more than PATH_MAX bytes.", req->pid);
+	}
+
+	int procfd = open(procpath, O_PATH | O_DIRECTORY | O_CLOEXEC);
+	if (procfd == -1) {
+		if (errno == ENOENT) {
+			goto fallthrough;
+		}
+		err(1, "open");
+	}
+
+	int rc = fn(seccomp_fd, procfd, req);
+	close(procfd);
+
+	if (rc < 0) {
+		resp->error = rc;
+	} else if (rc == SYSCALL_CONTINUE) {
+		goto fallthrough;
+	}
+
+send:
+	if (ioctl(seccomp_fd, SECCOMP_IOCTL_NOTIF_SEND, resp) == -1) {
+		// ENOENT is normal -- this means the syscall got interrupted by a
+		// signal.
+		if (errno != ENOENT) {
+			warn("ioctl SECCOMP_IOCTL_NOTIF_SEND");
+		}
+	}
+	return;
+
+fallthrough:
+	resp->flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
+	goto send;
+}
+
+noreturn void sec_seccomp_supervisor(int seccomp_fd)
+{
+	/* Privileged helper loop: receives the notifications raised by the
+	   filter installed in the unprivileged child (which lives in a user
+	   namespace) and performs the vetted system calls on its behalf.
+
+	   Typical uses:
+	   * mknod of devices considered "safe", such as /dev/null or the
+	     overlayfs whiteout file.
+	   * devtmpfs mounts backed by our custom bst_devtmpfs logic.
+
+	   The loop blocks on purpose -- should other long-running agents be
+	   needed later, an epoll loop or separate processes may be in order. */
+
+	struct seccomp_notif_sizes sizes;
+	if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) == -1) {
+		err(1, "seccomp SECCOMP_GET_NOTIF_SIZES");
+	}
+
+	/* Our headers may declare a larger 'struct seccomp_notif_resp' than the
+	   (older) running kernel expects. Size the response buffer for the
+	   bigger of the two so that writing any field we know about stays
+	   within the allocation. */
+	size_t resp_size = sizeof (struct seccomp_notif_resp);
+	if (sizes.seccomp_notif_resp > resp_size) {
+		resp_size = sizes.seccomp_notif_resp;
+	}
+
+	struct seccomp_notif *req = malloc(sizes.seccomp_notif);
+	if (req == NULL) {
+		err(1, "malloc");
+	}
+
+	struct seccomp_notif_resp *resp = malloc(resp_size);
+	if (resp == NULL) {
+		err(1, "malloc");
+	}
+
+	/* Serve notifications forever; both buffers are cleared before each receive. */
+	while (1) {
+		memset(req,  0, sizes.seccomp_notif);
+		memset(resp, 0, resp_size);
+
+		if (ioctl(seccomp_fd, SECCOMP_IOCTL_NOTIF_RECV, req) != -1) {
+			sec_seccomp_dispatch_syscall(seccomp_fd, req, resp);
+			continue;
+		}
+
+		if (errno == EINTR) {
+			continue;
+		}
+		if (errno == ENOTTY) {
+			/* seccomp running in seccomp, which is not supported/needed */
+			_exit(0);
+		}
+
+		err(1, "ioctl SECCOMP_IOCTL_NOTIF_RECV");
+	}
+}
+
diff --git a/sec.h b/sec.h
new file mode 100644
index 0000000..1da2ce3
--- /dev/null
+++ b/sec.h
@@ -0,0 +1,15 @@
+/* Copyright © 2022 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */
+
+#ifndef SEC_H_
+# define SEC_H_
+
+# include <stdnoreturn.h>
+
+int sec_seccomp_install_filter(void);
+noreturn void sec_seccomp_supervisor(int);
+
+#endif /* !SEC_H_ */