Skip to content

Commit

Permalink
fd: mark fds with CLOEXEC instead of closing them
Browse files Browse the repository at this point in the history
--close-fd would fail to work on some systems when entering spacetimes
where /proc would not be mounted, because it would attempt to iterate
over all file descriptors as listed in /proc/self/fd right before
execve, and of course this cannot work when /proc isn't there.

This fixes the problem by doing the operation earlier, after the setup
script has ran, but before pivoting roots; of course, we can't close the
range outright, since we're likely to use some of these file
descriptors, but we can instead set the CLOEXEC bit on them.
  • Loading branch information
Snaipe committed Oct 27, 2023
1 parent 588d370 commit b65ff75
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
32 changes: 22 additions & 10 deletions compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <dirent.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -56,9 +57,9 @@ int bst_close_range(unsigned int from, unsigned int to, unsigned int flags)
errno = ENOSYS;
#endif

if (rc == -1 && errno == ENOSYS) {
/* The system call is not implemented. Fall back to the good old
fashioned method.
if (rc == -1 && (errno == ENOSYS || errno == EINVAL)) {
/* The system call is not implemented, or it doesn't support
CLOSE_RANGE_CLOEXEC. Fall back to the good old fashioned method.
Note that this isn't particularly efficient. bst_close_range is
itself called in a loop, which means traversing the list of fds
Expand Down Expand Up @@ -88,13 +89,24 @@ int bst_close_range(unsigned int from, unsigned int to, unsigned int flags)
continue;
}

/* Note: close takes a signed int, while close_range takes unsigned
ints. I'm not too sure how negative file descriptors are handled
(and I don't care much to be honest) so I'll just hope that the
system call just reads out an unsigned integer kernel-side. */

if (close((int) fd) == -1) {
err(1, "bst_close_range: close %d", fd);
/* Note: close/fcntl takes a signed int, while close_range takes
unsigned ints. I'm not too sure how negative file descriptors
are handled (and I don't care much to be honest) so I'll just
hope that the system call just reads out an unsigned integer
kernel-side. */

if (flags & BST_CLOSE_RANGE_CLOEXEC) {
int fdflags = fcntl((int) fd, F_GETFD);
if (fdflags == -1) {
err(1, "bst_close_range: fcntl %d F_GETFD", fd);
}
if (fcntl((int) fd, F_SETFD, fdflags | FD_CLOEXEC) == -1) {
err(1, "bst_close_range: fcntl %d F_SETFD", fd);
}
} else {
if (close((int) fd) == -1) {
err(1, "bst_close_range: close %d", fd);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

/* From the kernel headers */
# define BST_CLOSE_RANGE_UNSHARE (1U << 1)
# define BST_CLOSE_RANGE_CLOEXEC (1U << 2)

size_t strlcpy(char *restrict dst, const char *restrict src, size_t size);
unsigned int parse_fd(char *optarg);
Expand Down
11 changes: 6 additions & 5 deletions enter.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,12 @@ int enter(struct entry_settings *opts)
}
}

for (const struct close_range *range = opts->close_fds; range < opts->close_fds + opts->nclose_fds; ++range) {
if (bst_close_range(range->from, range->to, BST_CLOSE_RANGE_CLOEXEC) == -1) {
err(1, "close_range %d %d", range->from, range->to);
}
}

/*
* Only mount a a cgroup hierarchy over sys/fs/cgroup if:
* 1) The user has not specified --no_cgroup_remount
Expand Down Expand Up @@ -807,11 +813,6 @@ int enter(struct entry_settings *opts)
}
const char *init = opts->init + rootlen;

for (const struct close_range *range = opts->close_fds; range < opts->close_fds + opts->nclose_fds; ++range) {
if (bst_close_range(range->from, range->to, BST_CLOSE_RANGE_UNSHARE) == -1) {
err(1, "close_range %d %d", range->from, range->to);
}
}
execve(init, argv, opts->envp);
err(1, "execve %s", init);
}
Expand Down

0 comments on commit b65ff75

Please sign in to comment.