From b8b21a10b3e4308805d30a0a9a9eb3e9ce186972 Mon Sep 17 00:00:00 2001 From: Markus Bauer Date: Sat, 1 Mar 2025 14:39:05 +0100 Subject: [PATCH 1/3] Add fakeroot - combined mount and chroot. (cherry picked from commit ad2587b553f3f448e78fbebea4375d6fb336dfd8) --- examples/fakeroot.rs | 24 ++++++++ src/child.rs | 7 +++ src/config.rs | 7 +++ src/fakeroot.rs | 140 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 5 files changed, 179 insertions(+) create mode 100644 examples/fakeroot.rs create mode 100644 src/fakeroot.rs diff --git a/examples/fakeroot.rs b/examples/fakeroot.rs new file mode 100644 index 000000000..c41c002cf --- /dev/null +++ b/examples/fakeroot.rs @@ -0,0 +1,24 @@ +extern crate unshare; + +use std::process::exit; + + +fn main() { + let mut cmd = unshare::Command::new("/usr/bin/ls"); + cmd.arg("-l"); + cmd.arg("/"); + + cmd.fakeroot_enable("/dev/shm/sandbox_root"); + cmd.fakeroot_mount("/bin", "/bin", true); + cmd.fakeroot_mount("/etc", "/etc", true); + cmd.fakeroot_mount("/lib", "/lib", true); + cmd.fakeroot_mount("/lib64", "/lib64", true); + cmd.fakeroot_mount("/usr", "/usr", true); + cmd.current_dir("/"); + + match cmd.status().unwrap() { + // propagate signal + unshare::ExitStatus::Exited(x) => exit(x as i32), + unshare::ExitStatus::Signaled(x, _) => exit((128+x as i32) as i32), + } +} diff --git a/src/child.rs b/src/child.rs index 5f30c3680..707b81fac 100644 --- a/src/child.rs +++ b/src/child.rs @@ -11,6 +11,7 @@ use libc::{SIG_DFL, SIG_SETMASK}; use crate::run::{ChildInfo, MAX_PID_LEN}; use crate::error::ErrorCode as Err; +use crate::fakeroot::build_fakeroot; // And at this point we've reached a special time in the life of the // child. The child must now be considered hamstrung and unable to @@ -143,6 +144,12 @@ pub unsafe fn child_after_clone(child: &ChildInfo) -> ! 
{ } }); + child.cfg.fake_root_base.as_ref().map(|base| { + if !build_fakeroot(base, child.cfg.fake_root_proc.as_ref(), child.cfg.fake_root_mounts.as_ref()) { + fail(Err::ChangeRoot, epipe); + } + }); + child.keep_caps.as_ref().map(|caps| { let header = ffi::CapsHeader { version: ffi::CAPS_V3, diff --git a/src/config.rs b/src/config.rs index ea4fdeebe..ba83ed5b8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,6 +6,7 @@ use nix::sys::signal::{Signal, SIGKILL}; use nix::sched::CloneFlags; use libc::{uid_t, gid_t}; +use crate::fakeroot::{FakeRootMount}; use crate::idmap::{UidMap, GidMap}; use crate::namespace::Namespace; use crate::stdio::Closing; @@ -23,6 +24,9 @@ pub struct Config { pub restore_sigmask: bool, pub make_group_leader: bool, // TODO(tailhook) session leader + pub fake_root_base: Option, + pub fake_root_proc: Option, + pub fake_root_mounts: Vec, } impl Default for Config { @@ -38,6 +42,9 @@ impl Default for Config { setns_namespaces: HashMap::new(), restore_sigmask: true, make_group_leader: false, + fake_root_base: None, + fake_root_mounts: Vec::new(), + fake_root_proc: None, } } } diff --git a/src/fakeroot.rs b/src/fakeroot.rs new file mode 100644 index 000000000..58fb16490 --- /dev/null +++ b/src/fakeroot.rs @@ -0,0 +1,140 @@ +use crate::ffi_util::ToCString; +use crate::{Command, Namespace}; +use libc::{MNT_DETACH, MS_BIND, MS_PRIVATE, MS_RDONLY, MS_REC, MS_REMOUNT}; +use std::ffi::{c_char, c_void, CString}; +use std::path::Path; + +pub struct FakeRootMount { + mountpoint: CString, + mountpoint_outer: CString, + src: CString, + readonly: bool, +} + +impl Command { + /// Enable "fakeroot" - the command will be rooted in a custom root directory. + /// + /// By default, the root directory is empty, share necessary directories with fakeroot_mount(). + /// This will automatically unshare the mount namespace. + /// It might be necessary to also unshare the user namespace. + /// + /// The "base" directory must be an empty directory, preferably on a tmpfs. 
+ /// The directory will be created if missing. + /// "/dev/shm/unshare_root" should work fine, or "/run/user/<uid>/unshare_root". + /// + /// Do NOT combine with manual pivot_root/chroot, fakeroot will take care of it. + pub fn fakeroot_enable(&mut self, base: &str) { + self.unshare(&[Namespace::Mount]); + self.config.fake_root_base = Some(base.to_cstring()); + self.config.fake_root_proc = Some(format!("{}/proc", base).to_cstring()); + } + + /// Add an existing directory to the fakeroot. + /// + /// fakeroot_enable() must be called first, otherwise this function will panic. + /// + /// Example usage: + /// cmd.fakeroot_mount("/bin", "/bin", true); + /// cmd.fakeroot_mount("/etc", "/etc", true); + /// cmd.fakeroot_mount("/lib", "/lib", true); + /// cmd.fakeroot_mount("/lib64", "/lib64", true); + /// cmd.fakeroot_mount("/usr", "/usr", true); + pub fn fakeroot_mount<P: AsRef<Path>>(&mut self, src: P, dst: &str, readonly: bool) { + let base = self + .config + .fake_root_base + .as_ref() + .expect("call fakeroot_enable() first!") + .to_str() + .unwrap(); + self.config.fake_root_mounts.push(FakeRootMount { + mountpoint: dst.to_cstring(), + mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), + src: src.as_ref().to_cstring(), + readonly, + }); + } +} + +/// This syscall sequence is more or less taken from nsjail (https://github.com/google/nsjail). 
+pub(crate) unsafe fn build_fakeroot( + base: &CString, + proc: Option<&CString>, + mountpoints: &[FakeRootMount], +) -> bool { + // define some libc constants + let null_char = 0 as *const c_char; + let null_void = 0 as *const c_void; + let slash = b"/\0".as_ptr() as *const c_char; + let dot = b".\0".as_ptr() as *const c_char; + let tmpfs = b"tmpfs\0".as_ptr() as *const c_char; + let procfs = b"proc\0".as_ptr() as *const c_char; + + // keep all mount changes private + libc::mkdir(base.as_ptr(), 0o777); + if libc::mount(slash, slash, null_char, MS_PRIVATE | MS_REC, null_void) < 0 { + return false; + } + + // create fakeroot filesystem + if libc::mount(null_char, base.as_ptr(), tmpfs, 0, null_void) < 0 { + return false; + } + + // mount directories - still read-write (because MS_BIND + MS_RDONLY are not supported) + for mount in mountpoints { + libc::mkdir(mount.mountpoint_outer.as_ptr(), 0o777); + if libc::mount( + mount.src.as_ptr(), + mount.mountpoint_outer.as_ptr(), + null_char, + MS_PRIVATE | MS_REC | MS_BIND, + null_void, + ) < 0 + { + return false; + } + } + + // mount new "/proc" (if available, for example: not in docker). No error if failing. 
+ proc.map(|proc| { + libc::mkdir(proc.as_ptr(), 0o777); + libc::mount(null_char, proc.as_ptr(), procfs, 0, null_void); + }); + + // chroot jail (try pivot_root first, use classic chroot if not available) + if libc::syscall(libc::SYS_pivot_root, base.as_ptr(), base.as_ptr()) >= 0 { + libc::umount2(slash, MNT_DETACH); + } else { + libc::chdir(base.as_ptr()); + libc::mount(dot, slash, null_char, 0, null_void); + if libc::chroot(dot) < 0 { + return false; + } + } + + // make directories actually read-only + libc::mount( + slash, + slash, + null_char, + MS_REMOUNT | MS_BIND | MS_RDONLY, + null_void, + ); + for mount in mountpoints { + if mount.readonly { + if libc::mount( + mount.mountpoint.as_ptr(), + mount.mountpoint.as_ptr(), + null_char, + MS_REMOUNT | MS_BIND | MS_RDONLY, + null_void, + ) < 0 + { + return false; + } + } + } + + true +} diff --git a/src/lib.rs b/src/lib.rs index 06f395c0c..f6b7e07b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,6 +50,7 @@ mod wait; mod stdio; mod debug; mod zombies; +mod fakeroot; pub use crate::error::Error; pub use crate::status::ExitStatus; From 2b13c29d1d1570dfc122b8782bcb458fceced1d0 Mon Sep 17 00:00:00 2001 From: Markus Bauer Date: Sat, 1 Mar 2025 16:47:13 +0100 Subject: [PATCH 2/3] Add tmpfs support to fakeroot (cherry picked from commit 07b42cf59d17319f3afa6df92e1ac28211f7a428) --- examples/fakeroot.rs | 2 ++ src/child.rs | 2 +- src/config.rs | 2 -- src/fakeroot.rs | 50 ++++++++++++++++++++++++++++++-------------- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/examples/fakeroot.rs b/examples/fakeroot.rs index c41c002cf..324bf5fac 100644 --- a/examples/fakeroot.rs +++ b/examples/fakeroot.rs @@ -13,6 +13,8 @@ fn main() { cmd.fakeroot_mount("/etc", "/etc", true); cmd.fakeroot_mount("/lib", "/lib", true); cmd.fakeroot_mount("/lib64", "/lib64", true); + cmd.fakeroot_filesystem("proc", "/proc"); + cmd.fakeroot_filesystem("tmpfs", "/tmp"); cmd.fakeroot_mount("/usr", "/usr", true); cmd.current_dir("/"); diff 
--git a/src/child.rs b/src/child.rs index 707b81fac..6f4b86a58 100644 --- a/src/child.rs +++ b/src/child.rs @@ -145,7 +145,7 @@ pub unsafe fn child_after_clone(child: &ChildInfo) -> ! { }); child.cfg.fake_root_base.as_ref().map(|base| { - if !build_fakeroot(base, child.cfg.fake_root_proc.as_ref(), child.cfg.fake_root_mounts.as_ref()) { + if !build_fakeroot(base, child.cfg.fake_root_mounts.as_ref()) { fail(Err::ChangeRoot, epipe); } }); diff --git a/src/config.rs b/src/config.rs index ba83ed5b8..8ef9dac23 100644 --- a/src/config.rs +++ b/src/config.rs @@ -25,7 +25,6 @@ pub struct Config { pub make_group_leader: bool, // TODO(tailhook) session leader pub fake_root_base: Option, - pub fake_root_proc: Option, pub fake_root_mounts: Vec, } @@ -44,7 +43,6 @@ impl Default for Config { make_group_leader: false, fake_root_base: None, fake_root_mounts: Vec::new(), - fake_root_proc: None, } } } diff --git a/src/fakeroot.rs b/src/fakeroot.rs index 58fb16490..2ae2be712 100644 --- a/src/fakeroot.rs +++ b/src/fakeroot.rs @@ -9,6 +9,7 @@ pub struct FakeRootMount { mountpoint_outer: CString, src: CString, readonly: bool, + is_special_fs: bool, // "src" is a filesystem type like "proc" or "tmpfs" } impl Command { @@ -26,7 +27,6 @@ impl Command { pub fn fakeroot_enable(&mut self, base: &str) { self.unshare(&[Namespace::Mount]); self.config.fake_root_base = Some(base.to_cstring()); - self.config.fake_root_proc = Some(format!("{}/proc", base).to_cstring()); } /// Add an existing directory to the fakeroot. @@ -52,23 +52,42 @@ impl Command { mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), src: src.as_ref().to_cstring(), readonly, + is_special_fs: false, + }); + } + + /// Add a new filesystem to the fakeroot. + /// + /// fakeroot_enable() must be called first, otherwise this function will panic. 
+ /// + /// Example usage: + /// cmd.fakeroot_filesystem("tmpfs", "/tmp"); + pub fn fakeroot_filesystem(&mut self, fstype: &str, dst: &str) { + let base = self + .config + .fake_root_base + .as_ref() + .expect("call fakeroot_enable() first!") + .to_str() + .unwrap(); + self.config.fake_root_mounts.push(FakeRootMount { + mountpoint: dst.to_cstring(), + mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), + src: fstype.to_cstring(), + readonly: false, + is_special_fs: true, }); } } /// This syscall sequence is more or less taken from nsjail (https://github.com/google/nsjail). -pub(crate) unsafe fn build_fakeroot( - base: &CString, - proc: Option<&CString>, - mountpoints: &[FakeRootMount], -) -> bool { +pub(crate) unsafe fn build_fakeroot(base: &CString, mountpoints: &[FakeRootMount]) -> bool { // define some libc constants let null_char = 0 as *const c_char; let null_void = 0 as *const c_void; let slash = b"/\0".as_ptr() as *const c_char; let dot = b".\0".as_ptr() as *const c_char; let tmpfs = b"tmpfs\0".as_ptr() as *const c_char; - let procfs = b"proc\0".as_ptr() as *const c_char; // keep all mount changes private libc::mkdir(base.as_ptr(), 0o777); @@ -84,11 +103,16 @@ pub(crate) unsafe fn build_fakeroot( // mount directories - still read-write (because MS_BIND + MS_RDONLY are not supported) for mount in mountpoints { libc::mkdir(mount.mountpoint_outer.as_ptr(), 0o777); + let (src, fstype, flags) = if mount.is_special_fs { + (null_char, mount.src.as_ptr(), 0) + } else { + (mount.src.as_ptr(), null_char, MS_PRIVATE | MS_REC | MS_BIND) + }; if libc::mount( - mount.src.as_ptr(), + src, mount.mountpoint_outer.as_ptr(), - null_char, - MS_PRIVATE | MS_REC | MS_BIND, + fstype, + flags, null_void, ) < 0 { @@ -96,12 +120,6 @@ pub(crate) unsafe fn build_fakeroot( } } - // mount new "/proc" (if available, for example: not in docker). No error if failing. 
- proc.map(|proc| { - libc::mkdir(proc.as_ptr(), 0o777); - libc::mount(null_char, proc.as_ptr(), procfs, 0, null_void); - }); - // chroot jail (try pivot_root first, use classic chroot if not available) if libc::syscall(libc::SYS_pivot_root, base.as_ptr(), base.as_ptr()) >= 0 { libc::umount2(slash, MNT_DETACH); From 8fe8800dacf4f5ab2e86164443e4155853ddcdba Mon Sep 17 00:00:00 2001 From: Markus Bauer Date: Sun, 2 Mar 2025 12:31:32 +0100 Subject: [PATCH 3/3] Add files/devices and subdirectories to fakeroot (cherry picked from commit d582527a21cdb4f74c8c0abb6ee780460f70edcd) --- examples/fakeroot.rs | 1 + src/child.rs | 5 +++- src/config.rs | 4 +++ src/fakeroot.rs | 71 ++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 75 insertions(+), 6 deletions(-) diff --git a/examples/fakeroot.rs b/examples/fakeroot.rs index 324bf5fac..e902f7e13 100644 --- a/examples/fakeroot.rs +++ b/examples/fakeroot.rs @@ -10,6 +10,7 @@ fn main() { cmd.fakeroot_enable("/dev/shm/sandbox_root"); cmd.fakeroot_mount("/bin", "/bin", true); + cmd.fakeroot_mount_file("/dev/urandom", "/dev/urandom", false); cmd.fakeroot_mount("/etc", "/etc", true); cmd.fakeroot_mount("/lib", "/lib", true); cmd.fakeroot_mount("/lib64", "/lib64", true); diff --git a/src/child.rs b/src/child.rs index 6f4b86a58..4d6c623d1 100644 --- a/src/child.rs +++ b/src/child.rs @@ -145,7 +145,10 @@ pub unsafe fn child_after_clone(child: &ChildInfo) -> ! 
{ }); child.cfg.fake_root_base.as_ref().map(|base| { - if !build_fakeroot(base, child.cfg.fake_root_mounts.as_ref()) { + if !build_fakeroot(base, + child.cfg.fake_root_mkdirs.as_ref(), + child.cfg.fake_root_touchs.as_ref(), + child.cfg.fake_root_mounts.as_ref()) { fail(Err::ChangeRoot, epipe); } }); diff --git a/src/config.rs b/src/config.rs index 8ef9dac23..6840c9c35 100644 --- a/src/config.rs +++ b/src/config.rs @@ -26,6 +26,8 @@ pub struct Config { // TODO(tailhook) session leader pub fake_root_base: Option, pub fake_root_mounts: Vec, + pub fake_root_mkdirs: Vec, + pub fake_root_touchs: Vec, } impl Default for Config { @@ -43,6 +45,8 @@ impl Default for Config { make_group_leader: false, fake_root_base: None, fake_root_mounts: Vec::new(), + fake_root_mkdirs: Vec::new(), + fake_root_touchs: Vec::new(), } } } diff --git a/src/fakeroot.rs b/src/fakeroot.rs index 2ae2be712..9bc0f023f 100644 --- a/src/fakeroot.rs +++ b/src/fakeroot.rs @@ -1,6 +1,8 @@ use crate::ffi_util::ToCString; use crate::{Command, Namespace}; -use libc::{MNT_DETACH, MS_BIND, MS_PRIVATE, MS_RDONLY, MS_REC, MS_REMOUNT}; +use libc::{ + MNT_DETACH, MS_BIND, MS_PRIVATE, MS_RDONLY, MS_REC, MS_REMOUNT, O_CLOEXEC, O_CREAT, O_RDONLY, +}; use std::ffi::{c_char, c_void, CString}; use std::path::Path; @@ -29,6 +31,16 @@ impl Command { self.config.fake_root_base = Some(base.to_cstring()); } + fn fakeroot_mkdir(&mut self, base: &str, dir: &Path) { + dir.parent().map(|parent_dir| { + if dir != parent_dir { + self.fakeroot_mkdir(base, parent_dir); + let outer_dir = format!("{}/{}", base, dir.to_str().unwrap()); + self.config.fake_root_mkdirs.push(outer_dir.to_cstring()); + } + }); + } + /// Add an existing directory to the fakeroot. /// /// fakeroot_enable() must be called first, otherwise this function will panic. 
@@ -46,7 +58,39 @@ impl Command { .as_ref() .expect("call fakeroot_enable() first!") .to_str() - .unwrap(); + .unwrap() + .to_owned(); + self.fakeroot_mkdir(base.as_ref(), Path::new(dst)); + self.config.fake_root_mounts.push(FakeRootMount { + mountpoint: dst.to_cstring(), + mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), + src: src.as_ref().to_cstring(), + readonly, + is_special_fs: false, + }); + } + + /// Add an existing file or device to the fakeroot. + /// + /// fakeroot_enable() must be called first, otherwise this function will panic. + /// + /// Example usage: + /// cmd.fakeroot_mount_file("/dev/urandom", "/dev/urandom", false); + pub fn fakeroot_mount_file<P: AsRef<Path>>(&mut self, src: P, dst: &str, readonly: bool) { + let base = self + .config + .fake_root_base + .as_ref() + .expect("call fakeroot_enable() first!") + .to_str() + .unwrap() + .to_owned(); + Path::new(dst).parent().map(|parent_dir| { + self.fakeroot_mkdir(base.as_ref(), parent_dir); + }); + self.config + .fake_root_touchs + .push(format!("{}/{}", base, dst).to_cstring()); self.config.fake_root_mounts.push(FakeRootMount { mountpoint: dst.to_cstring(), mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), @@ -69,7 +113,9 @@ impl Command { .as_ref() .expect("call fakeroot_enable() first!") .to_str() - .unwrap(); + .unwrap() + .to_owned(); + self.fakeroot_mkdir(base.as_ref(), Path::new(dst)); self.config.fake_root_mounts.push(FakeRootMount { mountpoint: dst.to_cstring(), mountpoint_outer: format!("{}/{}", base, dst).to_cstring(), @@ -81,7 +127,12 @@ impl Command { } /// This syscall sequence is more or less taken from nsjail (https://github.com/google/nsjail). 
-pub(crate) unsafe fn build_fakeroot(base: &CString, mountpoints: &[FakeRootMount]) -> bool { +pub(crate) unsafe fn build_fakeroot( + base: &CString, + mkdirs: &[CString], + touchs: &[CString], + mountpoints: &[FakeRootMount], +) -> bool { // define some libc constants let null_char = 0 as *const c_char; let null_void = 0 as *const c_void; @@ -100,9 +151,19 @@ pub(crate) unsafe fn build_fakeroot(base: &CString, mountpoints: &[FakeRootMount return false; } + // create mount points + for dir in mkdirs { + libc::mkdir(dir.as_ptr(), 0o777); + } + for file in touchs { + let fd = libc::open(file.as_ptr(), O_RDONLY | O_CREAT | O_CLOEXEC, 0o666); // O_CREAT requires an explicit mode argument + if fd >= 0 { + libc::close(fd); + } + } + // mount directories - still read-write (because MS_BIND + MS_RDONLY are not supported) for mount in mountpoints { - libc::mkdir(mount.mountpoint_outer.as_ptr(), 0o777); let (src, fstype, flags) = if mount.is_special_fs { (null_char, mount.src.as_ptr(), 0) } else {