Skip to content

Commit 52cf971

Browse files
committed
WIP: optimize process spawning on Linux
By avoiding allocations and sorting when copying environment variables. Add Rust CI workflow analysis.
1 parent f4d794e commit 52cf971

File tree

3 files changed

+100
-8
lines changed

3 files changed

+100
-8
lines changed

library/std/src/sys/unix/process/process_common.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ pub struct Command {
9393
/// `args`, followed by a `null`. Be careful when modifying `program` or
9494
/// `args` to properly update this as well.
9595
argv: Argv,
96-
env: CommandEnv,
96+
pub env: CommandEnv,
9797

9898
program_kind: ProgramKind,
9999
cwd: Option<CString>,
100100
uid: Option<uid_t>,
101101
gid: Option<gid_t>,
102-
saw_nul: bool,
102+
pub saw_nul: bool,
103103
closures: Vec<Box<dyn FnMut() -> io::Result<()> + Send + Sync>>,
104104
groups: Option<Box<[gid_t]>>,
105105
stdin: Option<Stdio>,
@@ -402,7 +402,7 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString {
402402

403403
// Helper type to manage ownership of the strings within a C-style array.
404404
pub struct CStringArray {
405-
items: Vec<CString>,
405+
pub items: Vec<CString>,
406406
ptrs: Vec<*const c_char>,
407407
}
408408

@@ -426,7 +426,10 @@ impl CStringArray {
426426
}
427427
}
428428

429-
fn construct_envp(env: BTreeMap<OsString, OsString>, saw_nul: &mut bool) -> CStringArray {
429+
pub(crate) fn construct_envp(
430+
env: BTreeMap<OsString, OsString>,
431+
saw_nul: &mut bool,
432+
) -> CStringArray {
430433
let mut result = CStringArray::with_capacity(env.len());
431434
for (mut k, v) in env {
432435
// Reserve additional space for '=' and null terminator

library/std/src/sys/unix/process/process_unix.rs

+91-2
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use crate::io::{self, Error, ErrorKind};
33
use crate::mem;
44
use crate::num::NonZeroI32;
55
use crate::sys;
6-
use crate::sys::cvt;
76
use crate::sys::process::process_common::*;
7+
use crate::sys::{cvt, memchr};
88
use core::ffi::NonZero_c_int;
99

1010
#[cfg(target_os = "linux")]
1111
use crate::os::linux::process::PidFd;
1212
#[cfg(target_os = "linux")]
1313
use crate::os::unix::io::AsRawFd;
14-
14+
use crate::sys::os::{env_read_lock, environ};
1515
#[cfg(any(
1616
target_os = "macos",
1717
target_os = "watchos",
@@ -29,6 +29,8 @@ use libc::RTP_ID as pid_t;
2929
#[cfg(not(target_os = "vxworks"))]
3030
use libc::{c_int, pid_t};
3131

32+
use crate::collections::HashSet;
33+
use crate::ffi::{CStr, CString};
3234
#[cfg(not(any(
3335
target_os = "vxworks",
3436
target_os = "l4re",
@@ -68,6 +70,90 @@ cfg_if::cfg_if! {
6870
// Command
6971
////////////////////////////////////////////////////////////////////////////////
7072

73+
#[cfg(target_os = "linux")]
74+
fn count_env_vars() -> usize {
75+
let mut count = 0;
76+
unsafe {
77+
let _guard = env_read_lock();
78+
let mut environ = *environ();
79+
while !(*environ).is_null() {
80+
environ = environ.add(1);
81+
count += 1;
82+
}
83+
}
84+
count
85+
}
86+
87+
/// Super-duper optimized version of capturing environment variables, that tries to avoid
88+
/// unnecessary allocations and sorting.
89+
#[cfg(target_os = "linux")]
90+
fn capture_envp(cmd: &mut Command) -> CStringArray {
91+
use crate::os::unix::ffi::OsStrExt;
92+
93+
// Count the upper bound of environment variables (vars from the environ + vars coming from the
94+
// command).
95+
let env_count_upper_bound = count_env_vars() + cmd.env.vars.len();
96+
97+
let mut env_array = CStringArray::with_capacity(env_count_upper_bound);
98+
99+
// Remember which vars were already set by the user.
100+
// If the user value is Some, we will add the variable to `env_array` and modify `visited`.
101+
// If the user value is None, we will only modify `visited`.
102+
// In either case, a variable with the same name from `environ` will not be added to `env_array`.
103+
let mut visited: HashSet<&[u8]> = HashSet::with_capacity(cmd.env.vars.len());
104+
105+
// First, add user defined variables to `env_array`, and mark the visited ones.
106+
for (key, maybe_value) in cmd.env.vars.iter() {
107+
if let Some(value) = maybe_value {
108+
// One extra byte for '=', and one extra byte for the NULL terminator.
109+
let mut env_var: Vec<u8> =
110+
Vec::with_capacity(key.as_bytes().len() + value.as_bytes().len() + 2);
111+
env_var.extend_from_slice(key.as_bytes());
112+
env_var.push(b'=');
113+
env_var.extend_from_slice(value.as_bytes());
114+
115+
if let Ok(item) = CString::new(env_var) {
116+
env_array.push(item);
117+
} else {
118+
cmd.saw_nul = true;
119+
return env_array;
120+
}
121+
}
122+
visited.insert(key.as_bytes());
123+
}
124+
125+
// Then, if we're not clearing the original environment, go through it, and add each variable
126+
// to env_array if we haven't seen it yet.
127+
if !cmd.env.clear {
128+
unsafe {
129+
let _guard = env_read_lock();
130+
let mut environ = *environ();
131+
if !environ.is_null() {
132+
while !(*environ).is_null() {
133+
let c_str = CStr::from_ptr(*environ);
134+
let key_value = c_str.to_bytes();
135+
if !key_value.is_empty() {
136+
if let Some(pos) = memchr::memchr(b'=', &key_value[1..]).map(|p| p + 1) {
137+
let key = &key_value[..pos];
138+
if !visited.contains(&key) {
139+
env_array.push(CString::from(c_str));
140+
}
141+
}
142+
}
143+
environ = environ.add(1);
144+
}
145+
}
146+
}
147+
}
148+
149+
env_array
150+
}
151+
152+
#[cfg(target_os = "linux")]
153+
pub fn capture_env_linux(cmd: &mut Command) -> Option<CStringArray> {
154+
if cmd.env.is_unchanged() { None } else { Some(capture_envp(cmd)) }
155+
}
156+
71157
impl Command {
72158
pub fn spawn(
73159
&mut self,
@@ -76,6 +162,9 @@ impl Command {
76162
) -> io::Result<(Process, StdioPipes)> {
77163
const CLOEXEC_MSG_FOOTER: [u8; 4] = *b"NOEX";
78164

165+
#[cfg(target_os = "linux")]
166+
let envp = capture_env_linux(self);
167+
#[cfg(not(target_os = "linux"))]
79168
let envp = self.capture_env();
80169

81170
if self.saw_nul() {

library/std/src/sys_common/process.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ use crate::sys::process::{EnvKey, ExitStatus, Process, StdioPipes};
1212
// Stores a set of changes to an environment
1313
#[derive(Clone)]
1414
pub struct CommandEnv {
15-
clear: bool,
15+
pub clear: bool,
1616
saw_path: bool,
17-
vars: BTreeMap<EnvKey, Option<OsString>>,
17+
pub vars: BTreeMap<EnvKey, Option<OsString>>,
1818
}
1919

2020
impl Default for CommandEnv {

0 commit comments

Comments
 (0)