Skip to content

Commit 3f0adc9

Browse files
committed
fix: unsound use of env::set_var, which breaks with the stdlib change that makes `set_var` unsafe
It is generally not safe to set env variables. The correct way to set a config value that needs to be overridden is to hold a copy internal to the library and only read from the environment.
1 parent 5fa5dda commit 3f0adc9

File tree

1 file changed

+23
-3
lines changed

1 file changed

+23
-3
lines changed

tokenizers/src/utils/parallelism.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use rayon::iter::IterBridge;
66
use rayon::prelude::*;
77
use rayon_cond::CondIterator;
88
use std::sync::atomic::AtomicBool;
9+
use std::sync::atomic::AtomicU8;
910
use std::sync::atomic::Ordering;
1011

1112
// Re-export rayon current_num_threads
@@ -14,19 +15,30 @@ pub use rayon::current_num_threads;
1415
pub const ENV_VARIABLE: &str = "TOKENIZERS_PARALLELISM";
1516

1617
static USED_PARALLELISM: AtomicBool = AtomicBool::new(false);
18+
static PARALLELISM: AtomicU8 = AtomicU8::new(0);
1719

1820
/// Check if the TOKENIZERS_PARALLELISM env variable has been explicitly set
1921
pub fn is_parallelism_configured() -> bool {
20-
std::env::var(ENV_VARIABLE).is_ok()
22+
std::env::var(ENV_VARIABLE).is_ok() || get_override_parallelism().is_some()
2123
}
2224

2325
/// Check if at some point we used a parallel iterator
2426
pub fn has_parallelism_been_used() -> bool {
2527
USED_PARALLELISM.load(Ordering::SeqCst)
2628
}
2729

30+
/// Get internally set parallelism
31+
fn get_override_parallelism() -> Option<bool> {
32+
match PARALLELISM.load(Ordering::SeqCst) {
33+
0 => None,
34+
1 => Some(false),
35+
2 => Some(true),
36+
_ => unreachable!(),
37+
}
38+
}
39+
2840
/// Get the currently set value for `TOKENIZERS_PARALLELISM` env variable
29-
pub fn get_parallelism() -> bool {
41+
fn get_env_parallelism() -> bool {
3042
match std::env::var(ENV_VARIABLE) {
3143
Ok(mut v) => {
3244
v.make_ascii_lowercase();
@@ -36,9 +48,17 @@ pub fn get_parallelism() -> bool {
3648
}
3749
}
3850

51+
pub fn get_parallelism() -> bool {
52+
if let Some(parallel) = get_override_parallelism() {
53+
parallel
54+
} else {
55+
get_env_parallelism()
56+
}
57+
}
58+
3959
/// Set the value for `TOKENIZERS_PARALLELISM` for the current process
4060
pub fn set_parallelism(val: bool) {
41-
std::env::set_var(ENV_VARIABLE, if val { "true" } else { "false" })
61+
PARALLELISM.store(if val { 2 } else { 1 }, Ordering::SeqCst);
4262
}
4363

4464
/// Allows to convert into an iterator that can be executed either parallelly or serially.

0 commit comments

Comments
 (0)