From 89bc3c0e7fed51c0b12fecdb3ffe958ca3161daf Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi
Date: Mon, 1 Jun 2026 23:16:50 -0500
Subject: [PATCH] fix(snapshot): account function source bytes

---
Reviewer notes (free-form text in this region is skipped when the patch is
applied):

* interpreter/mod.rs adds `function_storage_bytes(&FunctionDef) -> usize`.
  It returns the length of `func.source` when that is `Some`, otherwise
  `span.end.offset - span.start.offset` (saturating).
* The places in interpreter/mod.rs that used to compute a function's size
  from its span now call that helper: the `check_function_insert` call made
  for `parsed_func`, the `func_bytes` sum passed to
  `MemoryBudget::recompute_from_state`, and the `Command::Function` arm (for
  both the incoming body and the previously stored one).
* parser/mod.rs: when `current_token` is `None`, the returned offset is now
  `current_span.end.offset` instead of `input.len()`.
* snapshot_tests.rs gains two tests. The first defines `trimmed` followed by
  a 2048-character trailing comment and asserts that the snapshot's stored
  source is exactly `trimmed() { :; }`. The second rewrites a snapshot's JSON
  so `large_source` carries an oversized source string, restores it with
  `max_function_body_bytes(256)`, and expects `type large_source` to exit
  with status 1.
* NOTE(review): both tests parse `bytes[32..]` as JSON, so they presumably
  rely on a fixed 32-byte prefix in the snapshot encoding. Confirm against
  the snapshot format.
* NOTE(review): leading whitespace was lost in the copy this patch was
  recovered from. Line breaks follow the +/- markers and the hunk line
  counts. Indentation of context and removed lines is reconstructed from
  rustfmt conventions and may not match the tree; if the patch does not apply
  cleanly, retry with `git apply --ignore-whitespace`.

 crates/bashkit/src/interpreter/mod.rs         | 27 +++++--------
 crates/bashkit/src/parser/mod.rs              |  5 ++-
 .../tests/integration/snapshot_tests.rs       | 40 +++++++++++++++++++
 3 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index e0df17d67..a57e640a3 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -779,6 +779,13 @@ fn deserialize_function_from_source(
     deserialize_function_from_source_with_limits(name, source, 100, 100_000)
 }
 
+fn function_storage_bytes(func: &FunctionDef) -> usize {
+    func.source.as_ref().map_or_else(
+        || func.span.end.offset.saturating_sub(func.span.start.offset),
+        |source| source.len(),
+    )
+}
+
 // Important decision: variable attributes (readonly/integer/lower/upper) and
 // namerefs are stored in dedicated maps rather than the `variables` HashMap with
 // `_READONLY_X` / `_INTEGER_X` / `_LOWER_X` / `_UPPER_X` / `_NAMEREF_X` keys.
@@ -1814,11 +1821,7 @@ impl Interpreter {
                 ) else {
                     continue;
                 };
-                let body_bytes = parsed_func
-                    .span
-                    .end
-                    .offset
-                    .saturating_sub(parsed_func.span.start.offset);
+                let body_bytes = function_storage_bytes(&parsed_func);
                 if function_memory_budget
                     .check_function_insert(body_bytes, true, 0, &self.memory_limits)
                     .is_err()
@@ -1833,11 +1836,7 @@ impl Interpreter {
         self.traps = Arc::new(state.traps.clone());
         // Recompute memory budget from restored state to prevent desync
         let func_count = self.functions.len();
-        let func_bytes: usize = self
-            .functions
-            .values()
-            .map(|f| f.span.end.offset.saturating_sub(f.span.start.offset))
-            .sum();
+        let func_bytes: usize = self.functions.values().map(function_storage_bytes).sum();
         self.memory_budget = crate::limits::MemoryBudget::recompute_from_state(
             &self.variables,
             &self.arrays,
@@ -2366,18 +2365,14 @@ impl Interpreter {
             }
             Command::Function(func_def) => {
                 // THREAT[TM-DOS-060]: Check function count/size budget
-                let body_bytes = func_def
-                    .span
-                    .end
-                    .offset
-                    .saturating_sub(func_def.span.start.offset);
+                let body_bytes = function_storage_bytes(func_def);
                 let is_new = !self.functions.contains_key(&func_def.name);
                 let old_body_bytes = if is_new {
                     0
                 } else {
                     self.functions
                         .get(&func_def.name)
-                        .map(|f| f.span.end.offset.saturating_sub(f.span.start.offset))
+                        .map(function_storage_bytes)
                         .unwrap_or(0)
                 };
                 if self
diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs
index 3c3f61e86..6f5663271 100644
--- a/crates/bashkit/src/parser/mod.rs
+++ b/crates/bashkit/src/parser/mod.rs
@@ -153,7 +153,10 @@ impl<'a> Parser<'a> {
         if self.current_token.is_some() {
             self.current_span.start.offset
         } else {
-            self.input.len()
+            // Important decision: EOF keeps `current_span` on the last real token;
+            // use that token end so skipped trailing comments are not retained in
+            // persistent function source snapshots.
+            self.current_span.end.offset
         }
     }
 
diff --git a/crates/bashkit/tests/integration/snapshot_tests.rs b/crates/bashkit/tests/integration/snapshot_tests.rs
index a61610ad4..5bdc21b4a 100644
--- a/crates/bashkit/tests/integration/snapshot_tests.rs
+++ b/crates/bashkit/tests/integration/snapshot_tests.rs
@@ -403,6 +403,46 @@ async fn snapshot_without_functions_skips_function_restore() {
     assert_eq!(result.stdout, "42\n1\n");
 }
 
+#[tokio::test]
+async fn snapshot_function_source_excludes_trailing_eof_comment() {
+    let mut bash = Bash::new();
+    let trailing = "x".repeat(2048);
+    bash.exec(&format!("trimmed() {{ :; }} #{trailing}"))
+        .await
+        .unwrap();
+
+    let bytes = bash.snapshot().unwrap();
+    let json: serde_json::Value = serde_json::from_slice(&bytes[32..]).unwrap();
+    let source = json["shell"]["functions"]["trimmed"]["source"]
+        .as_str()
+        .unwrap();
+
+    assert_eq!(source, "trimmed() { :; }");
+}
+
+#[tokio::test]
+async fn snapshot_restore_counts_source_bytes_against_function_limit() {
+    let mut src = Bash::new();
+    src.exec("large_source() { :; }").await.unwrap();
+    let bytes = src.snapshot().unwrap();
+    let mut json: serde_json::Value = serde_json::from_slice(&bytes[32..]).unwrap();
+    json["shell"]["functions"]["large_source"] = serde_json::json!({
+        "source": format!("large_source() {{ :; }} #{}", "x".repeat(2048))
+    });
+
+    let rewritten: Snapshot = serde_json::from_value(json).unwrap();
+    let bytes = rewritten.to_bytes().unwrap();
+    let limits = MemoryLimits::new().max_function_body_bytes(256);
+    let mut restored = Bash::builder().memory_limits(limits).build();
+    restored.restore_snapshot(&bytes).unwrap();
+
+    let result = restored
+        .exec("type large_source >/dev/null 2>&1; echo $?")
+        .await
+        .unwrap();
+    assert_eq!(result.stdout, "1\n");
+}
+
 #[tokio::test]
 async fn snapshot_restore_enforces_function_limits() {
     let mut src = Bash::new();