Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions crates/bashkit/src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,13 @@ pub trait Builtin: Send + Sync {
fn llm_hint(&self) -> Option<&'static str> {
None
}

/// Clear hidden per-instance state that must not survive snapshot restore.
///
/// Most builtins are stateless and keep the default no-op. Builtins with
/// session caches must override this so [`Bash::restore_snapshot`](crate::Bash::restore_snapshot)
/// restores the full observable boundary, not just shell variables and VFS bytes.
fn reset_session_state(&self) {}
}

/// Trait for custom builtins that parse arguments with [`clap`].
Expand Down Expand Up @@ -794,6 +801,13 @@ pub trait ClapBuiltin: Send + Sync {
fn llm_hint(&self) -> Option<&'static str> {
None
}

/// Clear hidden per-instance state that must not survive snapshot restore.
///
/// Most builtins are stateless and keep the default no-op. Builtins with
/// session caches must override this so [`Bash::restore_snapshot`](crate::Bash::restore_snapshot)
/// restores the full observable boundary, not just shell variables and VFS bytes.
fn reset_session_state(&self) {}
}

/// Mutable execution context for [`ClapBuiltin`] implementations.
Expand Down Expand Up @@ -925,6 +939,10 @@ where
fn llm_hint(&self) -> Option<&'static str> {
ClapBuiltin::llm_hint(self)
}

fn reset_session_state(&self) {
ClapBuiltin::reset_session_state(self);
}
}

fn clap_error_to_exec_result(error: clap::Error) -> ExecResult {
Expand Down
24 changes: 12 additions & 12 deletions crates/bashkit/src/builtins/sqlite/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl SqliteEngine {
backend: Backend::Vfs(io),
_db: db,
conn,
memory_path: None,
memory_path: Some(path_in_io.to_string()),
})
}

Expand Down Expand Up @@ -211,27 +211,27 @@ impl SqliteEngine {
}
}

/// Snapshot the database file bytes. Only meaningful for the memory
/// backend (which is what the Phase 1 path uses). Returns `None` for
/// the VFS backend, since persistence happens via the IO directly.
/// Snapshot the current database file bytes for cache invalidation.
///
/// We force a TRUNCATE-mode checkpoint before reading so that any pages
/// still in the WAL are folded into the main file. Without this step the
/// snapshot would be missing the just-written transaction.
pub(super) fn snapshot_bytes(&self) -> Option<Vec<u8>> {
let Backend::Memory(io) = &self.backend else {
return None;
};
let path = self.memory_path.as_deref()?;
let _ = self.conn.checkpoint(turso_core::CheckpointMode::Truncate {
upper_bound_inclusive: None,
});
let file = io.open_file(path, OpenFlags::None, false).ok()?;
let size = file.size().ok()? as usize;
if size == 0 {
return Some(Vec::new());
match &self.backend {
Backend::Memory(io) => {
let file = io.open_file(path, OpenFlags::None, false).ok()?;
let size = file.size().ok()? as usize;
if size == 0 {
return Some(Vec::new());
}
Some(read_all(&file, size))
}
Backend::Vfs(io) => io.file_bytes(path),
}
Some(read_all(&file, size))
}

/// For the VFS backend, flush any pages dirtied in memory back to the
Expand Down
27 changes: 24 additions & 3 deletions crates/bashkit/src/builtins/sqlite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ mod tests;
use async_trait::async_trait;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use crate::error::Result;
use crate::fs::FileSystem;
Expand Down Expand Up @@ -312,6 +313,13 @@ impl Builtin for Sqlite {
)
}

fn reset_session_state(&self) {
self.engine_cache
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner())
.clear();
}

async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
let invocation_args: Vec<String> = ctx.args.to_vec();
if let Some(r) = check_help_version(&invocation_args, HELP_TEXT, Some("sqlite (turso 0.5)"))
Expand Down Expand Up @@ -687,10 +695,23 @@ async fn open_file_engine(
SqliteEngine::open_memory(initial.as_deref())
}
SqliteBackend::Vfs => {
static VFS_OPEN_COUNTER: AtomicU64 = AtomicU64::new(0);
let handle = vfs_io::current_handle_or_default();
let io = vfs_io::BashkitVfsIO::new_with_cap(fs.clone(), handle, limits.max_db_bytes);
let path_str = path.to_string_lossy().into_owned();
SqliteEngine::open_vfs(io, &path_str)
// Security: use a unique Turso-internal path so dropped engines cannot
// be resurrected from Turso's process-wide database registry after
// Bashkit snapshot restore. The IO maps this path back to `path`.
let io_path = format!(
":memory:bashkit-vfs-{}",
VFS_OPEN_COUNTER.fetch_add(1, Ordering::Relaxed)
);
let io = vfs_io::BashkitVfsIO::new_with_cap_and_path_alias(
fs.clone(),
handle,
limits.max_db_bytes,
io_path.clone(),
path.to_path_buf(),
);
SqliteEngine::open_vfs(io, &io_path)
}
}
}
Expand Down
32 changes: 26 additions & 6 deletions crates/bashkit/src/builtins/sqlite/vfs_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ impl File for VfsFile {
/// IO adapter exposing bashkit's [`FileSystem`] to turso.
pub(super) struct BashkitVfsIO {
fs: Arc<dyn FileSystem>,
/// Optional alias from Turso's process-local open path to the caller's VFS path.
path_alias: Option<(String, PathBuf)>,
/// All currently-open files keyed by path string. Used to flush dirty
/// buffers back to the VFS after SQL execution.
open_files: Mutex<HashMap<String, Arc<VfsFile>>>,
Expand All @@ -162,16 +164,17 @@ pub(super) struct BashkitVfsIO {
}

impl BashkitVfsIO {
/// Override the file-size cap. The builtin passes
/// `SqliteLimits::max_db_bytes` here so both VFS reads and writes share
/// the same per-database cap.
pub(super) fn new_with_cap(
/// Create an IO whose Turso-internal path is mapped back to a real VFS path.
pub(super) fn new_with_cap_and_path_alias(
fs: Arc<dyn FileSystem>,
handle: Handle,
max_file_bytes: usize,
io_path: String,
vfs_path: PathBuf,
) -> Arc<Self> {
Arc::new(Self {
fs,
path_alias: Some((io_path, vfs_path)),
open_files: Mutex::new(HashMap::new()),
handle,
max_file_bytes,
Expand All @@ -182,6 +185,23 @@ impl BashkitVfsIO {
self.open_files.lock().unwrap_or_else(|e| e.into_inner())
}

fn real_path_for(&self, path: &str) -> PathBuf {
if let Some((io_prefix, vfs_prefix)) = &self.path_alias
&& let Some(suffix) = path.strip_prefix(io_prefix)
{
let mut mapped = vfs_prefix.as_os_str().to_os_string();
mapped.push(suffix);
return PathBuf::from(mapped);
}
PathBuf::from(path)
}

/// Return bytes currently held for an opened Turso path.
pub(super) fn file_bytes(&self, path: &str) -> Option<Vec<u8>> {
let file = self.open_files_lock().get(path).cloned()?;
Some(lock_bytes(&file.bytes).clone())
}

/// Persist any dirty in-memory buffers back to the underlying `FileSystem`.
pub(super) async fn flush_dirty(&self) -> EngineResult<usize> {
let to_flush: Vec<Arc<VfsFile>> = {
Expand Down Expand Up @@ -234,7 +254,7 @@ impl IO for BashkitVfsIO {
if let Some(existing) = self.open_files_lock().get(path).cloned() {
return Ok(existing as Arc<dyn File>);
}
let pb = PathBuf::from(path);
let pb = self.real_path_for(path);
let cap = self.max_file_bytes;
let bytes_opt = run_async(&self.handle, {
let fs = self.fs.clone();
Expand Down Expand Up @@ -270,7 +290,7 @@ impl IO for BashkitVfsIO {
self.open_files_lock().remove(path);
run_async(&self.handle, {
let fs = self.fs.clone();
let pb = PathBuf::from(path);
let pb = self.real_path_for(path);
move || async move {
let _ = fs.remove(&pb, false).await;
}
Expand Down
11 changes: 11 additions & 0 deletions crates/bashkit/src/interpreter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,17 @@ impl Interpreter {
.clone()
}

/// Drop builtin-owned hidden state after snapshot restore.
///
/// Security: snapshots define the shell/VFS boundary. Builtin caches (for
/// example SQLite engines) must not retain stale state that can be read or
/// flushed back after the VFS has been restored.
pub(crate) fn reset_builtin_session_state(&self) {
for builtin in self.builtins.values() {
builtin.reset_session_state();
}
}

pub(crate) fn scoped_execution_extensions(
&self,
extensions: builtins::ExecutionExtensions,
Expand Down
4 changes: 4 additions & 0 deletions crates/bashkit/src/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ impl crate::Bash {
if let Some(ref vfs) = snap.vfs {
self.fs.vfs_restore(vfs)?;
}
// Security: restore invalidates builtin caches after the VFS swap so
// hidden state cannot leak across snapshot boundaries or overwrite the
// restored filesystem on the next command.
self.interpreter.reset_builtin_session_state();
// Shell state cannot fail past validation, and the VFS has already
// been restored atomically (or rejected) above.
self.interpreter.restore_shell_state(&snap.shell);
Expand Down
39 changes: 39 additions & 0 deletions crates/bashkit/tests/integration/sqlite_integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,45 @@ async fn snapshot_and_restore_round_trips_sqlite_state() {
assert_eq!(r.stdout.trim(), "preserved");
}

async fn assert_restore_into_existing_bash_clears_sqlite_cache(mut bash: Bash) {
let clean = Bash::builder().sqlite().env(OPT_IN.0, OPT_IN.1).build();
let clean_snap = clean.snapshot().expect("clean snapshot");

let setup = bash
.exec(r#"sqlite /tmp/leak.sqlite 'CREATE TABLE t(v); INSERT INTO t VALUES ("old-secret")'"#)
.await
.unwrap();
assert_eq!(setup.exit_code, 0, "stderr: {}", setup.stderr);

bash.restore_snapshot(&clean_snap)
.expect("restore clean snapshot");

let query = bash
.exec(r#"sqlite /tmp/leak.sqlite 'SELECT v FROM t'"#)
.await
.unwrap();
assert_ne!(
query.exit_code, 0,
"stale sqlite cache survived restore: stdout={} stderr={}",
query.stdout, query.stderr
);
assert!(
!query.stdout.contains("old-secret"),
"restored Bash leaked stale sqlite row: {}",
query.stdout
);
}

#[tokio::test]
async fn snapshot_restore_into_existing_bash_clears_sqlite_cache_memory_backend() {
assert_restore_into_existing_bash_clears_sqlite_cache(make_bash()).await;
}

#[tokio::test]
async fn snapshot_restore_into_existing_bash_clears_sqlite_cache_vfs_backend() {
assert_restore_into_existing_bash_clears_sqlite_cache(make_bash_vfs()).await;
}

#[tokio::test]
async fn cached_engine_respects_deleted_db_between_exec_calls() {
let mut bash = make_bash();
Expand Down
14 changes: 11 additions & 3 deletions specs/sqlite-builtin.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,20 @@ Consequences worth knowing:
owns the `Sqlite` trait object) drops, taking the engines with it.
Each `Bash::builder().sqlite()` produces its own cache, so two
parallel `Bash` instances do not cross-contaminate.
- **Snapshot restore invalidates caches.** `Bash::restore_snapshot()`
clears sqlite engine caches in the existing `Bash` after restoring the
VFS and before running another command. Cached connections therefore
cannot leak rows from the previous VFS image or flush stale bytes back
over restored files. Tested by
`snapshot_restore_into_existing_bash_clears_sqlite_cache_memory_backend`
and `snapshot_restore_into_existing_bash_clears_sqlite_cache_vfs_backend`.

Snapshot integration is automatic: because we always flush after every
exec, the on-disk image (within the VFS) is current at every point a
caller could legitimately call `bash.snapshot()`. Restore creates a
fresh `Bash` with an empty cache; the first `sqlite` call re-opens
the engine from the restored VFS bytes.
caller could legitimately call `bash.snapshot()`. Restoring into a
fresh `Bash` starts with an empty cache, and restoring into an existing
`Bash` invalidates its cache; the first `sqlite` call after restore
re-opens the engine from the restored VFS bytes.

### SQL policy: ATTACH / DETACH / VACUUM and PRAGMA deny list

Expand Down
Loading