From 4d6e5882b77cb74d41601e896b7b878a1a2d9882 Mon Sep 17 00:00:00 2001 From: Securifi Dev Date: Sun, 14 Jun 2026 15:21:15 +0000 Subject: [PATCH] feat(storage): expose compaction via StorageEngine::compact() Add compact() to the StorageEngine trait (default no-op), overridden by the pagedb-backed engine to call pagedb::Db::compact_now() and map CompactStats to a lite-owned CompactionOutcome. Add a NodeDbLite::compact() forwarder. In-memory engine keeps the no-op default. Lets consumers reclaim deferred-free space / bound on-disk growth between writes. Requires pagedb compact_now() to be Send-callable (NodeDB-Lab/pagedb#4). Addresses NodeDB-Lab/pagedb#2 (compaction not exposed to consumers). --- nodedb-lite/src/nodedb/health.rs | 15 +++++ nodedb-lite/src/storage/engine.rs | 28 ++++++++ nodedb-lite/src/storage/pagedb_storage.rs | 79 ++++++++++++++++++++++- 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/nodedb-lite/src/nodedb/health.rs b/nodedb-lite/src/nodedb/health.rs index 34b4693..f21a94b 100644 --- a/nodedb-lite/src/nodedb/health.rs +++ b/nodedb-lite/src/nodedb/health.rs @@ -10,6 +10,8 @@ use serde::Serialize; +use nodedb_types::error::NodeDbResult; + use crate::memory::{EngineId, PressureLevel}; use crate::storage::engine::StorageEngine; @@ -124,6 +126,19 @@ impl NodeDbLite { &self.storage } + /// Compact the backing storage engine, reclaiming dead pages and + /// truncating the file to bound on-disk growth. + /// + /// Forwards to [`StorageEngine::compact`]. For the pagedb-backed engine this + /// drains the deferred-free list and truncates `main.db`; for in-memory or + /// test engines it is a no-op returning a zero + /// [`CompactionOutcome`](crate::storage::engine::CompactionOutcome). + pub async fn compact( + &self, + ) -> NodeDbResult<crate::storage::engine::CompactionOutcome> { + Ok(self.storage.compact().await?) + } + /// Get a structured health report. /// /// This is a cheap, non-blocking call — reads atomic counters and lock-free state. 
diff --git a/nodedb-lite/src/storage/engine.rs b/nodedb-lite/src/storage/engine.rs index 5eaf3eb..cf6c640 100644 --- a/nodedb-lite/src/storage/engine.rs +++ b/nodedb-lite/src/storage/engine.rs @@ -18,6 +18,23 @@ use nodedb_types::Namespace; /// `StorageEngine` trait's scan interface. pub type KvPair = (Vec<u8>, Vec<u8>); +/// Summary of what a [`StorageEngine::compact`] call reclaimed. +/// +/// Lite-owned (not a pagedb type) so the trait doesn't force pagedb types on +/// non-pagedb impls. The pagedb-backed engine maps `pagedb::CompactStats` into +/// this; other engines return the `Default` (all-zero) value from the trait's +/// default no-op `compact`. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct CompactionOutcome { + /// Number of underlying pages reclaimed (moved to free-list or freed by + /// repacking). Zero for engines with nothing to compact. + pub reclaimed_pages: u64, + /// Number of segment files repacked. + pub segments_repacked: u32, + /// Bytes truncated from the backing file by lowering the high-water mark. + pub file_bytes_freed: u64, +} + /// A write operation for batch writes. #[derive(Debug, Clone)] pub enum WriteOp { @@ -72,6 +89,17 @@ pub trait StorageEngine: Send + Sync + 'static { /// Useful for cold-start progress reporting and memory governor decisions. async fn count(&self, ns: Namespace) -> Result; + /// Compact the backing store, reclaiming dead pages and (when possible) + /// truncating the file to bound on-disk growth. + /// + /// The default implementation is a no-op returning a zero + /// [`CompactionOutcome`], so engines with nothing to compact (in-memory + /// stores, test doubles) need not override it. The pagedb-backed engine + /// overrides this to drain the deferred-free list and truncate `main.db`. + async fn compact(&self) -> Result<CompactionOutcome, LiteError> { + Ok(CompactionOutcome::default()) + } + /// Range scan: return up to `limit` entries where key >= `start`. /// /// Results are ordered by key (lexicographic byte order). 
diff --git a/nodedb-lite/src/storage/pagedb_storage.rs b/nodedb-lite/src/storage/pagedb_storage.rs index 52d7427..0e6ec1f 100644 --- a/nodedb-lite/src/storage/pagedb_storage.rs +++ b/nodedb-lite/src/storage/pagedb_storage.rs @@ -26,7 +26,7 @@ use pagedb::vfs::traits::Vfs; use pagedb::{Db, RealmId}; use crate::error::LiteError; -use crate::storage::engine::{KvPair, StorageEngine, WriteOp}; +use crate::storage::engine::{CompactionOutcome, KvPair, StorageEngine, WriteOp}; use nodedb_types::Namespace; // ─── VFS aliases ───────────────────────────────────────────────────────────── @@ -453,6 +453,15 @@ where .collect()) } + async fn compact(&self) -> Result<CompactionOutcome, LiteError> { + let stats = self.db.compact_now().await.map_err(LiteError::from)?; + Ok(CompactionOutcome { + reclaimed_pages: stats.main_db_pages_reclaimed, + segments_repacked: stats.segments_repacked, + file_bytes_freed: stats.bytes_truncated, + }) + } + fn as_vector_segment_ext( &self, ) -> Option<&dyn crate::storage::vector_segment_ext::VectorSegmentExt> { @@ -744,6 +753,15 @@ impl StorageEngine for PagedbStorage { .map(|(k, v)| (strip_prefix(&k).to_vec(), v)) .collect()) } + + async fn compact(&self) -> Result<CompactionOutcome, LiteError> { + let stats = self.db.compact_now().await.map_err(LiteError::from)?; + Ok(CompactionOutcome { + reclaimed_pages: stats.main_db_pages_reclaimed, + segments_repacked: stats.segments_repacked, + file_bytes_freed: stats.bytes_truncated, + }) + } } // ─── VectorSegmentExt impl ──────────────────────────────────────────────────── @@ -1227,6 +1245,65 @@ mod tests { assert_eq!(results[2].0, &[4u8]); } + /// In-memory engine: `compact()` must succeed and leave stored data readable. + #[tokio::test] + async fn compact_mem_is_ok_noop() { + let s = make_storage().await; + s.put(Namespace::Vector, b"v1", b"hello").await.unwrap(); + s.put(Namespace::Graph, b"g1", b"world").await.unwrap(); + let outcome = s.compact().await.unwrap(); + // Data still readable after compaction. 
+ assert_eq!( + s.get(Namespace::Vector, b"v1").await.unwrap().as_deref(), + Some(b"hello".as_slice()) + ); + // MemVfs has no file truncation, but the call must succeed regardless. + let _ = outcome.reclaimed_pages; + } + + /// Disk-backed engine on a tempdir: write rows, delete most of them to leave + /// dead pages, then `compact()` must succeed. Surviving data must remain + /// readable afterward. + #[cfg(not(target_arch = "wasm32"))] + #[tokio::test] + async fn compact_default_disk_is_ok() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("compact-test.db"); + let s = PagedbStorage::::open( + &path, + crate::storage::encryption::Encryption::Plaintext, + ) + .await + .unwrap(); + + // Churn: write a batch of keys, then delete most of them, so the + // deferred-free list has pages to reclaim. + for i in 0u32..200 { + let key = i.to_be_bytes(); + s.put(Namespace::Meta, &key, &vec![0xCDu8; 512]) + .await + .unwrap(); + } + for i in 0u32..150 { + let key = i.to_be_bytes(); + s.delete(Namespace::Meta, &key).await.unwrap(); + } + + let outcome = s.compact().await.unwrap(); + + // Surviving keys still readable. + let survivor = 175u32.to_be_bytes(); + assert!(s.get(Namespace::Meta, &survivor).await.unwrap().is_some()); + + // Outcome fields are unsigned (u64/u32), so there is no sign to check; + // just touch them so the test documents the reported shape. + let _ = ( + outcome.reclaimed_pages, + outcome.segments_repacked, + outcome.file_bytes_freed, + ); + } + /// Keys in namespace N must not appear in a scan of namespace N+1, and /// vice versa. Verifies the single-byte prefix boundary. #[tokio::test]