Skip to content

Commit 2c8270f

Browse files
committed
Updated with persistence in the HNSW index
1 parent aad5695 commit 2c8270f

File tree

9 files changed

+179
-18
lines changed

9 files changed

+179
-18
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,21 @@
22

33
[![License: MIT/Apache-2.0](https://img.shields.io/badge/License-MIT%2FApache--2.0-blue.svg)](LICENSE)
44
[![Status: Beta](https://img.shields.io/badge/Status-Beta-brightgreen.svg)](#current-status)
5-
[![Version](https://img.shields.io/badge/Version-0.1.2-blue.svg)](https://github.com/anaslimem/CortexaDB/releases)
5+
[![Version](https://img.shields.io/badge/Version-0.1.3-blue.svg)](https://github.com/anaslimem/CortexaDB/releases)
66

77
**CortexaDB** is a simple, fast, and hard-durable embedded database designed specifically for AI agent memory. It provides a single-file-like experience (no server required) but with native support for vectors, graphs, and temporal search.
88

99
Think of it as **SQLite, but with semantic and relational intelligence for your agents.**
1010

1111
---
1212

13+
## What's New in v0.1.3
14+
15+
- **Automatic HNSW Persistence** - HNSW index is now automatically saved to disk on checkpoint or database close, enabling fast restart without rebuilding the index
16+
- Improved reliability for production use
17+
18+
---
19+
1320
## Quickstart
1421

1522
### Python (Recommended)
@@ -89,6 +96,16 @@ CortexaDB uses **USearch** for high-performance approximate nearest neighbor sea
8996
| `exact` | Small datasets (<10K) | 100% | O(n) |
9097
| `hnsw` | Large datasets | 95%+ | O(log n) |
9198

99+
### Automatic Persistence
100+
101+
HNSW indexing now includes **automatic persistence**:
102+
103+
- On `checkpoint()` - HNSW index is saved to disk
104+
- On database close/drop - HNSW index is automatically saved
105+
- On restart - HNSW index is loaded from disk (fast recovery!)
106+
107+
No extra configuration is needed: set `index_mode="hnsw"` and persistence is handled automatically.
108+
92109
```python
93110
from cortexadb import CortexaDB, HashEmbedder
94111

@@ -218,7 +235,7 @@ We use a custom versioned serialization layer (with a "magic-byte" header). This
218235
---
219236

220237
## License & Status
221-
CortexaDB is currently in **Beta (v0.1.2)**. It is released under the **MIT** and **Apache-2.0** licenses.
238+
CortexaDB is currently in **Beta (v0.1.3)**. It is released under the **MIT** and **Apache-2.0** licenses.
222239
We are actively refining the API and welcome feedback!
223240

224241
---

crates/cortexadb-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cortexadb-core"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
edition = "2024"
55
authors = ["Anas Limem <limemanas0@gmail.com>"]
66
description = "Fast, embedded vector + graph memory for AI agents"

crates/cortexadb-core/src/index/combined.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use thiserror::Error;
44
use crate::core::memory_entry::MemoryId;
55
use crate::core::state_machine::StateMachine;
66
use crate::index::graph::GraphIndex;
7-
use crate::index::hnsw::HnswConfig;
7+
use crate::index::hnsw::{HnswBackend, HnswConfig};
88
use crate::index::temporal::TemporalIndex;
99
use crate::index::vector::{VectorBackendMode, VectorIndex};
1010

@@ -78,12 +78,22 @@ impl IndexLayer {
7878
Self { vector: VectorIndex::new(vector_dimension) }
7979
}
8080

81-
/// Create new index layer with HNSW enabled
81+
/// Create new index layer with HNSW enabled (fresh build)
8282
pub fn new_with_hnsw(vector_dimension: usize, hnsw_config: HnswConfig) -> Self {
83-
let vector = match VectorIndex::new_with_hnsw(vector_dimension, hnsw_config) {
84-
Ok(v) => v,
85-
Err(_) => VectorIndex::new(vector_dimension),
86-
};
83+
Self::new_with_loaded_hnsw(vector_dimension, hnsw_config, None)
84+
}
85+
86+
/// Create new index layer with optional pre-loaded HNSW backend
87+
pub fn new_with_loaded_hnsw(
88+
vector_dimension: usize,
89+
hnsw_config: HnswConfig,
90+
loaded_hnsw: Option<HnswBackend>,
91+
) -> Self {
92+
let vector =
93+
match VectorIndex::new_with_loaded_hnsw(vector_dimension, hnsw_config, loaded_hnsw) {
94+
Ok(v) => v,
95+
Err(_) => VectorIndex::new(vector_dimension),
96+
};
8797
Self { vector }
8898
}
8999

crates/cortexadb-core/src/index/hnsw.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::path::Path;
12
use std::sync::{Arc, Mutex};
23
use thiserror::Error;
34

@@ -142,4 +143,47 @@ impl HnswBackend {
142143
pub fn is_empty(&self) -> bool {
143144
self.len() == 0
144145
}
146+
147+
pub fn save_to_file(&self, path: &Path) -> Result<()> {
148+
let index = self.index.lock().map_err(|_| HnswError::LockError)?;
149+
150+
if let Some(parent) = path.parent() {
151+
std::fs::create_dir_all(parent)?;
152+
}
153+
154+
let path_str = path.to_string_lossy().to_string();
155+
index.save(&path_str).map_err(|e| HnswError::UsearchError(e.to_string()))?;
156+
Ok(())
157+
}
158+
159+
pub fn load_from_file(path: &Path, dimension: usize, config: HnswConfig) -> Result<Self> {
160+
if !path.exists() {
161+
return Err(HnswError::IoError(std::io::Error::new(
162+
std::io::ErrorKind::NotFound,
163+
"HNSW index file not found",
164+
)));
165+
}
166+
167+
let options = usearch::IndexOptions {
168+
dimensions: dimension,
169+
metric: usearch::MetricKind::Cos,
170+
quantization: usearch::ScalarKind::F32,
171+
connectivity: config.m,
172+
expansion_add: config.ef_construction,
173+
expansion_search: config.ef_search,
174+
..Default::default()
175+
};
176+
177+
let index =
178+
usearch::new_index(&options).map_err(|e| HnswError::UsearchError(e.to_string()))?;
179+
180+
let path_str = path.to_string_lossy().to_string();
181+
index.load(&path_str).map_err(|e| HnswError::UsearchError(e.to_string()))?;
182+
183+
Ok(Self { index: Arc::new(Mutex::new(index)), dimension, config })
184+
}
185+
186+
/// Returns the `dimension` value stored on this backend.
pub fn dimension(&self) -> usize {
187+
self.dimension
188+
}
145189
}

crates/cortexadb-core/src/index/vector.rs

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,18 +212,33 @@ impl VectorIndex {
212212
}
213213
}
214214

215-
/// Create a new vector index with HNSW enabled
215+
/// Create a new vector index with HNSW enabled (fresh build)
216216
pub fn new_with_hnsw(vector_dimension: usize, config: HnswConfig) -> Result<Self> {
217-
let hnsw_backend =
218-
HnswBackend::new(vector_dimension, config).map_err(|_e| VectorError::NoEmbeddings)?;
217+
Self::new_with_loaded_hnsw(vector_dimension, config, None)
218+
}
219+
220+
/// Create a new vector index with optional pre-loaded HNSW backend
221+
pub fn new_with_loaded_hnsw(
222+
vector_dimension: usize,
223+
config: HnswConfig,
224+
loaded_hnsw: Option<HnswBackend>,
225+
) -> Result<Self> {
226+
let hnsw_backend = match loaded_hnsw {
227+
Some(backend) => Some(Arc::new(backend)),
228+
None => {
229+
let backend = HnswBackend::new(vector_dimension, config)
230+
.map_err(|_e| VectorError::NoEmbeddings)?;
231+
Some(Arc::new(backend))
232+
}
233+
};
219234
Ok(Self {
220235
partitions: HashMap::new(),
221236
id_to_namespace: HashMap::new(),
222237
vector_dimension,
223238
backend_mode: VectorBackendMode::Exact,
224239
backend: Arc::new(ExactBackend),
225240
ann_provider: Arc::new(PrefixAnnCandidateProvider),
226-
hnsw_backend: Some(Arc::new(hnsw_backend)),
241+
hnsw_backend,
227242
})
228243
}
229244

@@ -255,6 +270,31 @@ impl VectorIndex {
255270
self.hnsw_backend.is_some()
256271
}
257272

273+
/// Save HNSW index to disk (no-op if HNSW not enabled)
274+
pub fn save_hnsw(&self, path: &std::path::Path) -> std::io::Result<()> {
275+
if let Some(ref hnsw) = self.hnsw_backend {
276+
hnsw.save_to_file(path)
277+
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;
278+
}
279+
Ok(())
280+
}
281+
282+
/// Load HNSW index from disk (returns None if file doesn't exist)
283+
pub fn load_hnsw(
284+
path: &std::path::Path,
285+
dimension: usize,
286+
config: HnswConfig,
287+
) -> std::io::Result<Option<HnswBackend>> {
288+
if !path.exists() {
289+
return Ok(None);
290+
}
291+
292+
match HnswBackend::load_from_file(path, dimension, config) {
293+
Ok(backend) => Ok(Some(backend)),
294+
Err(e) => Err(std::io::Error::new(std::io::ErrorKind::Other, e.to_string())),
295+
}
296+
}
297+
258298
pub fn backend_mode(&self) -> VectorBackendMode {
259299
self.backend_mode
260300
}

crates/cortexadb-core/src/store.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ pub struct CortexaDBStore {
103103
sync_thread: Option<JoinHandle<()>>,
104104
checkpoint_policy: CheckpointPolicy,
105105
checkpoint_path: std::path::PathBuf,
106+
hnsw_path: std::path::PathBuf,
106107
checkpoint_control: Arc<(Mutex<CheckpointRuntime>, Condvar)>,
107108
checkpoint_thread: Option<JoinHandle<()>>,
108109
capacity_policy: CapacityPolicy,
@@ -243,14 +244,38 @@ impl CortexaDBStore {
243244
checkpoint_path: std::path::PathBuf,
244245
index_mode: crate::index::hnsw::IndexMode,
245246
) -> Result<Self> {
247+
let hnsw_path = checkpoint_path.with_extension("hnsw");
248+
246249
let hnsw_config = match index_mode {
247250
crate::index::hnsw::IndexMode::Exact => None,
248251
crate::index::hnsw::IndexMode::Hnsw(config) => Some(config),
249252
};
253+
254+
let loaded_hnsw = if let Some(config) = hnsw_config.as_ref() {
255+
match crate::index::VectorIndex::load_hnsw(&hnsw_path, vector_dimension, config.clone())
256+
{
257+
Ok(Some(backend)) => {
258+
eprintln!("Loaded HNSW index from disk (fast recovery)");
259+
Some(backend)
260+
}
261+
Ok(None) => {
262+
eprintln!("No HNSW index file found, building fresh index");
263+
None
264+
}
265+
Err(e) => {
266+
eprintln!("Failed to load HNSW index, rebuilding: {}", e);
267+
None
268+
}
269+
}
270+
} else {
271+
None
272+
};
273+
250274
let indexes = Self::build_vector_index(
251275
engine.get_state_machine(),
252276
vector_dimension,
253277
hnsw_config.as_ref(),
278+
loaded_hnsw,
254279
)?;
255280
Self::assert_vector_index_in_sync_inner(engine.get_state_machine(), &indexes)?;
256281

@@ -297,6 +322,7 @@ impl CortexaDBStore {
297322
sync_thread,
298323
checkpoint_policy,
299324
checkpoint_path,
325+
hnsw_path,
300326
checkpoint_control,
301327
checkpoint_thread,
302328
capacity_policy,
@@ -320,6 +346,10 @@ impl CortexaDBStore {
320346
let last_applied_id = writer.engine.last_applied_id().0;
321347
save_checkpoint(&self.checkpoint_path, snapshot.state_machine(), last_applied_id)?;
322348

349+
if let Err(e) = snapshot.indexes.vector_index().save_hnsw(&self.hnsw_path) {
350+
eprintln!("Warning: Failed to save HNSW index: {}", e);
351+
}
352+
323353
// Truncate WAL prefix — only keep entries written after the checkpoint.
324354
let wal_path = writer.engine.wal_path().to_path_buf();
325355
WriteAheadLog::truncate_prefix(&wal_path, CommandId(last_applied_id))?;
@@ -515,6 +545,7 @@ impl CortexaDBStore {
515545
writer.engine.get_state_machine(),
516546
writer.indexes.vector.dimension(),
517547
None,
548+
None,
518549
)?;
519550

520551
let indexed = writer.indexes.vector.len();
@@ -786,12 +817,25 @@ impl CortexaDBStore {
786817
state_machine: &StateMachine,
787818
vector_dimension: usize,
788819
hnsw_config: Option<&crate::index::hnsw::HnswConfig>,
820+
loaded_hnsw: Option<crate::index::hnsw::HnswBackend>,
789821
) -> Result<IndexLayer> {
822+
let has_loaded_hnsw = loaded_hnsw.is_some();
790823
let indexes = match hnsw_config {
791-
Some(config) => IndexLayer::new_with_hnsw(vector_dimension, config.clone()),
824+
Some(config) => {
825+
if let Some(loaded) = loaded_hnsw {
826+
IndexLayer::new_with_loaded_hnsw(vector_dimension, config.clone(), Some(loaded))
827+
} else {
828+
IndexLayer::new_with_hnsw(vector_dimension, config.clone())
829+
}
830+
}
792831
None => IndexLayer::new(vector_dimension),
793832
};
794833
let mut indexes = indexes;
834+
835+
if has_loaded_hnsw {
836+
return Ok(indexes);
837+
}
838+
795839
for entry in state_machine.all_memories() {
796840
if let Some(embedding) = entry.embedding.clone() {
797841
indexes.vector_index_mut().index_in_namespace(
@@ -855,6 +899,12 @@ impl Drop for CortexaDBStore {
855899
if self.checkpoint_policy != CheckpointPolicy::Disabled {
856900
let _ = self.checkpoint_now();
857901
}
902+
903+
// Always save HNSW index on drop if it exists (automatic persistence)
904+
let snapshot = self.snapshot.load_full();
905+
if let Err(e) = snapshot.indexes.vector_index().save_hnsw(&self.hnsw_path) {
906+
eprintln!("Warning: Failed to save HNSW on drop: {}", e);
907+
}
858908
}
859909
}
860910

crates/cortexadb-py/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cortexadb-py"
3-
version = "0.1.0"
3+
version = "0.1.3"
44
edition = "2024"
55

66
[lib]

crates/cortexadb-py/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "cortexadb"
7-
version = "0.1.2"
7+
version = "0.1.3"
88
requires-python = ">=3.9"
99
description = "Fast, embedded vector + graph memory for AI agents"
1010
authors = [

0 commit comments

Comments
 (0)