Skip to content

Commit 17ea67a

Browse files
authored
Merge pull request #31 from SilasMarvin/silas-rag-force
Introduce RAG and PostgresML support
2 parents 2c53880 + 9c17fc3 commit 17ea67a

File tree

39 files changed

+3401
-1120
lines changed

39 files changed

+3401
-1120
lines changed

Cargo.lock

Lines changed: 327 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,12 @@
1-
[package]
2-
name = "lsp-ai"
3-
version = "0.3.0"
1+
[workspace]
2+
members = [
3+
"crates/*",
4+
]
5+
resolver = "2"
6+
7+
[workspace.package]
48
edition = "2021"
59
license = "MIT"
610
description = "LSP-AI is an open-source language server that serves as a backend for AI-powered functionality, designed to assist and empower software engineers, not replace them."
711
repository = "https://github.com/SilasMarvin/lsp-ai"
812
readme = "README.md"
9-
10-
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
11-
12-
[dependencies]
13-
anyhow = "1.0.75"
14-
lsp-server = "0.7.6"
15-
lsp-types = "0.95.0"
16-
ropey = "1.6.1"
17-
serde = "1.0.190"
18-
serde_json = "1.0.108"
19-
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
20-
rand = "0.8.5"
21-
tokenizers = "0.14.1"
22-
parking_lot = "0.12.1"
23-
once_cell = "1.19.0"
24-
directories = "5.0.1"
25-
llama-cpp-2 = { version = "0.1.55", optional = true }
26-
minijinja = { version = "1.0.12", features = ["loader"] }
27-
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
28-
tracing = "0.1.40"
29-
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
30-
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
31-
ignore = "0.4.22"
32-
pgml = "1.0.4"
33-
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
34-
indexmap = "2.2.5"
35-
async-trait = "0.1.78"
36-
37-
[features]
38-
default = []
39-
llama_cpp = ["dep:llama-cpp-2"]
40-
metal = ["llama-cpp-2/metal"]
41-
cuda = ["llama-cpp-2/cuda"]
42-
43-
[dev-dependencies]
44-
assert_cmd = "2.0.14"

crates/lsp-ai/Cargo.toml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
[package]
2+
name = "lsp-ai"
3+
version = "0.3.0"
4+
5+
description.workspace = true
6+
repository.workspace = true
7+
readme.workspace = true
8+
edition.workspace = true
9+
license.workspace = true
10+
11+
[dependencies]
12+
anyhow = "1.0.75"
13+
lsp-server = "0.7.6"
14+
lsp-types = "0.95.0"
15+
ropey = "1.6.1"
16+
serde = "1.0.190"
17+
serde_json = "1.0.108"
18+
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
19+
rand = "0.8.5"
20+
tokenizers = "0.14.1"
21+
parking_lot = "0.12.1"
22+
once_cell = "1.19.0"
23+
directories = "5.0.1"
24+
llama-cpp-2 = { version = "0.1.55", optional = true }
25+
minijinja = { version = "1.0.12", features = ["loader"] }
26+
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
27+
tracing = "0.1.40"
28+
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
29+
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
30+
ignore = "0.4.22"
31+
pgml = "1.0.4"
32+
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
33+
indexmap = "2.2.5"
34+
async-trait = "0.1.78"
35+
tree-sitter = "0.22"
36+
utils-tree-sitter = { path = "../utils-tree-sitter", features = ["all"], version = "0.1.0" }
37+
splitter-tree-sitter = { path = "../splitter-tree-sitter", version = "0.1.0" }
38+
text-splitter = { version = "0.13.3" }
39+
md5 = "0.7.0"
40+
41+
[build-dependencies]
42+
cc="*"
43+
44+
[features]
45+
default = []
46+
llama_cpp = ["dep:llama-cpp-2"]
47+
metal = ["llama-cpp-2/metal"]
48+
cuda = ["llama-cpp-2/cuda"]
49+
50+
[dev-dependencies]
51+
assert_cmd = "2.0.14"

src/config.rs renamed to crates/lsp-ai/src/config.rs

Lines changed: 97 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,51 @@ impl Default for PostProcess {
2424
}
2525
}
2626

27+
#[derive(Debug, Clone, Deserialize)]
28+
pub enum ValidSplitter {
29+
#[serde(rename = "tree_sitter")]
30+
TreeSitter(TreeSitter),
31+
#[serde(rename = "text_sitter")]
32+
TextSplitter(TextSplitter),
33+
}
34+
35+
impl Default for ValidSplitter {
36+
fn default() -> Self {
37+
ValidSplitter::TreeSitter(TreeSitter::default())
38+
}
39+
}
40+
41+
const fn chunk_size_default() -> usize {
42+
1500
43+
}
44+
45+
const fn chunk_overlap_default() -> usize {
46+
0
47+
}
48+
49+
#[derive(Debug, Clone, Deserialize)]
50+
pub struct TreeSitter {
51+
#[serde(default = "chunk_size_default")]
52+
pub chunk_size: usize,
53+
#[serde(default = "chunk_overlap_default")]
54+
pub chunk_overlap: usize,
55+
}
56+
57+
impl Default for TreeSitter {
58+
fn default() -> Self {
59+
Self {
60+
chunk_size: 1500,
61+
chunk_overlap: 0,
62+
}
63+
}
64+
}
65+
66+
#[derive(Debug, Clone, Deserialize)]
67+
pub struct TextSplitter {
68+
#[serde(default = "chunk_size_default")]
69+
pub chunk_size: usize,
70+
}
71+
2772
#[derive(Debug, Clone, Deserialize)]
2873
pub enum ValidMemoryBackend {
2974
#[serde(rename = "file_store")]
@@ -67,15 +112,6 @@ impl ChatMessage {
67112
}
68113
}
69114

70-
#[derive(Debug, Clone, Deserialize)]
71-
#[serde(deny_unknown_fields)]
72-
pub struct Chat {
73-
pub completion: Option<Vec<ChatMessage>>,
74-
pub generation: Option<Vec<ChatMessage>>,
75-
pub chat_template: Option<String>,
76-
pub chat_format: Option<String>,
77-
}
78-
79115
#[derive(Clone, Debug, Deserialize)]
80116
#[allow(clippy::upper_case_acronyms)]
81117
#[serde(deny_unknown_fields)]
@@ -85,27 +121,52 @@ pub struct FIM {
85121
pub end: String,
86122
}
87123

124+
const fn max_crawl_memory_default() -> u64 {
125+
100_000_000
126+
}
127+
128+
const fn max_crawl_file_size_default() -> u64 {
129+
10_000_000
130+
}
131+
132+
#[derive(Clone, Debug, Deserialize)]
133+
#[serde(deny_unknown_fields)]
134+
pub struct Crawl {
135+
#[serde(default = "max_crawl_file_size_default")]
136+
pub max_file_size: u64,
137+
#[serde(default = "max_crawl_memory_default")]
138+
pub max_crawl_memory: u64,
139+
#[serde(default)]
140+
pub all_files: bool,
141+
}
142+
143+
#[derive(Clone, Debug, Deserialize)]
144+
pub struct PostgresMLEmbeddingModel {
145+
pub model: String,
146+
pub embed_parameters: Option<Value>,
147+
pub query_parameters: Option<Value>,
148+
}
149+
88150
#[derive(Clone, Debug, Deserialize)]
89151
#[serde(deny_unknown_fields)]
90152
pub struct PostgresML {
91153
pub database_url: Option<String>,
154+
pub crawl: Option<Crawl>,
92155
#[serde(default)]
93-
pub crawl: bool,
156+
pub splitter: ValidSplitter,
157+
pub embedding_model: Option<PostgresMLEmbeddingModel>,
94158
}
95159

96160
#[derive(Clone, Debug, Deserialize, Default)]
97161
#[serde(deny_unknown_fields)]
98162
pub struct FileStore {
99-
#[serde(default)]
100-
pub crawl: bool,
101-
}
102-
103-
const fn n_gpu_layers_default() -> u32 {
104-
1000
163+
pub crawl: Option<Crawl>,
105164
}
106165

107-
const fn n_ctx_default() -> u32 {
108-
1000
166+
impl FileStore {
167+
pub fn new_without_crawl() -> Self {
168+
Self { crawl: None }
169+
}
109170
}
110171

111172
#[derive(Clone, Debug, Deserialize)]
@@ -137,6 +198,17 @@ pub struct MistralFIM {
137198
pub max_requests_per_second: f32,
138199
}
139200

201+
#[cfg(feature = "llama_cpp")]
202+
const fn n_gpu_layers_default() -> u32 {
203+
1000
204+
}
205+
206+
#[cfg(feature = "llama_cpp")]
207+
const fn n_ctx_default() -> u32 {
208+
1000
209+
}
210+
211+
#[cfg(feature = "llama_cpp")]
140212
#[derive(Clone, Debug, Deserialize)]
141213
#[serde(deny_unknown_fields)]
142214
pub struct LLaMACPP {
@@ -230,15 +302,14 @@ pub struct ValidConfig {
230302

231303
#[derive(Clone, Debug, Deserialize, Default)]
232304
pub struct ValidClientParams {
233-
#[serde(alias = "rootURI")]
234-
_root_uri: Option<String>,
235-
_workspace_folders: Option<Vec<String>>,
305+
#[serde(alias = "rootUri")]
306+
pub root_uri: Option<String>,
236307
}
237308

238309
#[derive(Clone, Debug)]
239310
pub struct Config {
240311
pub config: ValidConfig,
241-
_client_params: ValidClientParams,
312+
pub client_params: ValidClientParams,
242313
}
243314

244315
impl Config {
@@ -255,7 +326,7 @@ impl Config {
255326
let client_params: ValidClientParams = serde_json::from_value(args)?;
256327
Ok(Self {
257328
config: valid_args,
258-
_client_params: client_params,
329+
client_params,
259330
})
260331
}
261332

@@ -300,20 +371,17 @@ impl Config {
300371
}
301372
}
302373

303-
// This makes testing much easier.
374+
// For testing use only
304375
#[cfg(test)]
305376
impl Config {
306377
pub fn default_with_file_store_without_models() -> Self {
307378
Self {
308379
config: ValidConfig {
309-
memory: ValidMemoryBackend::FileStore(FileStore { crawl: false }),
380+
memory: ValidMemoryBackend::FileStore(FileStore { crawl: None }),
310381
models: HashMap::new(),
311382
completion: None,
312383
},
313-
_client_params: ValidClientParams {
314-
_root_uri: None,
315-
_workspace_folders: None,
316-
},
384+
client_params: ValidClientParams { root_uri: None },
317385
}
318386
}
319387
}

0 commit comments

Comments
 (0)