@@ -24,6 +24,51 @@ impl Default for PostProcess {
2424 }
2525}
2626
27+ #[ derive( Debug , Clone , Deserialize ) ]
28+ pub enum ValidSplitter {
29+ #[ serde( rename = "tree_sitter" ) ]
30+ TreeSitter ( TreeSitter ) ,
31+ #[ serde( rename = "text_sitter" ) ]
32+ TextSplitter ( TextSplitter ) ,
33+ }
34+
35+ impl Default for ValidSplitter {
36+ fn default ( ) -> Self {
37+ ValidSplitter :: TreeSitter ( TreeSitter :: default ( ) )
38+ }
39+ }
40+
/// Serde default for the `chunk_size` fields: `1500`.
const fn chunk_size_default() -> usize {
    const DEFAULT_CHUNK_SIZE: usize = 1_500;
    DEFAULT_CHUNK_SIZE
}
44+
/// Serde default for the `chunk_overlap` field: `0`.
const fn chunk_overlap_default() -> usize {
    const DEFAULT_CHUNK_OVERLAP: usize = 0;
    DEFAULT_CHUNK_OVERLAP
}
48+
49+ #[ derive( Debug , Clone , Deserialize ) ]
50+ pub struct TreeSitter {
51+ #[ serde( default = "chunk_size_default" ) ]
52+ pub chunk_size : usize ,
53+ #[ serde( default = "chunk_overlap_default" ) ]
54+ pub chunk_overlap : usize ,
55+ }
56+
57+ impl Default for TreeSitter {
58+ fn default ( ) -> Self {
59+ Self {
60+ chunk_size : 1500 ,
61+ chunk_overlap : 0 ,
62+ }
63+ }
64+ }
65+
66+ #[ derive( Debug , Clone , Deserialize ) ]
67+ pub struct TextSplitter {
68+ #[ serde( default = "chunk_size_default" ) ]
69+ pub chunk_size : usize ,
70+ }
71+
2772#[ derive( Debug , Clone , Deserialize ) ]
2873pub enum ValidMemoryBackend {
2974 #[ serde( rename = "file_store" ) ]
@@ -67,15 +112,6 @@ impl ChatMessage {
67112 }
68113}
69114
70- #[ derive( Debug , Clone , Deserialize ) ]
71- #[ serde( deny_unknown_fields) ]
72- pub struct Chat {
73- pub completion : Option < Vec < ChatMessage > > ,
74- pub generation : Option < Vec < ChatMessage > > ,
75- pub chat_template : Option < String > ,
76- pub chat_format : Option < String > ,
77- }
78-
79115#[ derive( Clone , Debug , Deserialize ) ]
80116#[ allow( clippy:: upper_case_acronyms) ]
81117#[ serde( deny_unknown_fields) ]
@@ -85,27 +121,52 @@ pub struct FIM {
85121 pub end : String ,
86122}
87123
/// Serde default for `Crawl::max_crawl_memory`: `100_000_000`.
// NOTE(review): the unit is presumably bytes — confirm against the crawler.
const fn max_crawl_memory_default() -> u64 {
    const DEFAULT_MAX_CRAWL_MEMORY: u64 = 100_000_000;
    DEFAULT_MAX_CRAWL_MEMORY
}
127+
/// Serde default for `Crawl::max_file_size`: `10_000_000`.
// NOTE(review): the unit is presumably bytes — confirm against the crawler.
const fn max_crawl_file_size_default() -> u64 {
    const DEFAULT_MAX_FILE_SIZE: u64 = 10_000_000;
    DEFAULT_MAX_FILE_SIZE
}
131+
132+ #[ derive( Clone , Debug , Deserialize ) ]
133+ #[ serde( deny_unknown_fields) ]
134+ pub struct Crawl {
135+ #[ serde( default = "max_crawl_file_size_default" ) ]
136+ pub max_file_size : u64 ,
137+ #[ serde( default = "max_crawl_memory_default" ) ]
138+ pub max_crawl_memory : u64 ,
139+ #[ serde( default ) ]
140+ pub all_files : bool ,
141+ }
142+
143+ #[ derive( Clone , Debug , Deserialize ) ]
144+ pub struct PostgresMLEmbeddingModel {
145+ pub model : String ,
146+ pub embed_parameters : Option < Value > ,
147+ pub query_parameters : Option < Value > ,
148+ }
149+
88150#[ derive( Clone , Debug , Deserialize ) ]
89151#[ serde( deny_unknown_fields) ]
90152pub struct PostgresML {
91153 pub database_url : Option < String > ,
154+ pub crawl : Option < Crawl > ,
92155 #[ serde( default ) ]
93- pub crawl : bool ,
156+ pub splitter : ValidSplitter ,
157+ pub embedding_model : Option < PostgresMLEmbeddingModel > ,
94158}
95159
96160#[ derive( Clone , Debug , Deserialize , Default ) ]
97161#[ serde( deny_unknown_fields) ]
98162pub struct FileStore {
99- #[ serde( default ) ]
100- pub crawl : bool ,
101- }
102-
103- const fn n_gpu_layers_default ( ) -> u32 {
104- 1000
163+ pub crawl : Option < Crawl > ,
105164}
106165
107- const fn n_ctx_default ( ) -> u32 {
108- 1000
166+ impl FileStore {
167+ pub fn new_without_crawl ( ) -> Self {
168+ Self { crawl : None }
169+ }
109170}
110171
111172#[ derive( Clone , Debug , Deserialize ) ]
@@ -137,6 +198,17 @@ pub struct MistralFIM {
137198 pub max_requests_per_second : f32 ,
138199}
139200
/// Returns `1000`; only compiled when the `llama_cpp` feature is enabled.
// NOTE(review): presumably used as a serde default by the llama.cpp model
// config — the use site is not visible here.
#[cfg(feature = "llama_cpp")]
const fn n_gpu_layers_default() -> u32 {
    1_000
}
205+
/// Returns `1000`; only compiled when the `llama_cpp` feature is enabled.
// NOTE(review): presumably used as a serde default by the llama.cpp model
// config — the use site is not visible here.
#[cfg(feature = "llama_cpp")]
const fn n_ctx_default() -> u32 {
    1_000
}
210+
211+ #[ cfg( feature = "llama_cpp" ) ]
140212#[ derive( Clone , Debug , Deserialize ) ]
141213#[ serde( deny_unknown_fields) ]
142214pub struct LLaMACPP {
@@ -230,15 +302,14 @@ pub struct ValidConfig {
230302
231303#[ derive( Clone , Debug , Deserialize , Default ) ]
232304pub struct ValidClientParams {
233- #[ serde( alias = "rootURI" ) ]
234- _root_uri : Option < String > ,
235- _workspace_folders : Option < Vec < String > > ,
305+ #[ serde( alias = "rootUri" ) ]
306+ pub root_uri : Option < String > ,
236307}
237308
238309#[ derive( Clone , Debug ) ]
239310pub struct Config {
240311 pub config : ValidConfig ,
241- _client_params : ValidClientParams ,
312+ pub client_params : ValidClientParams ,
242313}
243314
244315impl Config {
@@ -255,7 +326,7 @@ impl Config {
255326 let client_params: ValidClientParams = serde_json:: from_value ( args) ?;
256327 Ok ( Self {
257328 config : valid_args,
258- _client_params : client_params,
329+ client_params,
259330 } )
260331 }
261332
@@ -300,20 +371,17 @@ impl Config {
300371 }
301372}
302373
303- // This makes testing much easier.
374+ // For testing use only
#[cfg(test)]
impl Config {
    /// Builds a test configuration: a file-store memory backend with no crawl
    /// settings, no models, no completion settings, and no client root URI.
    pub fn default_with_file_store_without_models() -> Self {
        let memory = ValidMemoryBackend::FileStore(FileStore::new_without_crawl());
        let config = ValidConfig {
            memory,
            models: HashMap::new(),
            completion: None,
        };
        Self {
            config,
            // The derived `Default` leaves `root_uri` as `None`.
            client_params: ValidClientParams::default(),
        }
    }
}
0 commit comments