Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite load notebook logic, support Obsidian's recursive structure #147

Merged
merged 13 commits into from
Sep 21, 2024
4 changes: 2 additions & 2 deletions fire_seq_search_server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["llm"]
#default = ["llm"]
llm = []

[dependencies]
Expand Down Expand Up @@ -37,6 +37,7 @@ env_logger = "0.11.5"
clap = { version = "4.0", features = ["derive"] }
lazy_static = "1.4.0"
rayon = "1.5"
futures = "0.3"

urlencoding = "2.1.0"

Expand Down Expand Up @@ -65,5 +66,4 @@ pdf-extract-temporary-mitigation-panic = "0.7.1"
# llm related
sha256 = "1.5.0"
reqwest = { version = "0.12", features = ["json"] }
futures = "0.3"
serde_derive = "1.0.209"
6 changes: 3 additions & 3 deletions fire_seq_search_server/obsidian.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
set -e
cargo build
rm ./fire_seq_search_server -f
cp --force target/debug/fire_seq_search_server.exe ./fire_seq_search_server
cp --force target/debug/fire_seq_search_server ./fire_seq_search_server

RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
--notebook_path /c/Users/z2369li/Documents/graph-note-of-greek-myth/希腊神话 \
--obsidian-md
--notebook_path ~/Documents/obsidian-hub-main \
--obsidian-md
163 changes: 86 additions & 77 deletions fire_seq_search_server/src/load_notes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,86 @@ use crate::query_engine::ServerInformation;
use crate::JOURNAL_PREFIX;


use std::borrow::Cow;
use std::borrow::Borrow;

/// One note discovered while scanning the notebook directory.
#[derive(Clone, Debug)]
pub struct NoteListItem {
    /// Path of the note file, stored as a string.
    pub realpath: String,
    /// Title of the note.
    pub title: String,
}

/// Returns the list of notes found under `server_info.notebook_path`.
///
/// The notebook directory is scanned recursively via `list_directory`.
pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
    let notebook_root: &str = &server_info.notebook_path;

    // TODO didn't handle logseq
    list_directory(Cow::Borrowed(notebook_root), true)
}

/// Collects every Markdown note (a file whose path ends with `.md`) found in `path`.
///
/// # Arguments
///
/// * `path` - directory to scan.
/// * `recursive` - when `true`, sub-directories are scanned as well and their
///   notes are appended to the result; when `false`, sub-directories are ignored.
///
/// # Returns
///
/// One [`NoteListItem`] per `.md` file. `realpath` is the entry path and `title`
/// is the file stem; both are converted lossily, so a file name that is not
/// valid UTF-8 is still listed instead of panicking.
///
/// # Aborts
///
/// Aborts the process if `path` itself cannot be read. Because the function calls
/// itself for nested directories, an unreadable nested directory aborts as well.
/// Entries that fail to enumerate, or whose file type cannot be determined, are
/// logged and skipped.
fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
    debug!("Listing directory {}", &path);
    let mut result = Vec::new();

    let path_ref: &str = path.borrow();
    let notebooks = match std::fs::read_dir(path_ref) {
        Ok(x) => x,
        Err(e) => {
            error!("Fatal error ({:?}) when reading {}", e, &path);
            process::abort();
        }
    };

    for note_result in notebooks {
        let entry = match note_result {
            Ok(x) => x,
            Err(e) => {
                error!("Error during looping {:?}", &e);
                continue;
            }
        };
        let file_type = match entry.file_type() {
            Ok(x) => x,
            Err(e) => {
                error!("Error: Can't get file type {:?} {:?}", &entry, &e);
                continue;
            }
        };

        let entry_path = entry.path();
        let entry_path_str = entry_path.to_string_lossy();

        if file_type.is_dir() {
            if recursive {
                let next = list_directory(entry_path_str, true);
                result.extend(next);
            }
            continue;
        }

        if !entry_path_str.ends_with(".md") {
            info!("skip non-md file {:?}", &entry);
            continue;
        }

        // Lossy conversion keeps this consistent with `entry_path_str` above and
        // avoids the panic that `to_str().unwrap()` causes on non-UTF-8 names.
        let note_title = match entry_path.file_stem() {
            Some(osstr) => osstr.to_string_lossy(),
            None => {
                error!("Couldn't get file_stem for {:?}", entry_path);
                continue;
            }
        };

        result.push(NoteListItem {
            realpath: entry_path_str.to_string(),
            title: note_title.into_owned(),
        });
    }

    result
}

/*
pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)> {
// I should remove the unwrap and convert it into map
let path: &str = &server_info.notebook_path;
Expand All @@ -26,14 +106,18 @@ pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)>
(title.to_string(), content)
}).collect(); //silly collect.

// TODO: Silly filter
if server_info.exclude_zotero_items {
error!("exclude zotero disabled");
}
/*
for (file_name, contents) in pages_tmp {
// info!("File Name: {}", &file_name);
if server_info.exclude_zotero_items && file_name.starts_with('@') {
continue;
}
pages.push((file_name,contents));
}
*/
if server_info.enable_journal_query {
info!("Loading journals");
let journals_page = path.clone() + "/journals";
Expand All @@ -56,84 +140,9 @@ pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)>

}

pub fn read_specific_directory(path: &str) -> Vec<(String, String)> {
info!("Try to read {}", &path);
let notebooks = match std::fs::read_dir(path) {
Ok(x) => x,
Err(e) => {
error!("Fatal error ({:?}) when reading {}", e, path);
process::abort();
}
};
let mut note_filenames: Vec<DirEntry> = Vec::new();
for note in notebooks {
let note : DirEntry = note.unwrap();
note_filenames.push(note);
}
// debug!("Note titles: {:?}", &note_filenames);
let result: Vec<(String,String)> = note_filenames.par_iter()
.map(|note| read_md_file_wo_parse(&note))
.filter(|x| (&x).is_some())
.map(|x| x.unwrap())
.collect();
info!("Loaded {} notes from {}", result.len(), path);
// info!("After map {:?}", &result);

result
}


*/


///
///
/// # Arguments
///
/// * `note`:
///
/// returns: Option<(String, String)>
///
/// First: title (filename)
/// Second: full raw text
///
/// I would delay the parsing job, so it could be couples with server info. -Zhenbo Li 2023-02-17
/// If input is a directory or DS_STORE, return None
///
pub fn read_md_file_wo_parse(note: &std::fs::DirEntry) -> Option<(String, String)> {
if let Ok(file_type) = note.file_type() {
// Now let's show our entry's file type!
debug!("{:?}: {:?}", note.path(), file_type);
if file_type.is_dir() {
debug!("{:?} is a directory, skipping", note.path());
return None;
}
} else {
warn!("Couldn't get file type for {:?}", note.path());
return None;
}

let note_path = note.path();
let note_title = match note_path.file_stem() {
Some(osstr) => osstr.to_str().unwrap(),
None => {
error!("Couldn't get file_stem for {:?}", note.path());
return None;
}
};
debug!("note title: {}", &note_title);

let content : String = match std::fs::read_to_string(&note_path) {
Ok(c) => c,
Err(e) => {
if note_title.to_lowercase() == ".ds_store" {
debug!("Ignore .DS_Store for mac");
} else {
error!("Error({:?}) when reading the file {:?}", e, note_path);
}
return None;
}
};

Some((note_title.to_string(),content))
}

6 changes: 2 additions & 4 deletions fire_seq_search_server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ async fn main() {
let matches = Cli::parse();
let server_info: ServerInformation = build_server_info(matches);

let mut engine = QueryEngine::construct(server_info);
let mut engine = QueryEngine::construct(server_info).await;

info!("query engine build finished");
if cfg!(feature="llm") {
Expand All @@ -77,15 +77,13 @@ async fn main() {
let llm_poll = llm_arc.clone();
engine.llm = Some(llm_arc);

let poll_handle = tokio::spawn( async move {
let _poll_handle = tokio::spawn( async move {
loop {
llm_poll.call_llm_engine().await;
let wait_llm = tokio::time::Duration::from_millis(500);
tokio::time::sleep(wait_llm).await;
}
});
// poll_handle.await;

}

let engine_arc = std::sync::Arc::new(engine);
Expand Down
11 changes: 5 additions & 6 deletions fire_seq_search_server/src/markdown_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,17 @@ use crate::query_engine::ServerInformation;

// https://docs.rs/regex/latest/regex/#repetitions
// https://stackoverflow.com/a/8303552/1166518
pub fn exclude_advanced_query(md: &str) -> Cow<str> {
pub fn exclude_advanced_query(md: Cow<'_,str>) -> Cow<'_, str> {
if !md.contains('#') {
return Cow::Borrowed(md);
return md;
}

lazy_static! {
static ref RE: Regex = Regex::new(
r"\#\+BEGIN_QUERY[\S\s]+?\#\+END_QUERY")
.unwrap();
}
// return RE.replace_all(&md, " ")
return RE.replace_all(&md, " ");
return RE.replace_all(&md, " ").into_owned().into();
}

fn hack_specific_chars_cow(text: Cow<str>) -> String {
Expand All @@ -27,7 +26,7 @@ fn hack_specific_chars_cow(text: Cow<str>) -> String {
text.replace(bullet, " ")
}

pub fn parse_logseq_notebook(md: &str, title: &str, server_info: &ServerInformation) -> String {
pub fn parse_logseq_notebook(md: Cow<'_,str>, title: &str, server_info: &ServerInformation) -> String {
// Now we do some parsing for this file
let content = exclude_advanced_query(md);
let content = hack_specific_chars_cow(content);
Expand All @@ -50,4 +49,4 @@ fn hack_specific_chars(text: String) -> String {
let bullet = char::from_u32(0x00002022).unwrap();
// println!("{}", bullet);
text.replace(bullet, " ")
}
}
Loading
Loading