Rewrite load notebook logic, support Obsidian's recursive structure (#…
Endle authored Sep 21, 2024
1 parent c86d118 commit 7692bd9
Showing 6 changed files with 190 additions and 105 deletions.
4 changes: 2 additions & 2 deletions fire_seq_search_server/Cargo.toml
@@ -7,7 +7,7 @@ license = "MIT"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [features]
-default = ["llm"]
+#default = ["llm"]
 llm = []
 
 [dependencies]
@@ -37,6 +37,7 @@ env_logger = "0.11.5"
 clap = { version = "4.0", features = ["derive"] }
 lazy_static = "1.4.0"
 rayon = "1.5"
+futures = "0.3"
 
 urlencoding = "2.1.0"

@@ -66,5 +67,4 @@ pdf-extract-temporary-mitigation-panic = "0.7.1"
 # TODO I should make them optional
 sha256 = "1.5.0"
 reqwest = { version = "0.12", features = ["json"] }
-futures = "0.3"
 serde_derive = "1.0.209"
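
Note on the [features] change: with default = ["llm"] commented out, the llm feature is now opt-in, so the plain `cargo build` in obsidian.sh below produces a binary without the LLM code path unless `--features llm` is passed. Below is a minimal, self-contained sketch of that gating pattern, assuming a crate whose Cargo.toml declares `llm = []` as above; the function names are illustrative and not taken from this repository.

```rust
// Sketch only: compile-time gating on the `llm` cargo feature.
// Enable with: cargo build --features llm

#[cfg(feature = "llm")]
fn init_llm() {
    println!("llm feature enabled: initializing engine");
}

#[cfg(not(feature = "llm"))]
fn init_llm() {
    println!("llm feature disabled (the new default after this commit)");
}

fn main() {
    // main.rs in this repo uses the runtime-looking check `if cfg!(feature = "llm")`,
    // which the compiler resolves to a constant; the item-level cfg above is the
    // equivalent compile-time form.
    init_llm();
}
```
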
6 changes: 3 additions & 3 deletions fire_seq_search_server/obsidian.sh
file mode changed from 100644 to 100755 (now executable)
@@ -1,8 +1,8 @@
 set -e
 cargo build
 rm ./fire_seq_search_server -f
-cp --force target/debug/fire_seq_search_server.exe ./fire_seq_search_server
+cp --force target/debug/fire_seq_search_server ./fire_seq_search_server
 
 RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
-    --notebook_path /c/Users/z2369li/Documents/graph-note-of-greek-myth/希腊神话 \
-    --obsidian-md
+    --notebook_path ~/Documents/obsidian-hub-main \
+    --obsidian-md
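
The script now copies the native Linux binary (no `.exe` suffix) and points the server at what looks like a local checkout of the Obsidian Hub repository instead of a Logseq graph. The two flags it passes end up in the clap-derived CLI that `build_server_info` consumes in main.rs; the struct below is only a hedged guess at such a definition, since the real `Cli` type is not part of this commit, and any field beyond these two flags is omitted.

```rust
// Hypothetical CLI sketch matching the flags used by obsidian.sh.
// Assumes clap 4 with the "derive" feature, as declared in Cargo.toml above.
use clap::Parser;

#[derive(Parser, Debug)]
struct Cli {
    /// Notebook root; for Obsidian this is the vault directory,
    /// e.g. ~/Documents/obsidian-hub-main
    #[arg(long = "notebook_path")]
    notebook_path: String,

    /// Treat the notebook as an Obsidian vault (nested folders of .md files)
    /// rather than a Logseq graph.
    #[arg(long)]
    obsidian_md: bool,
}

fn main() {
    let cli = Cli::parse();
    println!("path={} obsidian={}", cli.notebook_path, cli.obsidian_md);
}
```
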
163 changes: 86 additions & 77 deletions fire_seq_search_server/src/load_notes/mod.rs
@@ -7,6 +7,86 @@ use crate::query_engine::ServerInformation;
 use crate::JOURNAL_PREFIX;
 
 
+use std::borrow::Cow;
+use std::borrow::Borrow;
+
+#[derive(Debug, Clone)]
+pub struct NoteListItem {
+    pub realpath: String,
+    pub title: String,
+}
+
+pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
+    let path: &str = &server_info.notebook_path;
+    let note_list = list_directory( Cow::from(path) , true);
+
+    // TODO didn't handle logseq
+    note_list
+}
+
+fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
+    debug!("Listing directory {}", &path);
+    let mut result = Vec::new();
+
+    let path_ref: &str = path.borrow();
+    let notebooks = match std::fs::read_dir(path_ref) {
+        Ok(x) => x,
+        Err(e) => {
+            error!("Fatal error ({:?}) when reading {}", e, &path);
+            process::abort();
+        }
+    };
+
+    for note_result in notebooks {
+        let entry = match note_result {
+            Ok(x) => x,
+            Err(e) => {
+                error!("Error during looping {:?}", &e);
+                continue;
+            }
+        };
+        let file_type = match entry.file_type() {
+            Ok(x) => x,
+            Err(e) => {
+                error!("Error: Can't get file type {:?} {:?}", &entry, &e);
+                continue;
+            }
+        };
+
+        let entry_path = entry.path();
+        let entry_path_str = entry_path.to_string_lossy();
+
+        if file_type.is_dir() {
+            if recursive {
+                let next = list_directory(entry_path_str, true);
+                result.extend(next);
+            }
+            continue;
+        }
+
+        if !entry_path_str.ends_with(".md") {
+            info!("skip non-md file {:?}", &entry);
+            continue;
+        }
+
+        let note_title = match entry_path.file_stem() {
+            Some(osstr) => osstr.to_str().unwrap(),
+            None => {
+                error!("Couldn't get file_stem for {:?}", entry_path);
+                continue;
+            }
+        };
+        let row = NoteListItem {
+            realpath: entry_path_str.to_string(),
+            title: note_title.to_string(),
+        };
+        result.push(row);
+    }
+
+    return result;
+}
+
+/*
 pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)> {
     // I should remove the unwrap and convert it into map
     let path: &str = &server_info.notebook_path;
@@ -26,14 +26,106 @@ pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)>
(title.to_string(), content)
}).collect(); //silly collect.
// TODO: Silly filter
if server_info.exclude_zotero_items {
error!("exclude zotero disabled");
}
/*
for (file_name, contents) in pages_tmp {
// info!("File Name: {}", &file_name);
if server_info.exclude_zotero_items && file_name.starts_with('@') {
continue;
}
pages.push((file_name,contents));
}
*/
if server_info.enable_journal_query {
info!("Loading journals");
let journals_page = path.clone() + "/journals";
@@ -56,84 +140,9 @@ pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)>
}
pub fn read_specific_directory(path: &str) -> Vec<(String, String)> {
info!("Try to read {}", &path);
let notebooks = match std::fs::read_dir(path) {
Ok(x) => x,
Err(e) => {
error!("Fatal error ({:?}) when reading {}", e, path);
process::abort();
}
};
let mut note_filenames: Vec<DirEntry> = Vec::new();
for note in notebooks {
let note : DirEntry = note.unwrap();
note_filenames.push(note);
}
// debug!("Note titles: {:?}", &note_filenames);
let result: Vec<(String,String)> = note_filenames.par_iter()
.map(|note| read_md_file_wo_parse(&note))
.filter(|x| (&x).is_some())
.map(|x| x.unwrap())
.collect();
info!("Loaded {} notes from {}", result.len(), path);
// info!("After map {:?}", &result);

result
}

*/


///
///
/// # Arguments
///
/// * `note`:
///
/// returns: Option<(String, String)>
///
/// First: title (filename)
/// Second: full raw text
///
/// I would delay the parsing job, so it could be couples with server info. -Zhenbo Li 2023-02-17
/// If input is a directory or DS_STORE, return None
///
pub fn read_md_file_wo_parse(note: &std::fs::DirEntry) -> Option<(String, String)> {
if let Ok(file_type) = note.file_type() {
// Now let's show our entry's file type!
debug!("{:?}: {:?}", note.path(), file_type);
if file_type.is_dir() {
debug!("{:?} is a directory, skipping", note.path());
return None;
}
} else {
warn!("Couldn't get file type for {:?}", note.path());
return None;
}

let note_path = note.path();
let note_title = match note_path.file_stem() {
Some(osstr) => osstr.to_str().unwrap(),
None => {
error!("Couldn't get file_stem for {:?}", note.path());
return None;
}
};
debug!("note title: {}", &note_title);

let content : String = match std::fs::read_to_string(&note_path) {
Ok(c) => c,
Err(e) => {
if note_title.to_lowercase() == ".ds_store" {
debug!("Ignore .DS_Store for mac");
} else {
error!("Error({:?}) when reading the file {:?}", e, note_path);
}
return None;
}
};

Some((note_title.to_string(),content))
}
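
The heart of this commit is `list_directory` above: instead of reading a single flat folder, the loader now recurses into sub-directories, which is what an Obsidian vault requires since notes can sit at any depth, and collects `.md` files into `NoteListItem { realpath, title }` records with the title taken from the file stem. The following is a standalone re-sketch of that traversal using only the standard library, with simplified error handling, no logging macros, and a `main` added for demonstration; it mirrors the committed code but is not a drop-in replacement for it.

```rust
use std::borrow::Cow;
use std::fs;

#[derive(Debug)]
struct NoteListItem {
    realpath: String,
    title: String,
}

fn list_md_files(path: Cow<'_, str>) -> Vec<NoteListItem> {
    let mut result = Vec::new();
    let entries = match fs::read_dir(path.as_ref()) {
        Ok(x) => x,
        Err(_) => return result, // the committed code logs the error instead
    };
    for entry in entries.flatten() {
        let entry_path = entry.path();
        let entry_path_str = entry_path.to_string_lossy();
        if entry_path.is_dir() {
            // Recurse: this is what makes nested Obsidian vaults work,
            // in contrast to Logseq's flat pages/ and journals/ layout.
            result.extend(list_md_files(entry_path_str.into_owned().into()));
            continue;
        }
        if !entry_path_str.ends_with(".md") {
            continue; // skip attachments, .DS_Store, etc.
        }
        if let Some(stem) = entry_path.file_stem().and_then(|s| s.to_str()) {
            result.push(NoteListItem {
                realpath: entry_path_str.to_string(),
                title: stem.to_string(),
            });
        }
    }
    result
}

fn main() {
    for note in list_md_files(Cow::from(".")) {
        println!("{:<40} {}", note.title, note.realpath);
    }
}
```
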

6 changes: 2 additions & 4 deletions fire_seq_search_server/src/main.rs
@@ -68,7 +68,7 @@ async fn main() {
     let matches = Cli::parse();
     let server_info: ServerInformation = build_server_info(matches);
 
-    let mut engine = QueryEngine::construct(server_info);
+    let mut engine = QueryEngine::construct(server_info).await;
 
     info!("query engine build finished");
     if cfg!(feature="llm") {
@@ -77,15 +77,13 @@
         let llm_poll = llm_arc.clone();
         engine.llm = Some(llm_arc);
 
-        let poll_handle = tokio::spawn( async move {
+        let _poll_handle = tokio::spawn( async move {
             loop {
                 llm_poll.call_llm_engine().await;
                 let wait_llm = tokio::time::Duration::from_millis(500);
                 tokio::time::sleep(wait_llm).await;
             }
         });
-        // poll_handle.await;
-
     }
 
     let engine_arc = std::sync::Arc::new(engine);
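
Two related changes here: `QueryEngine::construct` is now awaited (the constructor has become async), and the handle of the LLM polling task is bound to `_poll_handle`, which silences the unused-variable warning left behind once the commented-out `poll_handle.await` was removed and makes it explicit that the task runs detached for the life of the process. A self-contained sketch of that detached-polling pattern, using the same tokio calls as the diff; `poll_once` is a stand-in for the real `call_llm_engine`.

```rust
// Minimal sketch of a detached polling loop, assuming
// tokio = { version = "1", features = ["full"] } in Cargo.toml.
use std::sync::Arc;

struct LlmEngine;

impl LlmEngine {
    async fn poll_once(&self) {
        // stand-in for llm_poll.call_llm_engine().await
        println!("polling LLM job queue");
    }
}

#[tokio::main]
async fn main() {
    let llm_arc = Arc::new(LlmEngine);
    let llm_poll = llm_arc.clone();

    // Dropping (or ignoring) the JoinHandle detaches the task: it keeps
    // running as long as the runtime is alive, which is why main.rs only
    // binds it to `_poll_handle` and never awaits it.
    let _poll_handle = tokio::spawn(async move {
        loop {
            llm_poll.poll_once().await;
            let wait_llm = tokio::time::Duration::from_millis(500);
            tokio::time::sleep(wait_llm).await;
        }
    });

    // The real server would now build the engine Arc and serve requests;
    // here we just keep the runtime alive briefly to observe the polling.
    tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
}
```
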
11 changes: 5 additions & 6 deletions fire_seq_search_server/src/markdown_parser/mod.rs
@@ -7,18 +7,17 @@ use crate::query_engine::ServerInformation;
 
 // https://docs.rs/regex/latest/regex/#repetitions
 // https://stackoverflow.com/a/8303552/1166518
-pub fn exclude_advanced_query(md: &str) -> Cow<str> {
+pub fn exclude_advanced_query(md: Cow<'_,str>) -> Cow<'_, str> {
     if !md.contains('#') {
-        return Cow::Borrowed(md);
+        return md;
     }
 
     lazy_static! {
         static ref RE: Regex = Regex::new(
             r"\#\+BEGIN_QUERY[\S\s]+?\#\+END_QUERY")
             .unwrap();
     }
-    // return RE.replace_all(&md, " ")
-    return RE.replace_all(&md, " ");
+    return RE.replace_all(&md, " ").into_owned().into();
 }
 
 fn hack_specific_chars_cow(text: Cow<str>) -> String {
@@ -27,7 +26,7 @@ fn hack_specific_chars_cow(text: Cow<str>) -> String {
     text.replace(bullet, " ")
 }
 
-pub fn parse_logseq_notebook(md: &str, title: &str, server_info: &ServerInformation) -> String {
+pub fn parse_logseq_notebook(md: Cow<'_,str>, title: &str, server_info: &ServerInformation) -> String {
     // Now we do some parsing for this file
     let content = exclude_advanced_query(md);
     let content = hack_specific_chars_cow(content);
@@ -50,4 +49,4 @@ fn hack_specific_chars(text: String) -> String {
     let bullet = char::from_u32(0x00002022).unwrap();
     // println!("{}", bullet);
     text.replace(bullet, " ")
-}
+}
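
The parser entry points now take `Cow<'_, str>` instead of `&str`: a note that contains no `#` at all is returned as-is without copying, while the regex path produces an owned string via `.into_owned().into()`, which is needed because `replace_all` borrows from the local `md` argument. A small self-contained sketch of that borrow-or-own pattern, reusing the same regex and lazy_static combination shown above; the driver code in `main` is illustrative only.

```rust
// Sketch of the Cow-based filtering used by exclude_advanced_query.
// Assumes regex = "1" and lazy_static = "1", both already dependencies.
use std::borrow::Cow;

use lazy_static::lazy_static;
use regex::Regex;

fn exclude_advanced_query(md: Cow<'_, str>) -> Cow<'_, str> {
    if !md.contains('#') {
        // Fast path: hand the input back untouched, borrowed or owned.
        return md;
    }
    lazy_static! {
        static ref RE: Regex =
            Regex::new(r"\#\+BEGIN_QUERY[\S\s]+?\#\+END_QUERY").unwrap();
    }
    // replace_all borrows `md`, so the result is converted to an owned Cow
    // before `md` goes out of scope: hence into_owned().into().
    RE.replace_all(&md, " ").into_owned().into()
}

fn main() {
    let plain = Cow::from("a note without any advanced query");
    let with_query = Cow::from("before #+BEGIN_QUERY [[query]] #+END_QUERY after");
    println!("{}", exclude_advanced_query(plain));      // borrowed, no copy
    println!("{}", exclude_advanced_query(with_query)); // query block stripped
}
```
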