Skip to content

Commit

Permalink
parse documents in the new parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Endle committed Sep 14, 2024
1 parent 0d2231c commit fa4972a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 85 deletions.
81 changes: 3 additions & 78 deletions fire_seq_search_server/src/load_notes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
}

fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
info!("Listing directory {}", &path);
debug!("Listing directory {}", &path);
let mut result = Vec::new();

let path_ref: &str = path.borrow();
Expand Down Expand Up @@ -58,7 +58,6 @@ fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {

if file_type.is_dir() {
if recursive {
info!("Recursive loop {:?}", &entry);
let next = list_directory(entry_path_str, true);
result.extend(next);
}
Expand Down Expand Up @@ -87,6 +86,7 @@ fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
return result;
}

/*
pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)> {
// I should remove the unwrap and convert it into map
let path: &str = &server_info.notebook_path;
Expand Down Expand Up @@ -140,84 +140,9 @@ pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)>
}
pub fn read_specific_directory(path: &str) -> Vec<(String, String)> {
info!("Try to read {}", &path);
let notebooks = match std::fs::read_dir(path) {
Ok(x) => x,
Err(e) => {
error!("Fatal error ({:?}) when reading {}", e, path);
process::abort();
}
};
let mut note_filenames: Vec<DirEntry> = Vec::new();
for note in notebooks {
let note : DirEntry = note.unwrap();
note_filenames.push(note);
}
// debug!("Note titles: {:?}", &note_filenames);
let result: Vec<(String,String)> = note_filenames.par_iter()
.map(|note| read_md_file_wo_parse(&note))
.filter(|x| (&x).is_some())
.map(|x| x.unwrap())
.collect();
info!("Loaded {} notes from {}", result.len(), path);
// info!("After map {:?}", &result);

result
}



///
///
/// # Arguments
///
/// * `note`: the directory entry of the note file to read
///
/// returns: Option<(String, String)>
///
/// First: title (filename)
/// Second: full raw text
///
/// I would delay the parsing job, so it could be coupled with server info. -Zhenbo Li 2023-02-17
/// If the input is a directory or a .DS_Store file, return None
///
pub fn read_md_file_wo_parse(note: &std::fs::DirEntry) -> Option<(String, String)> {
if let Ok(file_type) = note.file_type() {
// Now let's show our entry's file type!
debug!("{:?}: {:?}", note.path(), file_type);
if file_type.is_dir() {
debug!("{:?} is a directory, skipping", note.path());
return None;
}
} else {
warn!("Couldn't get file type for {:?}", note.path());
return None;
}
*/

let note_path = note.path();
let note_title = match note_path.file_stem() {
Some(osstr) => osstr.to_str().unwrap(),
None => {
error!("Couldn't get file_stem for {:?}", note.path());
return None;
}
};
debug!("note title: {}", &note_title);

let content : String = match std::fs::read_to_string(&note_path) {
Ok(c) => c,
Err(e) => {
if note_title.to_lowercase() == ".ds_store" {
debug!("Ignore .DS_Store for mac");
} else {
error!("Error({:?}) when reading the file {:?}", e, note_path);
}
return None;
}
};

Some((note_title.to_string(),content))
}

11 changes: 5 additions & 6 deletions fire_seq_search_server/src/markdown_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,17 @@ use crate::query_engine::ServerInformation;

// https://docs.rs/regex/latest/regex/#repetitions
// https://stackoverflow.com/a/8303552/1166518
pub fn exclude_advanced_query(md: &str) -> Cow<str> {
pub fn exclude_advanced_query(md: Cow<'_,str>) -> Cow<'_, str> {
if !md.contains('#') {
return Cow::Borrowed(md);
return md;
}

lazy_static! {
static ref RE: Regex = Regex::new(
r"\#\+BEGIN_QUERY[\S\s]+?\#\+END_QUERY")
.unwrap();
}
// return RE.replace_all(&md, " ")
return RE.replace_all(&md, " ");
return RE.replace_all(&md, " ").into_owned().into();
}

fn hack_specific_chars_cow(text: Cow<str>) -> String {
Expand All @@ -27,7 +26,7 @@ fn hack_specific_chars_cow(text: Cow<str>) -> String {
text.replace(bullet, " ")
}

pub fn parse_logseq_notebook(md: &str, title: &str, server_info: &ServerInformation) -> String {
pub fn parse_logseq_notebook(md: Cow<'_,str>, title: &str, server_info: &ServerInformation) -> String {
// Now we do some parsing for this file
let content = exclude_advanced_query(md);
let content = hack_specific_chars_cow(content);
Expand All @@ -50,4 +49,4 @@ fn hack_specific_chars(text: String) -> String {
let bullet = char::from_u32(0x00002022).unwrap();
// println!("{}", bullet);
text.replace(bullet, " ")
}
}
5 changes: 4 additions & 1 deletion fire_seq_search_server/src/query_engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::sync::Arc;



use std::borrow::Cow;

// This struct should be immutable when the program starts running
#[derive(Debug, Clone, serde::Serialize)]
Expand Down Expand Up @@ -97,7 +98,9 @@ impl QueryEngine {
return;
}
};
let content = raw_content; // TODO parse file after read

let content = crate::markdown_parser::parse_logseq_notebook(
Cow::from(raw_content), &note.title, server_info);

let schema = &document_setting.schema;
let title = schema.get_field("title").unwrap();
Expand Down

0 comments on commit fa4972a

Please sign in to comment.