From 5952641a8fc196ce883b3356726a21cf9f1b8b4a Mon Sep 17 00:00:00 2001 From: Zhenbo Li Date: Sat, 21 Sep 2024 14:50:05 -0400 Subject: [PATCH] remove obsidian header --- .../src/markdown_parser/mod.rs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fire_seq_search_server/src/markdown_parser/mod.rs b/fire_seq_search_server/src/markdown_parser/mod.rs index 62705ce..fc727f8 100644 --- a/fire_seq_search_server/src/markdown_parser/mod.rs +++ b/fire_seq_search_server/src/markdown_parser/mod.rs @@ -26,13 +26,39 @@ fn hack_specific_chars_cow(text: Cow) -> String { text.replace(bullet, " ") } +use crate::query_engine::NotebookSoftware; +use std::borrow::Borrow; +use log::info; + +fn remove_obsidian_header<'a>(content: Cow<'a, str>) -> Cow<'a, str> { + lazy_static! { + static ref RE: Regex = Regex::new( + r"---[\s\S]*?---" + ).unwrap(); + } + info!("from {:?}", &content); + let cr = content.borrow(); + let ret: Cow = RE.replace(cr, " "); + info!("into {:?}", &ret); + ret.into_owned().into() +} + pub fn parse_logseq_notebook(md: Cow<'_,str>, title: &str, server_info: &ServerInformation) -> String { // Now we do some parsing for this file let content = exclude_advanced_query(md); let content = hack_specific_chars_cow(content); + + let content = Cow::from(content); + let content = match &server_info.software { + NotebookSoftware::Obsidian => remove_obsidian_header(content), + _ => content, + }; let content: String = markdown_to_text::convert_from_logseq( &content, title, server_info); + + //let content = content.into_owned(); content + }