Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

edit deny #148

Merged
merged 14 commits into from
Sep 22, 2024
4 changes: 4 additions & 0 deletions docs/release_notes_0.2_2024Sep.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@


https://github.com/user-attachments/assets/b0a4ca66-0a33-401a-a916-af7a69f2ae7b

21 changes: 13 additions & 8 deletions fire_seq_search_server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
#default = ["llm"]
llm = []

[dependencies]

Expand Down Expand Up @@ -63,8 +60,16 @@ pulldown-cmark = { version = "0.9.2", default-features = false }
pdf-extract-temporary-mitigation-panic = "0.7.1"


# llm related
# TODO I should make them optional
sha256 = "1.5.0"
reqwest = { version = "0.12", features = ["json"] }
serde_derive = "1.0.209"

# TODO Currently turning these off will make the cargo build fail
# I should make these deps optional, so those who don't want LLM can have a smaller binary
sha256 = { version = "1.5.0", optional = true }
reqwest = { version = "0.12", features = ["json"], optional = false }
serde_derive = { version = "1.0.209", optional = false}

[features]
#default = ["llm"]
llm = ["sha256",
#"serde_derive",
#"reqwest"
]
2 changes: 1 addition & 1 deletion fire_seq_search_server/debug_server_mac.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
set -e
rm -f ./fire_seq_search_server
#nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
cargo build
cargo build --features llm
cp target/debug/fire_seq_search_server ./fire_seq_search_server

export RUST_LOG="warn,fire_seq_search_server=info"
Expand Down
2 changes: 1 addition & 1 deletion fire_seq_search_server/deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ allow = [
"MIT", "Apache-2.0",
"BSD-2-Clause", "BSD-3-Clause",
"CC0-1.0",
"MPL-2.0",
]
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
Expand All @@ -34,7 +35,6 @@ confidence-threshold = 0.8
exceptions = [
{ name = "fastdivide", allow = ["zlib-acknowledgement"] },
{ name = "unicode-ident", allow = ["Unicode-DFS-2016"] },
{ allow = ["Unlicense"], crate = "measure_time" }, # tantivy
]

# This section is considered when running `cargo deny check bans`.
Expand Down
6 changes: 4 additions & 2 deletions fire_seq_search_server/obsidian.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
set -e
cargo build
cargo build --features llm
rm ./fire_seq_search_server -f
cp --force target/debug/fire_seq_search_server ./fire_seq_search_server

NOTEBOOK_NAME=AstroWiki_2.0-main

RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
--notebook_path ~/Documents/obsidian-hub-main \
--notebook_path ~/Documents/$NOTEBOOK_NAME \
--obsidian-md
4 changes: 2 additions & 2 deletions fire_seq_search_server/src/http_client/endpoints.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::sync::Arc;
use log::{debug, info};
use log::{debug};

use crate::query_engine::{QueryEngine, ServerInformation};
use axum::Json;
use axum::extract::State;
use axum::{response::Html, routing::get, Router, extract::Path};
use axum::{response::Html, extract::Path};

pub async fn get_server_info(State(engine_arc): State<Arc<QueryEngine>>)
-> Json<ServerInformation> {
Expand Down
8 changes: 5 additions & 3 deletions fire_seq_search_server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ pub mod word_frequency;
pub mod local_llm;


use log::{debug, info};
use log::debug;
use crate::query_engine::ServerInformation;
use crate::query_engine::NotebookSoftware::Logseq;


#[macro_use]
Expand All @@ -19,6 +20,7 @@ pub static JOURNAL_PREFIX: &str = "@journal@";


pub struct Article {
#[allow(dead_code)] /* TODO rethink if we need it 2024 Sep 21 */
file_name: String,
content: String
}
Expand Down Expand Up @@ -72,7 +74,6 @@ tanvity's default tokenizer will lowercase all English characters.
However, I think there could be a better approach
1. use https://github.com/pemistahl/lingua-rs to determine the language of the text
2. Select proper tokenizer
*/
fn process_token_text(text: &str, indices: &Vec<(usize, char)>, token: &jieba_rs::Token<'_>) -> Option<String> {
let raw = String::from(&text[(indices[token.start].0)..(indices[token.end].0)]);
let lower = raw.to_lowercase();
Expand All @@ -82,6 +83,7 @@ fn process_token_text(text: &str, indices: &Vec<(usize, char)>, token: &jieba_rs
Some(lower)
}
}
*/

// TODO use stub now
pub fn tokenize_default(sentence: &str) -> Vec<String> {
Expand Down Expand Up @@ -168,7 +170,7 @@ pub fn generate_server_info_for_test() -> ServerInformation {
show_summary_single_line_chars_limit: 0,
parse_pdf_links: false,
exclude_zotero_items: false,
obsidian_md: false,
software: Logseq,
convert_underline_hierarchy: true,
host: "127.0.0.1:22024".to_string(),
llm_enabled: false,
Expand Down
78 changes: 16 additions & 62 deletions fire_seq_search_server/src/load_notes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use std::fs::DirEntry;
use log::{debug, error, info, warn};
use log::{debug, error, info};
use std::process;

use rayon::prelude::*;
use crate::query_engine::ServerInformation;
use crate::JOURNAL_PREFIX;


use std::borrow::Cow;
Expand All @@ -16,10 +13,24 @@ pub struct NoteListItem {
pub title: String,
}

use crate::query_engine::NotebookSoftware;
pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
let path: &str = &server_info.notebook_path;
let note_list = list_directory( Cow::from(path) , true);

let note_list = match &server_info.software {
NotebookSoftware::Obsidian => list_directory( Cow::from(path) , true),
NotebookSoftware::Logseq => {
let pp = path.to_string() + "/pages";
let mut pages = list_directory( Cow::from(pp), false );

// TODO Journal prefix
let pp = path.to_string() + "/journals";
let jours = list_directory( Cow::from(pp), false );

pages.extend(jours);
pages
},
};
// TODO Logseq journal-prefix handling is still incomplete (see TODO above)
note_list
}
Expand Down Expand Up @@ -82,66 +93,9 @@ fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
};
result.push(row);
}

return result;
}

/*
pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)> {
// I should remove the unwrap and convert it into map
let path: &str = &server_info.notebook_path;
let path = path.to_owned();
let pages_path = if server_info.obsidian_md {
path.clone()
} else{
path.clone() + "/pages"
};


let mut pages: Vec<(String, String)> = Vec:: new();

let pages_tmp: Vec<(String, String)> = read_specific_directory(&pages_path).par_iter()
.map(|(title,md)| {
let content = crate::markdown_parser::parse_logseq_notebook(md, title, server_info);
(title.to_string(), content)
}).collect(); //silly collect.

if server_info.exclude_zotero_items {
error!("exclude zotero disabled");
}
/*
for (file_name, contents) in pages_tmp {
// info!("File Name: {}", &file_name);
if server_info.exclude_zotero_items && file_name.starts_with('@') {
continue;
}
pages.push((file_name,contents));
}
*/
if server_info.enable_journal_query {
info!("Loading journals");
let journals_page = path.clone() + "/journals";
let journals:Vec<(String, String)>
= read_specific_directory(&journals_page).par_iter()
.map(|(title,md)| {
let content = crate::markdown_parser::parse_logseq_notebook(md, title, server_info);
let tantivy_title = JOURNAL_PREFIX.to_owned() + &title;
(tantivy_title, content)
}).collect(); //silly collect.


for (file_name, contents) in journals {
pages.push((file_name,contents));
}

}

pages

}


*/



Expand Down
Loading
Loading