Skip to content

Commit

Permalink
Code cleanup: fixed lots of warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
Endle authored Sep 22, 2024
1 parent 7692bd9 commit 322d731
Show file tree
Hide file tree
Showing 16 changed files with 224 additions and 177 deletions.
4 changes: 4 additions & 0 deletions docs/release_notes_0.2_2024Sep.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@


https://github.com/user-attachments/assets/b0a4ca66-0a33-401a-a916-af7a69f2ae7b

21 changes: 13 additions & 8 deletions fire_seq_search_server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
#default = ["llm"]
llm = []

[dependencies]

Expand Down Expand Up @@ -63,8 +60,16 @@ pulldown-cmark = { version = "0.9.2", default-features = false }
pdf-extract-temporary-mitigation-panic = "0.7.1"


# llm related
# TODO I should make them optional
sha256 = "1.5.0"
reqwest = { version = "0.12", features = ["json"] }
serde_derive = "1.0.209"

# TODO Currently, turning these off makes the cargo build fail
# I should make these deps optional, so that those who don't want LLM support can have a smaller binary
sha256 = { version = "1.5.0", optional = true }
reqwest = { version = "0.12", features = ["json"], optional = false }
serde_derive = { version = "1.0.209", optional = false}

[features]
#default = ["llm"]
llm = ["sha256",
#"serde_derive",
#"reqwest"
]
2 changes: 1 addition & 1 deletion fire_seq_search_server/debug_server_mac.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
set -e
rm -f ./fire_seq_search_server
#nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
cargo build
cargo build --features llm
cp target/debug/fire_seq_search_server ./fire_seq_search_server

export RUST_LOG="warn,fire_seq_search_server=info"
Expand Down
2 changes: 1 addition & 1 deletion fire_seq_search_server/deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ allow = [
"MIT", "Apache-2.0",
"BSD-2-Clause", "BSD-3-Clause",
"CC0-1.0",
"MPL-2.0",
]
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
Expand All @@ -34,7 +35,6 @@ confidence-threshold = 0.8
exceptions = [
{ name = "fastdivide", allow = ["zlib-acknowledgement"] },
{ name = "unicode-ident", allow = ["Unicode-DFS-2016"] },
{ allow = ["Unlicense"], crate = "measure_time" }, # tantivy
]

# This section is considered when running `cargo deny check bans`.
Expand Down
6 changes: 4 additions & 2 deletions fire_seq_search_server/obsidian.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
set -e
cargo build
cargo build --features llm
rm ./fire_seq_search_server -f
cp --force target/debug/fire_seq_search_server ./fire_seq_search_server

NOTEBOOK_NAME=AstroWiki_2.0-main

RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
--notebook_path ~/Documents/obsidian-hub-main \
--notebook_path ~/Documents/$NOTEBOOK_NAME \
--obsidian-md
4 changes: 2 additions & 2 deletions fire_seq_search_server/src/http_client/endpoints.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::sync::Arc;
use log::{debug, info};
use log::{debug};

use crate::query_engine::{QueryEngine, ServerInformation};
use axum::Json;
use axum::extract::State;
use axum::{response::Html, routing::get, Router, extract::Path};
use axum::{response::Html, extract::Path};

pub async fn get_server_info(State(engine_arc): State<Arc<QueryEngine>>)
-> Json<ServerInformation> {
Expand Down
8 changes: 5 additions & 3 deletions fire_seq_search_server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ pub mod word_frequency;
pub mod local_llm;


use log::{debug, info};
use log::debug;
use crate::query_engine::ServerInformation;
use crate::query_engine::NotebookSoftware::Logseq;


#[macro_use]
Expand All @@ -19,6 +20,7 @@ pub static JOURNAL_PREFIX: &str = "@journal@";


pub struct Article {
#[allow(dead_code)] /* TODO rethink if we need it 2024 Sep 21 */
file_name: String,
content: String
}
Expand Down Expand Up @@ -72,7 +74,6 @@ tanvity's default tokenizer will lowercase all English characters.
However, I think there could be a better approach
1. use https://github.com/pemistahl/lingua-rs to determine the language of the text
2. Select proper tokenizer
*/
fn process_token_text(text: &str, indices: &Vec<(usize, char)>, token: &jieba_rs::Token<'_>) -> Option<String> {
let raw = String::from(&text[(indices[token.start].0)..(indices[token.end].0)]);
let lower = raw.to_lowercase();
Expand All @@ -82,6 +83,7 @@ fn process_token_text(text: &str, indices: &Vec<(usize, char)>, token: &jieba_rs
Some(lower)
}
}
*/

// TODO use stub now
pub fn tokenize_default(sentence: &str) -> Vec<String> {
Expand Down Expand Up @@ -168,7 +170,7 @@ pub fn generate_server_info_for_test() -> ServerInformation {
show_summary_single_line_chars_limit: 0,
parse_pdf_links: false,
exclude_zotero_items: false,
obsidian_md: false,
software: Logseq,
convert_underline_hierarchy: true,
host: "127.0.0.1:22024".to_string(),
llm_enabled: false,
Expand Down
78 changes: 16 additions & 62 deletions fire_seq_search_server/src/load_notes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use std::fs::DirEntry;
use log::{debug, error, info, warn};
use log::{debug, error, info};
use std::process;

use rayon::prelude::*;
use crate::query_engine::ServerInformation;
use crate::JOURNAL_PREFIX;


use std::borrow::Cow;
Expand All @@ -16,10 +13,24 @@ pub struct NoteListItem {
pub title: String,
}

use crate::query_engine::NotebookSoftware;
pub fn retrive_note_list(server_info: &ServerInformation) -> Vec<NoteListItem> {
let path: &str = &server_info.notebook_path;
let note_list = list_directory( Cow::from(path) , true);

let note_list = match &server_info.software {
NotebookSoftware::Obsidian => list_directory( Cow::from(path) , true),
NotebookSoftware::Logseq => {
let pp = path.to_string() + "/pages";
let mut pages = list_directory( Cow::from(pp), false );

// TODO Journal prefix
let pp = path.to_string() + "/journals";
let jours = list_directory( Cow::from(pp), false );

pages.extend(jours);
pages
},
};
    // TODO Logseq journal entries are listed but not yet given the journal prefix — confirm against JOURNAL_PREFIX usage
note_list
}
Expand Down Expand Up @@ -82,66 +93,9 @@ fn list_directory(path: Cow<'_, str>, recursive: bool) -> Vec<NoteListItem> {
};
result.push(row);
}

return result;
}

/*
pub fn read_all_notes(server_info: &ServerInformation) -> Vec<(String, String)> {
// I should remove the unwrap and convert it into map
let path: &str = &server_info.notebook_path;
let path = path.to_owned();
let pages_path = if server_info.obsidian_md {
path.clone()
} else{
path.clone() + "/pages"
};
let mut pages: Vec<(String, String)> = Vec:: new();
let pages_tmp: Vec<(String, String)> = read_specific_directory(&pages_path).par_iter()
.map(|(title,md)| {
let content = crate::markdown_parser::parse_logseq_notebook(md, title, server_info);
(title.to_string(), content)
}).collect(); //silly collect.
if server_info.exclude_zotero_items {
error!("exclude zotero disabled");
}
/*
for (file_name, contents) in pages_tmp {
// info!("File Name: {}", &file_name);
if server_info.exclude_zotero_items && file_name.starts_with('@') {
continue;
}
pages.push((file_name,contents));
}
*/
if server_info.enable_journal_query {
info!("Loading journals");
let journals_page = path.clone() + "/journals";
let journals:Vec<(String, String)>
= read_specific_directory(&journals_page).par_iter()
.map(|(title,md)| {
let content = crate::markdown_parser::parse_logseq_notebook(md, title, server_info);
let tantivy_title = JOURNAL_PREFIX.to_owned() + &title;
(tantivy_title, content)
}).collect(); //silly collect.
for (file_name, contents) in journals {
pages.push((file_name,contents));
}
}
pages
}
*/



Expand Down
Loading

0 comments on commit 322d731

Please sign in to comment.