Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change definition of highlight_keywords_in_body #135

Merged
merged 2 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions fire_seq_search_server/debug_server.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Debug helper: removes any stale copy of the server binary, rebuilds with
# cargo, copies the debug binary into the current directory, and launches it
# with RUST_BACKTRACE=1 and RUST_LOG=debug.
# NOTE(review): this listing appears to interleave the pre- and post-change
# lines of a diff (an active and a commented-out build command, repeated
# --notebook_path flags, uneven line continuations) — confirm which lines
# belong to the current script before running it.
set -e
rm -f ./fire_seq_search_server
nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
# nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
cargo build
cp target/debug/fire_seq_search_server ./fire_seq_search_server
RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
--notebook_path /Users/zhenboli/logseq \
--notebook_path ~/logseq
--exclude-zotero-items
# --parse-pdf-links
--notebook_path /Users/zhenboli/logseq \
4 changes: 3 additions & 1 deletion fire_seq_search_server/src/post_query/highlighter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ use regex::RegexBuilder;

use lazy_static::lazy_static;
use crate::post_query::highlighter::HighlightStatusWithWords::{Highlight, Lowlight};
use crate::query_engine::ServerInformation;

// Process-wide stopword set for the highlighter. `lazy_static!` defers the call
// to `crate::language_tools::generate_stopwords_list()` until the static is
// first dereferenced, and the resulting set is reused afterwards.
lazy_static! {
static ref STOPWORDS_LIST: HashSet<String> = crate::language_tools::generate_stopwords_list();
}

pub fn highlight_keywords_in_body(body: &str, term_tokens: &Vec<String>,
show_summary_single_line_chars_limit: usize) -> String {
server_info: &ServerInformation) -> String {

let show_summary_single_line_chars_limit: usize = server_info.show_summary_single_line_chars_limit;
let blocks = split_body_to_blocks(body, show_summary_single_line_chars_limit);
let nltk = &STOPWORDS_LIST;

Expand Down
15 changes: 4 additions & 11 deletions fire_seq_search_server/src/post_query/hit_parsed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,15 @@ pub struct FireSeqSearchHitParsed {
pub logseq_uri: String,
}




impl FireSeqSearchHitParsed {

pub fn from_tantivy(doc: &tantivy::schema::Document,
score: f32, term_tokens: &Vec<String>,
server_info: &ServerInformation) ->FireSeqSearchHitParsed {
for _field in doc.field_values() {
// debug!("field {:?} ", &field);
}

let title: &str = doc.field_values()[0].value().as_text().unwrap();
let body: &str = doc.field_values()[1].value().as_text().unwrap();
let summary = highlight_keywords_in_body(body, term_tokens, server_info.show_summary_single_line_chars_limit);
let summary = highlight_keywords_in_body(body, term_tokens, server_info);

let mut is_page_hit = true;
let title = if title.starts_with(JOURNAL_PREFIX) {
Expand All @@ -40,9 +35,7 @@ impl FireSeqSearchHitParsed {
title.to_string()
};


let logseq_uri = generate_uri(&title, &is_page_hit, &server_info);

let logseq_uri = generate_uri(&title, &is_page_hit, server_info);

debug!("Processing a hit, title={}, uri={}", &title, &logseq_uri);

Expand Down Expand Up @@ -102,4 +95,4 @@ mod test_serde {
// assert!(serde("Games/EU4").contains("\"logseq://graph/logseq_notebook?page=Games/EU4\""));
//
// }
}
}
2 changes: 1 addition & 1 deletion fire_seq_search_server/src/post_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub fn post_query_wrapper(top_docs: Vec<(f32, tantivy::DocAddress)>,
term: &str,
searcher: &tantivy::LeasedItem<tantivy::Searcher>,
server_info: &ServerInformation) -> Vec<String> {
let term_tokens = tokenize_default(&term);
let term_tokens = tokenize_default(term);
info!("get term tokens {:?}", &term_tokens);
let result: Vec<String> = top_docs.par_iter()
.map(|x| parse_and_serde(x, searcher, &term_tokens, server_info))
Expand Down
14 changes: 10 additions & 4 deletions fire_seq_search_server/tests/unit_test_post_query.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use fire_seq_search_server::post_query::highlighter::{highlight_keywords_in_body, highlight_sentence_with_keywords, locate_single_keyword, split_body_to_blocks, wrap_text_at_given_spots};
use fire_seq_search_server::generate_server_info_for_test;

/// Reads the English sample page from the test resources directory and
/// returns its full contents.
///
/// # Panics
/// Panics if the file cannot be read (the path is relative, so it is resolved
/// against the current working directory).
fn get_english_text() -> String {
    let fixture_path =
        "tests/resource/pages/International Language, Past, Present & Future by Walter John Clark.md";
    let contents = std::fs::read_to_string(fixture_path);
    contents.expect("Should have been able to read the file")
}
/// Adapter that keeps the earlier `(body, terms, limit)` call shape working
/// against the `ServerInformation`-based `highlight_keywords_in_body`.
///
/// Starts from `generate_server_info_for_test()`, overrides
/// `show_summary_single_line_chars_limit` with `limit`, and forwards the call.
fn highlight_keywords_in_body_old_2024_apr(body: &str, terms: &Vec<String>, limit: usize) -> String {
    let server_info = {
        let mut info = generate_server_info_for_test();
        info.show_summary_single_line_chars_limit = limit;
        info
    };
    highlight_keywords_in_body(body, terms, &server_info)
}

#[test]
fn test_empty_key() {
let text = "Hello World";
let v = Vec::new();

let r = highlight_keywords_in_body(text, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(text, &v, 120);
assert_eq!(4,4);

assert_eq!(&r, "");
Expand All @@ -22,15 +28,15 @@ fn test_empty_key() {
// Checks that the lowercase term "thunderbird" matches the capitalised word in
// a mixed Chinese/English sentence and that the match is wrapped in a
// `fireSeqSearchHighlight` span while the rest of the text is left untouched.
fn test_highlight_wrap() {
let contents = "使用 git shallow clone 下载并编译 Thunderbird".to_string();
let v = vec![String::from("thunderbird")];
// NOTE(review): the next binding passes the limit directly to
// `highlight_keywords_in_body` and is shadowed immediately afterwards; it
// looks like the pre-change line of the diff — confirm and drop if so.
let r = highlight_keywords_in_body(&contents, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
assert_eq!(&r, "使用 git shallow clone 下载并编译 <span class=\"fireSeqSearchHighlight\">Thunderbird</span>");
}

// Runs the highlighter over a LaTeX snippet with the term "vec" and prints the
// result. There is no assertion, so this only exercises the call path and
// would fail only if the call panics.
#[test]
fn test_highlight_latex() {
let contents = "$\\vec{q_i}^T \\vec{a_j}, i<j$".to_string();
let v = vec![String::from("vec")];
// NOTE(review): the next binding passes the limit directly to
// `highlight_keywords_in_body` and is shadowed immediately afterwards; it
// looks like the pre-change line of the diff — confirm and drop if so.
let r = highlight_keywords_in_body(&contents, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
println!("{:?}", &r);
}
#[test]
Expand Down Expand Up @@ -87,4 +93,4 @@ fn test_wrap_text_at_given_spots() {
assert_eq!(right-left, 6);
}
let _r = wrap_text_at_given_spots(&contents, &mats, 320);
}
}
Loading