Skip to content

Commit

Permalink
change definition of highlight_keywords_in_body (#135)
Browse files Browse the repository at this point in the history
  • Loading branch information
Endle authored Apr 16, 2024
1 parent 8dc0e78 commit b5eeadc
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 19 deletions.
6 changes: 4 additions & 2 deletions fire_seq_search_server/debug_server.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
set -e
rm -f ./fire_seq_search_server
nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
# nix-shell -p cargo -p rustc -p libiconv --run "cargo build"
cargo build
cp target/debug/fire_seq_search_server ./fire_seq_search_server
RUST_BACKTRACE=1 RUST_LOG=debug ./fire_seq_search_server \
--notebook_path /Users/zhenboli/logseq \
--notebook_path ~/logseq
--exclude-zotero-items
# --parse-pdf-links
--notebook_path /Users/zhenboli/logseq \
4 changes: 3 additions & 1 deletion fire_seq_search_server/src/post_query/highlighter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ use regex::RegexBuilder;

use lazy_static::lazy_static;
use crate::post_query::highlighter::HighlightStatusWithWords::{Highlight, Lowlight};
use crate::query_engine::ServerInformation;

lazy_static! {
static ref STOPWORDS_LIST: HashSet<String> = crate::language_tools::generate_stopwords_list();
}

pub fn highlight_keywords_in_body(body: &str, term_tokens: &Vec<String>,
show_summary_single_line_chars_limit: usize) -> String {
server_info: &ServerInformation) -> String {

let show_summary_single_line_chars_limit: usize = server_info.show_summary_single_line_chars_limit;
let blocks = split_body_to_blocks(body, show_summary_single_line_chars_limit);
let nltk = &STOPWORDS_LIST;

Expand Down
15 changes: 4 additions & 11 deletions fire_seq_search_server/src/post_query/hit_parsed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,15 @@ pub struct FireSeqSearchHitParsed {
pub logseq_uri: String,
}




impl FireSeqSearchHitParsed {

pub fn from_tantivy(doc: &tantivy::schema::Document,
score: f32, term_tokens: &Vec<String>,
server_info: &ServerInformation) ->FireSeqSearchHitParsed {
for _field in doc.field_values() {
// debug!("field {:?} ", &field);
}

let title: &str = doc.field_values()[0].value().as_text().unwrap();
let body: &str = doc.field_values()[1].value().as_text().unwrap();
let summary = highlight_keywords_in_body(body, term_tokens, server_info.show_summary_single_line_chars_limit);
let summary = highlight_keywords_in_body(body, term_tokens, server_info);

let mut is_page_hit = true;
let title = if title.starts_with(JOURNAL_PREFIX) {
Expand All @@ -40,9 +35,7 @@ impl FireSeqSearchHitParsed {
title.to_string()
};


let logseq_uri = generate_uri(&title, &is_page_hit, &server_info);

let logseq_uri = generate_uri(&title, &is_page_hit, server_info);

debug!("Processing a hit, title={}, uri={}", &title, &logseq_uri);

Expand Down Expand Up @@ -102,4 +95,4 @@ mod test_serde {
// assert!(serde("Games/EU4").contains("\"logseq://graph/logseq_notebook?page=Games/EU4\""));
//
// }
}
}
2 changes: 1 addition & 1 deletion fire_seq_search_server/src/post_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub fn post_query_wrapper(top_docs: Vec<(f32, tantivy::DocAddress)>,
term: &str,
searcher: &tantivy::LeasedItem<tantivy::Searcher>,
server_info: &ServerInformation) -> Vec<String> {
let term_tokens = tokenize_default(&term);
let term_tokens = tokenize_default(term);
info!("get term tokens {:?}", &term_tokens);
let result: Vec<String> = top_docs.par_iter()
.map(|x| parse_and_serde(x, searcher, &term_tokens, server_info))
Expand Down
14 changes: 10 additions & 4 deletions fire_seq_search_server/tests/unit_test_post_query.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use fire_seq_search_server::post_query::highlighter::{highlight_keywords_in_body, highlight_sentence_with_keywords, locate_single_keyword, split_body_to_blocks, wrap_text_at_given_spots};
use fire_seq_search_server::generate_server_info_for_test;

/// Loads the English sample page from the test resources and returns its full contents.
///
/// The path is relative, so it is resolved against the process's current working directory.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn get_english_text() -> String {
    let fixture_path =
        "tests/resource/pages/International Language, Past, Present & Future by Walter John Clark.md";
    let loaded = std::fs::read_to_string(fixture_path);
    loaded.expect("Should have been able to read the file")
}
/// Test adapter that keeps the three-argument call shape (`body`, `terms`, `limit`)
/// on top of `highlight_keywords_in_body`, which takes a server-info reference
/// as its third argument.
///
/// Builds the server info with `generate_server_info_for_test`, overrides its
/// `show_summary_single_line_chars_limit` field with `limit`, and forwards the call.
fn highlight_keywords_in_body_old_2024_apr(body: &str, terms: &Vec<String>, limit: usize) -> String {
    // Keep the mutable binding confined to the block; only the finished value escapes.
    let configured = {
        let mut info = generate_server_info_for_test();
        info.show_summary_single_line_chars_limit = limit;
        info
    };
    highlight_keywords_in_body(body, terms, &configured)
}

#[test]
fn test_empty_key() {
let text = "Hello World";
let v = Vec::new();

let r = highlight_keywords_in_body(text, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(text, &v, 120);
assert_eq!(4,4);

assert_eq!(&r, "");
Expand All @@ -22,15 +28,15 @@ fn test_empty_key() {
fn test_highlight_wrap() {
let contents = "使用 git shallow clone 下载并编译 Thunderbird".to_string();
let v = vec![String::from("thunderbird")];
let r = highlight_keywords_in_body(&contents, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
assert_eq!(&r, "使用 git shallow clone 下载并编译 <span class=\"fireSeqSearchHighlight\">Thunderbird</span>");
}

#[test]
fn test_highlight_latex() {
let contents = "$\\vec{q_i}^T \\vec{a_j}, i<j$".to_string();
let v = vec![String::from("vec")];
let r = highlight_keywords_in_body(&contents, &v, 120);
let r = highlight_keywords_in_body_old_2024_apr(&contents, &v, 120);
println!("{:?}", &r);
}
#[test]
Expand Down Expand Up @@ -87,4 +93,4 @@ fn test_wrap_text_at_given_spots() {
assert_eq!(right-left, 6);
}
let _r = wrap_text_at_given_spots(&contents, &mats, 320);
}
}

0 comments on commit b5eeadc

Please sign in to comment.