Skip to content

Change lazy_static into once_cell #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ encoding = "0.2.33"
env_logger = "0.10.0"
icu_normalizer = "1.3.2"
icu_properties = "1.3.2"
lazy_static = "1.4.0"
log = "0.4.20"
once_cell = "1.18.0"
ordered-float = "3.9.1"
regex = "1.9.3"
serde = { version = "1.0.188", features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion benches/large_payload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};

pub fn large_payload(c: &mut Criterion) {
let mut payload = b"hello simple ascii "
.repeat(*TOO_BIG_SEQUENCE)
.repeat(TOO_BIG_SEQUENCE)
.as_slice()
.to_vec();
payload.extend("我没有埋怨,磋砣的只是一些时间。 磋砣的只是一些时间。".as_bytes());
Expand Down
121 changes: 61 additions & 60 deletions src/assets.rs
Original file line number Diff line number Diff line change
@@ -1,65 +1,66 @@
use crate::entity::Language;
use ahash::HashMap;
use lazy_static::lazy_static;

lazy_static! {
pub static ref LANGUAGE_SUPPORTED_COUNT: usize = 41;
pub static ref LANGUAGES: [(Language, &'static str, bool, bool);41] = [
// language, alphabet, have_accents, pure_latin
(Language::English, "eationsrhldcmufpgwbyvkjxzq", false, true, ),
(Language::English, "eationsrhldcumfpgwybvkxjzq", false, true, ),
(Language::German, "enirstadhulgocmbfkwzpvüäöj", true, true, ),
(Language::French, "easnitrluodcpmévgfbhqàxèyj", true, true, ),
(Language::Dutch, "enairtodslghvmukcpbwjzfyxë", true, true, ),
(Language::Italian, "eiaonltrscdupmgvfbzhqèàkyò", true, true, ),
(Language::Polish, "aioenrzwsctkydpmuljłgbhąęó", true, true, ),
(Language::Spanish, "eaonsrildtcumpbgvfyóhqíjzá", true, true, ),
(Language::Russian, "оаеинстрвлкмдпугяызбйьчхжц", false, false, ),
(Language::Japanese, "人一大亅丁丨竹笑口日今二彳行十土丶寸寺時乙丿乂气気冂巾亠市目儿見八小凵県月彐門間木東山出本中刀分耳又取最言田心思刂前京尹事生厶云会未来白冫楽灬馬尸尺駅明耂者了阝都高卜占厂广店子申奄亻俺上方冖学衣艮食自", false, false, ),
(Language::Japanese, "ーンス・ルトリイアラックドシレジタフロカテマィグバムプオコデニウメサビナブャエュチキズダパミェョハセベガモツネボソノァヴワポペピケゴギザホゲォヤヒユヨヘゼヌゥゾヶヂヲヅヵヱヰヮヽ゠ヾヷヿヸヹヺ", false, false, ),
(Language::Japanese, "のにるたとはしいをでてがなれからさっりすあもこまうくよきんめおけそつだやえどわちみせじばへびずろほげむべひょゆぶごゃねふぐぎぼゅづざぞぬぜぱぽぷぴぃぁぇぺゞぢぉぅゐゝゑ゛゜ゎゔ゚ゟ゙ゕゖ", false, false, ),
(Language::Portuguese, "aeosirdntmuclpgvbfhãqéçází", true, true, ),
(Language::Swedish, "eanrtsildomkgvhfupäcböåyjx", true, true, ),
(Language::Chinese, "的一是不了在人有我他这个们中来上大为和国地到以说时要就出会可也你对生能而子那得于着下自之年过发后作里用道行所然家种事成方多经么去法学如都同现当没动面起看定天分还进好小部其些主样理心她本前开但因只从想实", false, false, ),
(Language::Ukrainian, "оаніирвтесклудмпзяьбгйчхцї", false, false, ),
(Language::Norwegian, "erntasioldgkmvfpubhåyjøcæw", false, true, ),
(Language::Finnish, "aintesloukämrvjhpydögcbfwz", true, true, ),
(Language::Vietnamese, "nhticgaoumlràđsevpbyưdákộế", true, true, ),
(Language::Czech, "oeantsilvrkdumpíchzáyjběéř", true, true, ),
(Language::Hungarian, "eatlsnkriozáégmbyvdhupjöfc", true, true, ),
(Language::Korean, "이다에의는로하을가고지서한은기으년대사시를리도인스일", false, false, ),
(Language::Indonesian, "aneirtusdkmlgpbohyjcwfvzxq", false, true, ),
(Language::Turkish, "aeinrlıkdtsmyuobüşvgzhcpçğ", true, true, ),
(Language::Romanian, "eiarntulocsdpmăfvîgbșțzhâj", true, true, ),
(Language::Farsi, "ایردنهومتبسلکشزفگعخقجآپحطص", false, false, ),
(Language::Arabic, "اليمونرتبةعدسفهكقأحجشطصىخإ", false, false, ),
(Language::Danish, "erntaisdlogmkfvubhpåyøæcjw", false, true, ),
(Language::Serbian, "аиоенрсуткјвдмплгзбaieonцш", false, false, ),
(Language::Lithuanian, "iasoretnukmlpvdjgėbyųšžcąį", false, true, ),
(Language::Slovene, "eaionrsltjvkdpmuzbghčcšžfy", false, true, ),
(Language::Slovak, "oaenirvtslkdmpuchjbzáyýíčé", true, true, ),
(Language::Hebrew, "יוהלרבתמאשנעםדקחפסכגטצןזך", false, false, ),
(Language::Bulgarian, "аиоентрсвлкдпмзгяъубчцйжщх", false, false, ),
(Language::Croatian, "aioenrjstuklvdmpgzbcčhšžćf", true, true, ),
(Language::Hindi, "करसनतमहपयलवजदगबशटअएथभडचधषइ", false, false, ),
(Language::Estonian, "aiestlunokrdmvgpjhäbõüfcöy", true, true, ),
(Language::Thai, "านรอกเงมยลวดทสตะปบคหแจพชขใ", false, false, ),
(Language::Greek, "ατοιενρσκηπςυμλίόάγέδήωχθύ", false, false, ),
(Language::Tamil, "கதபடரமலனவறயளசநஇணஅஆழஙஎஉஒஸ", false, false, ),
(Language::Kazakh, "аыентрлідсмқкобиуғжңзшйпгө", false, false, ),
];
use once_cell::sync::Lazy;
use std::iter::FromIterator;

/// Table of per-language reference alphabets, built lazily on first access.
///
/// Each row is `(language, alphabet, have_accents, pure_latin)`. The table has 41 rows;
/// a language may own more than one row (English has two, Japanese has three), so the
/// row count is not the number of distinct languages.
// NOTE(review): the alphabets look ordered by character frequency — confirm with the code that consumes this table.
pub(crate) static LANGUAGES: Lazy<[(Language, &'static str, bool, bool); 41]> = Lazy::new(|| {
[
// language, alphabet, have_accents, pure_latin
(Language::English, "eationsrhldcmufpgwbyvkjxzq", false, true, ),
(Language::English, "eationsrhldcumfpgwybvkxjzq", false, true, ),
(Language::German, "enirstadhulgocmbfkwzpvüäöj", true, true, ),
(Language::French, "easnitrluodcpmévgfbhqàxèyj", true, true, ),
(Language::Dutch, "enairtodslghvmukcpbwjzfyxë", true, true, ),
(Language::Italian, "eiaonltrscdupmgvfbzhqèàkyò", true, true, ),
(Language::Polish, "aioenrzwsctkydpmuljłgbhąęó", true, true, ),
(Language::Spanish, "eaonsrildtcumpbgvfyóhqíjzá", true, true, ),
(Language::Russian, "оаеинстрвлкмдпугяызбйьчхжц", false, false, ),
(Language::Japanese, "人一大亅丁丨竹笑口日今二彳行十土丶寸寺時乙丿乂气気冂巾亠市目儿見八小凵県月彐門間木東山出本中刀分耳又取最言田心思刂前京尹事生厶云会未来白冫楽灬馬尸尺駅明耂者了阝都高卜占厂广店子申奄亻俺上方冖学衣艮食自", false, false, ),
(Language::Japanese, "ーンス・ルトリイアラックドシレジタフロカテマィグバムプオコデニウメサビナブャエュチキズダパミェョハセベガモツネボソノァヴワポペピケゴギザホゲォヤヒユヨヘゼヌゥゾヶヂヲヅヵヱヰヮヽ゠ヾヷヿヸヹヺ", false, false, ),
(Language::Japanese, "のにるたとはしいをでてがなれからさっりすあもこまうくよきんめおけそつだやえどわちみせじばへびずろほげむべひょゆぶごゃねふぐぎぼゅづざぞぬぜぱぽぷぴぃぁぇぺゞぢぉぅゐゝゑ゛゜ゎゔ゚ゟ゙ゕゖ", false, false, ),
(Language::Portuguese, "aeosirdntmuclpgvbfhãqéçází", true, true, ),
(Language::Swedish, "eanrtsildomkgvhfupäcböåyjx", true, true, ),
(Language::Chinese, "的一是不了在人有我他这个们中来上大为和国地到以说时要就出会可也你对生能而子那得于着下自之年过发后作里用道行所然家种事成方多经么去法学如都同现当没动面起看定天分还进好小部其些主样理心她本前开但因只从想实", false, false, ),
(Language::Ukrainian, "оаніирвтесклудмпзяьбгйчхцї", false, false, ),
(Language::Norwegian, "erntasioldgkmvfpubhåyjøcæw", false, true, ),
(Language::Finnish, "aintesloukämrvjhpydögcbfwz", true, true, ),
(Language::Vietnamese, "nhticgaoumlràđsevpbyưdákộế", true, true, ),
(Language::Czech, "oeantsilvrkdumpíchzáyjběéř", true, true, ),
(Language::Hungarian, "eatlsnkriozáégmbyvdhupjöfc", true, true, ),
(Language::Korean, "이다에의는로하을가고지서한은기으년대사시를리도인스일", false, false, ),
(Language::Indonesian, "aneirtusdkmlgpbohyjcwfvzxq", false, true, ),
(Language::Turkish, "aeinrlıkdtsmyuobüşvgzhcpçğ", true, true, ),
(Language::Romanian, "eiarntulocsdpmăfvîgbșțzhâj", true, true, ),
(Language::Farsi, "ایردنهومتبسلکشزفگعخقجآپحطص", false, false, ),
(Language::Arabic, "اليمونرتبةعدسفهكقأحجشطصىخإ", false, false, ),
(Language::Danish, "erntaisdlogmkfvubhpåyøæcjw", false, true, ),
(Language::Serbian, "аиоенрсуткјвдмплгзбaieonцш", false, false, ),
(Language::Lithuanian, "iasoretnukmlpvdjgėbyųšžcąį", false, true, ),
(Language::Slovene, "eaionrsltjvkdpmuzbghčcšžfy", false, true, ),
(Language::Slovak, "oaenirvtslkdmpuchjbzáyýíčé", true, true, ),
(Language::Hebrew, "יוהלרבתמאשנעםדקחפסכגטצןזך", false, false, ),
(Language::Bulgarian, "аиоентрсвлкдпмзгяъубчцйжщх", false, false, ),
(Language::Croatian, "aioenrjstuklvdmpgzbcčhšžćf", true, true, ),
(Language::Hindi, "करसनतमहपयलवजदगबशटअएथभडचधषइ", false, false, ),
(Language::Estonian, "aiestlunokrdmvgpjhäbõüfcöy", true, true, ),
(Language::Thai, "านรอกเงมยลวดทสตะปบคหแจพชขใ", false, false, ),
(Language::Greek, "ατοιενρσκηπςυμλίόάγέδήωχθύ", false, false, ),
(Language::Tamil, "கதபடரமலனவறயளசநஇணஅஆழஙஎஉஒஸ", false, false, ),
(Language::Kazakh, "аыентрлідсмқкобиуғжңзшйпгө", false, false, ),
]
});
/// Number of rows in `LANGUAGES`, computed lazily on first access.
///
/// This counts table rows, not distinct languages: a language that owns several
/// alphabets is counted once per row.
pub(crate) static LANGUAGE_SUPPORTED_COUNT: Lazy<usize> = Lazy::new(|| {
    // Read the length from the table itself so the value always matches the array size (41 today).
    LANGUAGES.len()
});


// Direct mapping from an encoding name to the language it implies
pub(crate) static ref ENCODING_TO_LANGUAGE: HashMap<&'static str, Language> = HashMap::from_iter([
("euc-kr", Language::Korean),
("big5", Language::Chinese),
("hz", Language::Chinese),
("gbk", Language::Chinese),
("gb18030", Language::Chinese),
("euc-jp", Language::Japanese),
("iso-2022-jp", Language::Japanese),
("shift_jis", Language::Japanese),
]);
}
/// Lookup from an encoding label to the single language it is bound to.
///
/// Built lazily on first access. Only the encodings listed here have a direct
/// binding (Korean, Chinese and Japanese ones).
pub(crate) static ENCODING_TO_LANGUAGE: Lazy<HashMap<&'static str, Language>> = Lazy::new(|| {
    // Spell the pairs out first, then hand the whole array to the map constructor.
    let bindings = [
        ("euc-kr", Language::Korean),
        ("big5", Language::Chinese),
        ("hz", Language::Chinese),
        ("gbk", Language::Chinese),
        ("gb18030", Language::Chinese),
        ("euc-jp", Language::Japanese),
        ("iso-2022-jp", Language::Japanese),
        ("shift_jis", Language::Japanese),
    ];
    HashMap::from_iter(bindings)
});
2 changes: 1 addition & 1 deletion src/cd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ pub(crate) fn coherence_ratio(
let mut sufficient_match_count: u64 = 0;

for layer in alpha_unicode_split(&decoded_sequence) {
if layer.chars().count() <= *TOO_SMALL_SEQUENCE {
if layer.chars().count() <= TOO_SMALL_SEQUENCE {
continue;
}
let most_common = layer.chars().collect::<Counter<_>>().most_common_ordered();
Expand Down
Loading