Skip to content

Commit

Permalink
Merge pull request #33 from allo-media/feature/sc-49424/italian
Browse the repository at this point in the history
[show] Italian support
  • Loading branch information
rtxm authored Jun 28, 2024
2 parents 1530797 + 5ddad96 commit 2640f59
Show file tree
Hide file tree
Showing 11 changed files with 624 additions and 10 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[package]
name = "text2num"
version = "2.2.0"
version = "2.3.0"
authors = ["Allo-Media <[email protected]>"]
edition = "2021"
license = "MIT"
description = "Parse and convert numbers written in English, Spanish, German or French into their digit representation."
description = "Parse and convert numbers written in English, Spanish, German, Italian or French into their digit representation."
keywords = ["NLP", "words-to-numbers"]
categories = ["text-processing"]
repository = "https://github.com/allo-media/text2num-rs"
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021 Groupe Allo-Media
Copyright (c) 2021-2024 Groupe Allo-Media

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
1 change: 1 addition & 0 deletions src/digit_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ impl DigitString {
Ok(())
};
}
// maybe subpart of a bigger number
let mut padding_zeroes = self.buffer[(l - positions)..]
.iter()
.take_while(|&c| *c == b'0')
Expand Down
2 changes: 1 addition & 1 deletion src/lang/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ impl LangInterpretor for German {
}
}
"tausend" | "tausendste" if b.is_range_free(3, 5) => b.shift(3),
"million" | "millionen" | "millionste" => b.shift(6),
"million" | "millionen" | "millionste" if b.is_range_free(6, 8) => b.shift(6),
"milliarde" | "milliarden" | "milliardste" => b.shift(9),
"billion" | "billionste" => b.shift(12),
"und" => Err(Error::Incomplete),
Expand Down
2 changes: 1 addition & 1 deletion src/lang/de/vocabulary.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use phf::{phf_set, Set};

pub static INSIGNIFICANT: Set<&'static str> = phf_set! {
"und", "so", "ach", "doch", "ja"
"aber", "ah", "äh", "ähm", "also", "gut", "auch", "denn", "doch", "dort", "eben", "eh", "halt", "ja", "mal", "sehen", "naja", "nun", "ok", "schon", "so", "genau", "und", "noch"
};
3 changes: 2 additions & 1 deletion src/lang/en/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl LangInterpretor for English {
}
}
"thousand" | "thousandth" if b.is_range_free(3, 5) => b.shift(3),
"million" | "millionth" => b.shift(6),
"million" | "millionth" if b.is_range_free(6, 8) => b.shift(6),
"billion" | "billionth" => b.shift(9),
"and" if b.len() >= 2 => Err(Error::Incomplete),

Expand Down Expand Up @@ -346,6 +346,7 @@ mod tests {
one hundred twenty point o five, one point two hundred thirty-six, one point two three six.",
"12.99, 120.05, 120.05, 1.2 136, 1.236."
);
assert_replace_numbers!("I say point three", "I say point three");
}

#[test]
Expand Down
4 changes: 3 additions & 1 deletion src/lang/es/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ impl LangInterpretor for Spanish {
"ochociento" | "ochocienta" | "octingentésimo" | "octingentésima" => b.put(b"800"),
"noveciento" | "novecienta" | "noningentésimo" | "noningentésima" => b.put(b"900"),
"mil" | "milésimo" | "milésima" if b.is_range_free(3, 5) => b.shift(3),
"millon" | "millón" | "millonésimo" | "millonésima" => b.shift(6),
"millon" | "millón" | "millonésimo" | "millonésima" if b.is_range_free(6, 8) => {
b.shift(6)
}
"y" if b.len() >= 2 => Err(Error::Incomplete),

_ => Err(Error::NaN),
Expand Down
20 changes: 18 additions & 2 deletions src/lang/fr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ impl LangInterpretor for French {
}
"cent" | "centième" => {
let peek = b.peek(2);
if (peek.len() == 1 || peek < b"20") && peek != b"1" {
if (peek.len() == 1 || peek < b"20") && peek != b"1" && peek != b"01" {
b.shift(2)
} else {
Err(Error::Overlap)
Expand All @@ -171,7 +171,7 @@ impl LangInterpretor for French {
b.shift(3)
}
}
"million" | "millionième" => b.shift(6),
"million" | "millionième" if b.is_range_free(6, 8) => b.shift(6),
"milliard" | "milliardième" => b.shift(9),
"et" if b.len() >= 2 => Err(Error::Incomplete),

Expand Down Expand Up @@ -290,16 +290,26 @@ mod tests {

#[test]
fn test_apply() {
assert_text2digits!(
"cinquante trois mille millions deux cent quarante-trois mille sept cent vingt-quatre",
"53000243724"
);

assert_text2digits!(
"cinquante trois mille millions deux cent quarante trois mille sept cent vingt quatre",
"53000243724"
);

assert_text2digits!(
"cinquante et un million cinq cent soixante-dix-huit mille trois cent deux",
"51578302"
);
assert_text2digits!(
"cinquante et un million cinq cent soixante dix huit mille trois cent deux",
"51578302"
);

assert_text2digits!("quatre-vingt-cinq", "85");
assert_text2digits!("quatre vingt cinq", "85");

assert_text2digits!("quatre vingt un", "81");
Expand Down Expand Up @@ -328,17 +338,20 @@ mod tests {

// Year-style reading of a number ("nineteen hundred seventy-three"): the
// hundreds multiplier is a two-word number below twenty ("dix neuf").
// Both the hyphenated and the space-separated spelling of the final
// compound are paired with the same expected digit string, "1973".
// NOTE(review): `assert_text2digits!` is defined outside this view; it
// presumably asserts that converting the first argument yields the second.
#[test]
fn test_centuries() {
// Hyphenated compound: "soixante-treize".
assert_text2digits!("dix neuf cent soixante-treize", "1973");
// Same number with the compound written as separate words.
assert_text2digits!("dix neuf cent soixante treize", "1973");
}

// Singular French ordinals: each spelled-out ordinal is paired with its
// digits followed by the "ème" suffix.
// NOTE(review): `assert_text2digits!` is defined outside this view; it
// presumably asserts that converting the first argument yields the second.
#[test]
fn test_ordinals() {
// Hyphenated and space-separated spellings share the expected "25ème".
assert_text2digits!("vingt-cinquième", "25ème");
assert_text2digits!("vingt cinquième", "25ème");
// "et unième" form for the twenty-first.
assert_text2digits!("vingt et unième", "21ème");
}

// Plural ordinal forms (as used for fraction denominators, going by the
// test name): each is paired with its digits followed by the plural
// "èmes" suffix.
// NOTE(review): `assert_text2digits!` is defined outside this view; it
// presumably asserts that converting the first argument yields the second.
#[test]
fn test_fractions() {
// Hyphenated and space-separated spellings share the expected "25èmes".
assert_text2digits!("vingt-cinquièmes", "25èmes");
assert_text2digits!("vingt cinquièmes", "25èmes");
// Plural of the "et unième" form.
assert_text2digits!("vingt et unièmes", "21èmes");
}
Expand All @@ -347,6 +360,7 @@ mod tests {
fn test_zeroes() {
assert_text2digits!("zéro", "0");
assert_text2digits!("zéro huit", "08");
assert_text2digits!("zéro zéro cent vingt-cinq", "00125");
assert_text2digits!("zéro zéro cent vingt cinq", "00125");
assert_invalid!("cinq zéro");
assert_invalid!("cinquante zéro trois");
Expand All @@ -366,6 +380,7 @@ mod tests {
assert_invalid!("vingt un");
assert_invalid!("zéro zéro trente quatre vingt");
assert_invalid!("quatre-vingt dix-huit");
assert_invalid!("mille un cent");
}

#[test]
Expand Down Expand Up @@ -451,6 +466,7 @@ mod tests {
"la densité moyenne est de zéro virgule cinq.",
"la densité moyenne est de 0,5."
);
assert_replace_numbers!("Je dis virgule cinq", "Je dis virgule cinq");
}

#[test]
Expand Down
Loading

0 comments on commit 2640f59

Please sign in to comment.