diff --git a/somajo/single_token_abbreviations_de.txt b/somajo/single_token_abbreviations_de.txt index f000fdd..cd9910b 100644 --- a/somajo/single_token_abbreviations_de.txt +++ b/somajo/single_token_abbreviations_de.txt @@ -38,9 +38,11 @@ röm.-kath. Uni-Kl. USt-IdNr. -# Custom Abbreviations +# These should be moved to another file: .Net E/E +tl;dr +zl;ng # SAP Versions S/4 diff --git a/somajo/single_token_abbreviations_en.txt b/somajo/single_token_abbreviations_en.txt index 59bb725..5b78214 100644 --- a/somajo/single_token_abbreviations_en.txt +++ b/somajo/single_token_abbreviations_en.txt @@ -9,3 +9,6 @@ a.m. p.m. P.S. T.V. + +# These should be moved to another file: +tl;dr diff --git a/somajo/test/test_tokenizer.py b/somajo/test/test_tokenizer.py index f2d3da7..713c80e 100644 --- a/somajo/test/test_tokenizer.py +++ b/somajo/test/test_tokenizer.py @@ -352,6 +352,9 @@ def test_abbreviations_12(self): def test_abbreviations_13(self): self._equal("Bruce Springsteen aka The Boss", "Bruce Springsteen aka The Boss") + def test_abbreviations_14(self): + self._equal("Englisch: tl;dr. Deutsch: zl;ng.", "Englisch : tl;dr . Deutsch : zl;ng .") + class TestTypos(TestTokenizer): """"""