|
| 1 | +/* |
| 2 | +Copyright (c) 2023, Hugo W.L. ter Doest |
| 3 | +
|
| 4 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | +of this software and associated documentation files (the "Software"), to deal |
| 6 | +in the Software without restriction, including without limitation the rights |
| 7 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | +copies of the Software, and to permit persons to whom the Software is |
| 9 | +furnished to do so, subject to the following conditions: |
| 10 | +
|
| 11 | +The above copyright notice and this permission notice shall be included in |
| 12 | +all copies or substantial portions of the Software. |
| 13 | +
|
| 14 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 20 | +THE SOFTWARE. |
| 21 | +*/ |
| 22 | + |
| 23 | +'use strict' |
| 24 | + |
| 25 | +const Tokenizer = require('../lib/natural/tokenizers/aggressive_tokenizer_de') |
| 26 | +const tokenizer = new Tokenizer() |
| 27 | + |
| 28 | +describe('aggressive_tokenizer', function () { |
| 29 | + it('should tokenize strings with diacritics ä, ö and ü, and esszet ß', function () { |
| 30 | + expect(tokenizer.tokenize('Es werden nur Maßnahmen gefördert, die nicht aufgrund einer Rechtsvorschrift umgesetzt werden müssen.')).toEqual( |
| 31 | + ['Es', 'werden', 'nur', 'Maßnahmen', 'gefördert', 'die', 'nicht', 'aufgrund', 'einer', |
| 32 | + 'Rechtsvorschrift', 'umgesetzt', 'werden', 'müssen']) |
| 33 | + expect(tokenizer.tokenize('Anträge sind vor Beginn der jeweiligen Maßnahme zu stellen.')).toEqual( |
| 34 | + ['Anträge', 'sind', 'vor', 'Beginn', 'der', 'jeweiligen', 'Maßnahme', 'zu', 'stellen']) |
| 35 | + }) |
| 36 | +}) |
0 commit comments