-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathstemmer.xfst
60 lines (52 loc) · 2.38 KB
/
stemmer.xfst
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
! stemmer.xfst
!
! Released under the terms of the MIT License
! Copyright (c) 2011-2015 Çağrı Çöltekin <[email protected]>
!
! Permission is hereby granted, free of charge, to any person obtaining a
! copy of this software and associated documentation files (the "Software"),
! to deal in the Software without restriction, including without limitation
! the rights to use, copy, modify, merge, publish, distribute, sublicense,
! and/or sell copies of the Software, and to permit persons to whom the
! Software is furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in
! all copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
! FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
! DEALINGS IN THE SOFTWARE.
!
!
! This automaton is used for stemming. It uses the analyzer FST to
! analyze the given word, but it strips all tags in the analyzer
! output, leaving only the stem.
!
! It can optionally keep the POS tag of the stem, and may add the
! infinitive suffix to the verbs so that the output is the dictionary
! citation form. Both options can be set in the file options.h.
!
#include "options.h"
! All 26 upper-case ASCII letters. Q, W and X are included so that a tag
! whose name starts with one of them is still recognized by PoStag.
define ASCIIupper [A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z];
! Front vowels: lower case, upper case and circumflexed letters, followed by
! two special symbols (presumably analyzer-internal vowel markers; confirm
! against the analyzer sources).
define Vf [ e | i | ö | ü
          | E | İ | Ö | Ü
          | î | Î
          | %^sFRV | %^sFUV ];
! Back vowels: lower case, upper case and circumflexed letters, followed by
! two special symbols (presumably analyzer-internal vowel markers; confirm
! against the analyzer sources).
define Vb [ a | ı | o | u
          | A | I | O | U
          | â | Â | û | Û
          | %^sBRV | %^sBUV ];
! Voiced consonants, lower case then upper case, plus the special symbol at
! the end. The letters f and F are voiceless and are listed in ConsUV, so
! they are not repeated here; the union Cons is unaffected.
define ConsV b | d | c | g | v | z | j | ğ | l | m | n | r | w | y | B | D | C | G | V | Z | J | Ğ | L | M | N | R | W | Y | %^sVC;
! Voiceless consonants, lower case then upper case, plus the special symbol
! at the end.
define ConsUV [ p | t | ç | k | f | s | ş | h
              | P | T | Ç | K | F | S | Ş | H
              | %^sUC ];
! Any consonant: the union of the voiceless and the voiced classes.
define Cons [ ConsUV | ConsV ];
! A part-of-speech tag: an opening angle bracket, one upper-case ASCII
! letter, any further symbols, and a closing angle bracket.
define PoStag [ "<" ASCIIupper ?* ">" ];
! stem: rewrites a full analysis string into its stem.
!  1. Everything that follows a PoStag is deleted (right-to-left,
!     longest-match replacement).
!  2. If STEMMER_LEMMATIZE is 1, mek or mak is inserted in front of a
!     verb tag. The choice depends on whether a front vowel (Vf) or a back
!     vowel (Vb), followed only by consonants, precedes the tag.
!  3. If STEMMER_KEEP_ROOT_POS is 0, the PoStag left at the end of the
!     string is deleted as well.
define stem [ ?* ->@ 0 || PoStag _ ]
#if (STEMMER_LEMMATIZE == 1)
    .o. [ %< V %> -> m e k %< V %> || Vf Cons* _ ]
    .o. [ %< V %> -> m a k %< V %> || Vb Cons* _ ]
#endif
#if (STEMMER_KEEP_ROOT_POS == 0)
    .o. [ PoStag -> 0 || _ .#. ]
#endif
;
! Invert stem so that the stemmed form is on the upper side, compose it on
! top of the network read from trmorph.fst, push the result on the stack
! and save it to stem.fst.
define stems [ stem.i .o. @"trmorph.fst" ];
regex stems;
save stack stem.fst