55"""
66Syllable tools
77"""
8+
89import re
910
1011from pythainlp import thai_consonants , thai_tonemarks
2324thai_consonants_all = list (thai_consonants )
2425thai_consonants_all .remove ("อ" )
2526
26- _temp = list (
27- "" .join (["" .join (v ) for v in spelling_class .values ()])
28- )
27+ _temp = list ("" .join (["" .join (v ) for v in spelling_class .values ()]))
2928not_spelling_class = [j for j in thai_consonants_all if j not in _temp ]
3029
3130# vowel's short sound
3736# These spelling consonants are live syllables.
3837for i in ["กง" , "กน" , "กม" , "เกย" , "เกอว" ]:
3938 _check_1 .extend (spelling_class [i ])
39+
4040# These spelling consonants are dead syllables.
4141_check_2 = spelling_class ["กก" ] + spelling_class ["กบ" ] + spelling_class ["กด" ]
4242
5454 "high" : thai_high_aspirates + thai_high_irregular ,
5555}
5656thai_initial_consonant_to_type = {}
57+
5758for k , v in thai_initial_consonant_type .items ():
5859 for i in v :
5960 thai_initial_consonant_to_type [i ] = k
@@ -67,7 +68,7 @@ def sound_syllable(syllable: str) -> str:
6768 The syllable is a live syllable or dead syllable.
6869
6970 :param str syllable: Thai syllable
70- :return: syllable's type (live or dead)
71+ :return: syllable's type (" live" or " dead" )
7172 :rtype: str
7273
7374 :Example:
@@ -81,56 +82,78 @@ def sound_syllable(syllable: str) -> str:
8182 print(sound_syllable("เลข"))
8283 # output: dead
8384 """
85+ # if len of syllable < 2
86+ if len (syllable ) < 2 :
87+ return "dead"
88+
8489 # get consonants
8590 consonants = [i for i in syllable if i in list (thai_consonants_all )]
91+ if (
92+ (len (consonants ) == 0 )
93+ and ("อ" in syllable )
94+ and any ((c in set ("เ" )) for c in syllable )
95+ and (len (syllable ) == 2 )
96+ ):
97+ return "live"
98+
8699 # get spelling consonants
87100 spelling_consonant = consonants [- 1 ]
88- # if len of syllable < 2
89- if len (syllable ) < 2 :
90- return "dead"
91- elif (spelling_consonant in _check_2 ) and (
101+ if (spelling_consonant in _check_2 ) and (
92102 any ((c in set ("าีืแูาเโ" )) for c in syllable ) is False
93103 and any ((c in set ("ำใไ" )) for c in syllable ) is False
94104 and bool (pattern .search (syllable )) is not True
95105 ):
96106 return "dead"
97- elif any ((c in set ("าีืแูาโ" )) for c in syllable ): # in syllable:
107+
108+ if any ((c in set ("าีืแูาโ" )) for c in syllable ): # in syllable:
98109 if (
99110 spelling_consonant in _check_1
100111 and bool (re_short .search (syllable )) is not True
101112 ):
102113 return "live"
103- elif (
114+
115+ if (
104116 spelling_consonant != syllable [- 1 ]
105117 and bool (re_short .search (syllable )) is not True
106118 ):
107119 return "live"
108- elif spelling_consonant in _check_2 :
120+
121+ if spelling_consonant in _check_2 :
109122 return "dead"
110- elif bool (re_short .search (syllable )) or any (
123+
124+ if bool (re_short .search (syllable )) or any (
111125 (c in set (short )) for c in syllable
112126 ):
113127 return "dead"
128+
114129 return "live"
115- elif any ((c in set ("ำใไ" )) for c in syllable ):
130+
131+ if any ((c in set ("ำใไ" )) for c in syllable ):
116132 return "live" # if these vowel's long sounds are live syllables
117- elif bool (pattern .search (syllable )): # if it is เ-า
133+
134+ if bool (pattern .search (syllable )): # if it is เ-า
118135 return "live"
119- elif spelling_consonant in _check_1 :
136+
137+ if spelling_consonant in _check_1 :
120138 if (
121139 bool (re_short .search (syllable ))
122140 or any ((c in set (short )) for c in syllable )
123141 ) and len (consonants ) < 2 :
124142 return "dead"
143+
144+ if syllable [- 1 ] in set (short ):
145+ return "dead"
146+
125147 return "live"
126- elif bool (
148+
149+ if bool (
127150 re_short .search (syllable )
128151 ) or any ( # if vowel's short sound is found
129152 (c in set (short )) for c in syllable
130153 ): # consonant in short
131154 return "dead"
132- else :
133- return "dead"
155+
156+ return "dead"
134157
135158
136159def syllable_open_close_detector (syllable : str ) -> str :
@@ -155,10 +178,13 @@ def syllable_open_close_detector(syllable: str) -> str:
155178 # output: open
156179 """
157180 consonants = [i for i in syllable if i in list (thai_consonants )]
181+
158182 if len (consonants ) < 2 :
159183 return "open"
160- elif len (consonants ) == 2 and consonants [- 1 ] == "อ" :
184+
185+ if len (consonants ) == 2 and consonants [- 1 ] == "อ" :
161186 return "open"
187+
162188 return "close"
163189
164190
@@ -186,27 +212,31 @@ def syllable_length(syllable: str) -> str:
186212 consonants = [i for i in syllable if i in list (thai_consonants )]
187213 if len (consonants ) <= 3 and any ((c in set (short )) for c in syllable ):
188214 return "short"
189- elif bool (re_short .search (syllable )):
215+
216+ if bool (re_short .search (syllable )):
190217 return "short"
191- else :
192- return "long"
218+
219+ return "long"
193220
194221
def _tone_mark_detector(syllable: str) -> str:
    """
    Find the first tone mark that occurs in a syllable.

    :param str syllable: Thai syllable
    :return: the first character of the syllable that is found in
        ``thai_tonemarks``, or an empty string if there is none
    :rtype: str
    """
    known_marks = list(thai_tonemarks)
    for char in syllable:
        if char in known_marks:
            # Only the first tone mark is reported.
            return char
    return ""
201228
202229
def _check_sonorant_syllable(syllable: str) -> bool:
    """
    Check whether the last sonorant of a syllable is one of its
    last two consonants.

    The characters of the syllable found in ``thai_low_sonorants`` and
    those found in ``thai_consonants`` are collected in order. The
    result is True when the last collected sonorant equals the last or
    the second-to-last collected consonant.

    A syllable with no sonorant, or with fewer than two consonants, is
    handled without raising ``IndexError``: the former returns False,
    the latter is compared against the consonants that are present.

    :param str syllable: Thai syllable
    :return: True if the last sonorant is among the last two consonants
    :rtype: bool
    """
    sonorants = [char for char in syllable if char in thai_low_sonorants]
    if not sonorants:
        # Nothing to compare; indexing [-1] here used to raise IndexError.
        return False

    consonants = [char for char in syllable if char in list(thai_consonants)]

    # Slicing never raises, unlike consonants[-2] on a short list.
    return sonorants[-1] in consonants[-2:]
211241
212242
@@ -248,9 +278,7 @@ def tone_detector(syllable: str) -> str:
248278 initial_consonant_type = thai_initial_consonant_to_type [initial_consonant ]
249279 # r for store value
250280 r = ""
251- if len (consonants ) > 1 and (
252- initial_consonant in ("อ" , "ห" )
253- ):
281+ if len (consonants ) > 1 and (initial_consonant in ("อ" , "ห" )):
254282 consonant_ending = _check_sonorant_syllable (syllable )
255283 if (
256284 initial_consonant == "อ"
@@ -325,4 +353,5 @@ def tone_detector(syllable: str) -> str:
325353 r = "m"
326354 elif initial_consonant_type == "high" and s == "live" :
327355 r = "r"
356+
328357 return r
0 commit comments