File tree Expand file tree Collapse file tree 1 file changed +6
-4
lines changed Expand file tree Collapse file tree 1 file changed +6
-4
lines changed Original file line number Diff line number Diff line change @@ -272,17 +272,19 @@ def expand_maiyamok(sent: Union[str, List[str]]) -> List[str]:
272272 if isinstance (sent , str ):
273273 sent = word_tokenize (sent )
274274
275+ yamok = "ๆ"
276+
275277 # Breaks Maiyamok that attached to others, e.g. "นกๆๆ", "นกๆ ๆ", "นกๆคน"
278+ re_yamok = re .compile (rf"({ yamok } )" )
276279 temp_toks : list [str ] = []
277- for _ , token in enumerate ( sent ) :
278- toks = re .split (r"(ๆ)" , token )
280+ for token in sent :
281+ toks = re_yamok .split (token )
279282 toks = [tok for tok in toks if tok ] # remove empty string ("")
280283 temp_toks .extend (toks )
281284 sent = temp_toks
285+ del temp_toks
282286
283287 output_toks : list [str ] = []
284-
285- yamok = "ๆ"
286288 yamok_count = 0
287289 len_sent = len (sent )
288290 for i in range (len_sent - 1 , - 1 , - 1 ): # do it backward
You can’t perform that action at this time.
0 commit comments