Skip to content

Commit 9a1274b

Browse files
authored
Merge pull request #1029 from bact/update-maiyamok
Remove unnecessary enumerate in expand_maiyamok
2 parents 33fa45a + cbcf8f2 commit 9a1274b

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

pythainlp/util/normalize.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,17 +272,19 @@ def expand_maiyamok(sent: Union[str, List[str]]) -> List[str]:
272272
if isinstance(sent, str):
273273
sent = word_tokenize(sent)
274274

275+
yamok = "ๆ"
276+
275277
# Break apart Maiyamok (ๆ) that is attached to other characters, e.g. "นกๆๆ", "นกๆ ๆ", "นกๆคน"
278+
re_yamok = re.compile(rf"({yamok})")
276279
temp_toks: list[str] = []
277-
for _, token in enumerate(sent):
278-
toks = re.split(r"(ๆ)", token)
280+
for token in sent:
281+
toks = re_yamok.split(token)
279282
toks = [tok for tok in toks if tok] # remove empty string ("")
280283
temp_toks.extend(toks)
281284
sent = temp_toks
285+
del temp_toks
282286

283287
output_toks: list[str] = []
284-
285-
yamok = "ๆ"
286288
yamok_count = 0
287289
len_sent = len(sent)
288290
for i in range(len_sent - 1, -1, -1): # do it backward

0 commit comments

Comments
 (0)