Skip to content

Commit 1b55f4f

Browse files
authored
Merge pull request #1353 from PyThaiNLP/copilot/organise-test-cases-by-functions
refactor(tests): reorganize noauto test cases by function; fix too-broad except
2 parents 715cc06 + a823508 commit 1b55f4f

12 files changed

+367
-124
lines changed

pythainlp/spell/wanchanberta_thai_grammarly.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,7 @@ def align_word_ids(texts: str) -> list[int]:
7171
if word_idx is None:
7272
label_ids.append(-100)
7373
else:
74-
try:
75-
label_ids.append(2)
76-
except Exception:
77-
label_ids.append(-100)
74+
label_ids.append(2)
7875

7976
return label_ids
8077

tests/noauto_onnx/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121

2222
# Names of module to be tested
2323
test_packages: list[str] = [
24+
"tests.noauto_onnx.testn_spell_onnx",
25+
"tests.noauto_onnx.testn_tag_onnx",
2426
"tests.noauto_onnx.testn_tokenize_onnx",
27+
"tests.noauto_onnx.testn_transliterate_onnx",
2528
]
2629

2730

tests/noauto_onnx/testn_spell_onnx.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for spell correction functions that require ONNX Runtime
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (onnxruntime)
8+
# - Platform-specific compatibility issues
9+
# - Version constraints
10+
11+
import unittest
12+
13+
14+
class SpellONNXTestCaseN(unittest.TestCase):
15+
"""Tests for ONNX-based spell correction (requires onnxruntime)"""
16+
17+
def test_words_spelling_correction_returns_list(self):
18+
from pythainlp.spell.words_spelling_correction import (
19+
get_words_spell_suggestion,
20+
)
21+
22+
result = get_words_spell_suggestion("สวัสดี")
23+
self.assertIsInstance(result, list)
24+
25+
def test_words_spelling_correction_nonempty_input(self):
26+
from pythainlp.spell.words_spelling_correction import (
27+
get_words_spell_suggestion,
28+
)
29+
30+
result = get_words_spell_suggestion("กาารเขียน")
31+
self.assertIsInstance(result, list)
32+
self.assertGreater(len(result), 0)
33+
34+
def test_words_spelling_correction_items_are_strings(self):
35+
from pythainlp.spell.words_spelling_correction import (
36+
get_words_spell_suggestion,
37+
)
38+
39+
result = get_words_spell_suggestion("กาารเขียน")
40+
for item in result:
41+
self.assertIsInstance(item, str)
tests/noauto_onnx/testn_tag_onnx.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for POS tagging functions that require ONNX Runtime
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (onnxruntime)
8+
# - Platform-specific compatibility issues
9+
# - Version constraints
10+
11+
import unittest
12+
13+
14+
class TagONNXTestCaseN(unittest.TestCase):
15+
"""Tests for ONNX-based POS tagging (requires onnxruntime)"""
16+
17+
def test_pos_tag_wangchanberta_onnx_returns_list(self):
18+
from pythainlp.tag import pos_tag
19+
20+
result = pos_tag(
21+
["แมว", "กิน", "ปลา"],
22+
engine="wangchanberta_onnx",
23+
)
24+
self.assertIsInstance(result, list)
25+
self.assertGreater(len(result), 0)
26+
27+
def test_pos_tag_wangchanberta_onnx_length_matches(self):
28+
from pythainlp.tag import pos_tag
29+
30+
tokens = ["แมว", "กิน", "ปลา"]
31+
result = pos_tag(tokens, engine="wangchanberta_onnx")
32+
self.assertEqual(len(result), len(tokens))
33+
34+
def test_pos_tag_wangchanberta_onnx_tuple_pairs(self):
35+
from pythainlp.tag import pos_tag
36+
37+
result = pos_tag(
38+
["แมว", "กิน", "ปลา"],
39+
engine="wangchanberta_onnx",
40+
)
41+
for item in result:
42+
self.assertIsInstance(item, tuple)
43+
self.assertEqual(len(item), 2)
44+
word, tag = item
45+
self.assertIsInstance(word, str)
46+
self.assertIsInstance(tag, str)
47+
48+
def test_pos_tag_wangchanberta_onnx_empty_list(self):
49+
from pythainlp.tag import pos_tag
50+
51+
result = pos_tag([], engine="wangchanberta_onnx")
52+
self.assertIsInstance(result, list)
53+
self.assertEqual(len(result), 0)

tests/noauto_onnx/testn_tokenize_onnx.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -79,40 +79,3 @@ def test_sefr_cut(self):
7979
sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tnhc"),
8080
)
8181

82-
83-
class TransliterateONNXTestCaseN(unittest.TestCase):
84-
"""Tests for ONNX-based transliteration (requires onnxruntime)"""
85-
86-
def test_thai2rom_onnx(self):
87-
from pythainlp.transliterate.thai2rom_onnx import romanize
88-
89-
result = romanize("สวัสดี")
90-
self.assertIsInstance(result, str)
91-
self.assertGreater(len(result), 0)
92-
93-
94-
class TagONNXTestCaseN(unittest.TestCase):
95-
"""Tests for ONNX-based POS tagging (requires onnxruntime)"""
96-
97-
def test_pos_tag_wangchanberta_onnx(self):
98-
from pythainlp.tag import pos_tag
99-
100-
result = pos_tag(
101-
["แมว", "กิน", "ปลา"],
102-
engine="wangchanberta_onnx"
103-
)
104-
self.assertIsInstance(result, list)
105-
self.assertGreater(len(result), 0)
106-
self.assertEqual(len(result), 3)
107-
108-
109-
class SpellONNXTestCaseN(unittest.TestCase):
110-
"""Tests for ONNX-based spell correction (requires onnxruntime)"""
111-
112-
def test_words_spelling_correction(self):
113-
from pythainlp.spell.words_spelling_correction import (
114-
get_words_spell_suggestion,
115-
)
116-
117-
result = get_words_spell_suggestion("สวัสดี")
118-
self.assertIsInstance(result, list)
tests/noauto_onnx/testn_transliterate_onnx.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for transliteration functions that require ONNX Runtime
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (onnxruntime)
8+
# - Platform-specific compatibility issues
9+
# - Version constraints
10+
11+
import unittest
12+
13+
14+
class TransliterateONNXTestCaseN(unittest.TestCase):
15+
"""Tests for ONNX-based transliteration (requires onnxruntime)"""
16+
17+
def test_thai2rom_onnx_returns_string(self):
18+
from pythainlp.transliterate.thai2rom_onnx import romanize
19+
20+
result = romanize("สวัสดี")
21+
self.assertIsInstance(result, str)
22+
self.assertGreater(len(result), 0)
23+
24+
def test_thai2rom_onnx_empty_string(self):
25+
from pythainlp.transliterate.thai2rom_onnx import romanize
26+
27+
result = romanize("")
28+
self.assertIsInstance(result, str)
29+
30+
def test_thai2rom_onnx_ascii_passthrough(self):
31+
from pythainlp.transliterate.thai2rom_onnx import romanize
32+
33+
result = romanize("hello")
34+
self.assertIsInstance(result, str)
35+
36+
def test_thai2rom_onnx_mixed_text(self):
37+
from pythainlp.transliterate.thai2rom_onnx import romanize
38+
39+
result = romanize("ภาษาไทย")
40+
self.assertIsInstance(result, str)
41+
self.assertGreater(len(result), 0)

tests/noauto_torch/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,14 @@
2323

2424
# Names of module to be tested
2525
test_packages: list[str] = [
26+
"tests.noauto_torch.testn_augment_torch",
2627
"tests.noauto_torch.testn_lm_torch",
28+
"tests.noauto_torch.testn_parse_torch",
2729
"tests.noauto_torch.testn_spell_torch",
30+
"tests.noauto_torch.testn_summarize_torch",
2831
"tests.noauto_torch.testn_tag_torch",
2932
"tests.noauto_torch.testn_tokenize_torch",
33+
"tests.noauto_torch.testn_transliterate_torch",
3034
]
3135

3236

tests/noauto_torch/testn_augment_torch.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for augmentation functions that require transformers
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (torch, transformers)
8+
# - Python 3.13+ compatibility issues
9+
10+
import unittest
11+
12+
13+
class AugmentTestCaseN(unittest.TestCase):
14+
"""Tests for augmentation functions (requires transformers)"""
15+
16+
def test_augment_wangchanberta_returns_list(self):
17+
from pythainlp.augment.lm import Thai2transformersAug
18+
19+
augmenter = Thai2transformersAug()
20+
result = augmenter.augment("แมวกิน<mask>")
21+
self.assertIsInstance(result, list)
22+
self.assertGreater(len(result), 0)
23+
24+
def test_augment_wangchanberta_items_are_strings(self):
25+
from pythainlp.augment.lm import Thai2transformersAug
26+
27+
augmenter = Thai2transformersAug()
28+
result = augmenter.augment("แมวกิน<mask>")
29+
for item in result:
30+
self.assertIsInstance(item, str)
31+
32+
def test_augment_wangchanberta_generate_returns_list(self):
33+
from pythainlp.augment.lm import Thai2transformersAug
34+
35+
augmenter = Thai2transformersAug()
36+
result = augmenter.generate("แมวกิน<mask>", num_replace_tokens=1)
37+
self.assertIsInstance(result, list)
38+
39+
def test_augment_phayathaibert_returns_list(self):
40+
from pythainlp.augment.lm import ThaiTextAugmenter
41+
42+
augmenter = ThaiTextAugmenter()
43+
result = augmenter.augment("แมวกิน<mask>")
44+
self.assertIsInstance(result, list)
45+
self.assertGreater(len(result), 0)
46+
47+
def test_augment_phayathaibert_items_are_strings(self):
48+
from pythainlp.augment.lm import ThaiTextAugmenter
49+
50+
augmenter = ThaiTextAugmenter()
51+
result = augmenter.augment("แมวกิน<mask>")
52+
for item in result:
53+
self.assertIsInstance(item, str)
54+
55+
def test_augment_phayathaibert_num_augs_respected(self):
56+
from pythainlp.augment.lm import ThaiTextAugmenter
57+
58+
augmenter = ThaiTextAugmenter()
59+
result = augmenter.augment("แมวกิน<mask>", num_augs=2)
60+
self.assertEqual(len(result), 2)
61+
62+
def test_augment_phayathaibert_exceeds_limit_raises(self):
63+
from pythainlp.augment.lm import ThaiTextAugmenter
64+
65+
augmenter = ThaiTextAugmenter()
66+
with self.assertRaises(ValueError):
67+
augmenter.augment("แมวกิน<mask>", num_augs=10)
68+
69+
def test_augment_phayathaibert_adds_mask_if_missing(self):
70+
from pythainlp.augment.lm import ThaiTextAugmenter
71+
72+
augmenter = ThaiTextAugmenter()
73+
result = augmenter.augment("แมวกิน", num_augs=1)
74+
self.assertIsInstance(result, list)
75+
self.assertEqual(len(result), 1)
tests/noauto_torch/testn_parse_torch.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for parsing functions that require torch and transformers
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (torch, transformers)
8+
# - Python 3.13+ compatibility issues
9+
10+
import unittest
11+
12+
13+
class ParseTestCaseN(unittest.TestCase):
14+
"""Tests for parsing functions (requires torch and transformers)"""
15+
16+
def test_dependency_parsing_returns_list(self):
17+
from pythainlp.parse import dependency_parsing
18+
19+
result = dependency_parsing("แมวกินปลา")
20+
self.assertIsInstance(result, list)
21+
self.assertGreater(len(result), 0)
22+
23+
def test_dependency_parsing_result_structure(self):
24+
from pythainlp.parse import dependency_parsing
25+
26+
result = dependency_parsing("แมวกินปลา")
27+
for item in result:
28+
self.assertIsInstance(item, dict)
29+
30+
def test_dependency_parsing_empty_string(self):
31+
from pythainlp.parse import dependency_parsing
32+
33+
result = dependency_parsing("")
34+
self.assertIsInstance(result, list)
tests/noauto_torch/testn_summarize_torch.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
2+
# SPDX-FileType: SOURCE
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Tests for summarization functions that require transformers
6+
# These tests are NOT run in automated CI workflows due to:
7+
# - Large dependencies (torch, transformers)
8+
# - Python 3.13+ compatibility issues
9+
10+
import unittest
11+
12+
13+
class SummarizeTestCaseN(unittest.TestCase):
14+
"""Tests for summarization functions (requires transformers)"""
15+
16+
def test_summarize_keybert_returns_list(self):
17+
from pythainlp.summarize.keybert import KeyBERT
18+
19+
text = "แมวเป็นสัตว์เลี้ยงที่น่ารัก แมวชอบกินปลา แมวชอบนอนหลับ"
20+
keybert = KeyBERT()
21+
result = keybert.extract_keywords(text, max_keywords=2)
22+
self.assertIsInstance(result, list)
23+
24+
def test_summarize_keybert_max_keywords_respected(self):
25+
from pythainlp.summarize.keybert import KeyBERT
26+
27+
text = "แมวเป็นสัตว์เลี้ยงที่น่ารัก แมวชอบกินปลา แมวชอบนอนหลับ"
28+
keybert = KeyBERT()
29+
result = keybert.extract_keywords(text, max_keywords=2)
30+
self.assertLessEqual(len(result), 2)
31+
32+
def test_summarize_mt5_returns_list(self):
33+
from pythainlp.summarize.mt5 import mT5Summarizer
34+
35+
text = "แมวเป็นสัตว์เลี้ยงที่น่ารัก แมวชอบกินปลา แมวชอบนอนหลับ"
36+
summarizer = mT5Summarizer()
37+
result = summarizer.summarize(text)
38+
self.assertIsInstance(result, list)
39+
40+
def test_summarize_mt5_result_items_are_strings(self):
41+
from pythainlp.summarize.mt5 import mT5Summarizer
42+
43+
text = "แมวเป็นสัตว์เลี้ยงที่น่ารัก แมวชอบกินปลา แมวชอบนอนหลับ"
44+
summarizer = mT5Summarizer()
45+
result = summarizer.summarize(text)
46+
for item in result:
47+
self.assertIsInstance(item, str)

0 commit comments

Comments
 (0)