Skip to content

Commit 2daa44d

Browse files
authored
Merge pull request #44 from oscar-defelice/master
Added the French language support
2 parents d762cfc + e503123 commit 2daa44d

File tree

5 files changed

+64
-1
lines changed

5 files changed

+64
-1
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ MANIFEST
44
.vscode
55
autocorrect/data/*
66
.coverage
7+
.DS_Store

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ bzip2 -d ruwiki-latest-pages-articles.xml.bz2
6262

6363
After that:
6464

65+
First, edit the dictionaries in `autocorrect.constants` to add the word regex and the alphabet for your language.
66+
67+
Then:
68+
6569
```python
6670
>>> from autocorrect.word_count import count_words
6771
>>> count_words('ruwiki-latest-pages-articles.xml', 'ru')

autocorrect/constants.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"cs": r"[AÁBCČDĎEÉĚFGH(Ch)IÍJKLMNŇOÓPQRŘSŠTŤUÚŮVWXYÝZŽaábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž]+",
1010
"el": r"[α-ωΑ-ΩίϊΐόάέύϋΰήώΊΪΪ́ΌΆΈΎΫΫ́ΉΏ]+",
1111
"it": r"[a-zA-ZãáàâçéêíõóôúüÃÁÀÂÇÉÊÍÕÓÔÚÜ]+",
12+
"fr": r"[a-zA-ZãáàâçéêíõóôúüÃÁÀÂÇÉÊÍÕÓÔÚÜ]+",
1213
"vi": r"[a-zA-ZàáạảãÀÁẠẢÃằắặẳẵẰẮẶẲẴầấậẩẫẦẤẬẨẪèéẹẻẽÈÉẸẺẼềếệểễỀẾỆỂỄìíịỉĩÌÍỊỈĨòóọỏõÒÓỌỎÕồốộổỗỒỐỘỔỖờớợởỡỜỚỢỞỠùúụủũÙÚỤỦŨừứựửữỪỨỰỬỮỳýỵỷỹỲÝỴỶỸ]+",
1314
}
1415

@@ -23,13 +24,15 @@
2324
"cs": "aábcčdďeéěfgh(ch)iíjklmnňoópqrřsštťuúůvwxyýzž",
2425
"el": "αβγδεζηθικλμνξοπρςτυφχψωίϊΐόάέύϋΰήώ",
2526
"it": "abcdefghijklmnopqrstuvwxzyãáàâçéêíõóôúü",
27+
"fr": "abcdefghijklmnopqrstuvwxzyãáàâçéêíõóôúü",
2628
"vi": "aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz",
2729
}
2830

2931
ipfs_gateways = [
3032
"http://ipfs.io/ipfs/",
3133
"https://gateway.pinata.cloud/ipfs/",
32-
"https://cf-ipfs.com/ipfs/", # this one has the best performance, but doesn't return download progress
34+
# this one has the best performance, but doesn't return download progress
35+
"https://cf-ipfs.com/ipfs/",
3336
]
3437

3538
ipfs_paths = {
@@ -43,6 +46,7 @@
4346
"pt": ["QmbRSZvfJV6zN12zzWhecphcvE9ZBeQdAJGQ9c9ttJXzcg/pt.tar.gz"],
4447
"el": ["QmbRSZvfJV6zN12zzWhecphcvE9ZBeQdAJGQ9c9ttJXzcg/el.tar.gz"],
4548
"it": ["QmbRSZvfJV6zN12zzWhecphcvE9ZBeQdAJGQ9c9ttJXzcg/it.tar.gz"],
49+
"fr": ["QmPRNDmUDTXikq8gWnGcw3ZGmnoBfvekmAyeyX8y6onf23/fr.tar.gz"],
4650
"vi": ["QmRRJj5i7nkpzTRSKhFe23XMjLRw7f2zD6FLKDrRfzco7f/vi.tar.gz"],
4751
}
4852

@@ -82,4 +86,5 @@
8286
"it": [
8387
"https://dl.dropboxusercontent.com/s/6xci1wfb387zk23/it.tar.gz?dl=0",
8488
],
89+
"fr": ["https://mega.nz/file/kQByQJAb#rMbmF0HG09MLQQ-FDafHrPAgXigJIpmC1zhtxRMp2dQ"],
8590
}

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"Natural Language :: Portuguese",
2525
"Natural Language :: Greek",
2626
"Natural Language :: Italian",
27+
"Natural Language :: French",
2728
"Natural Language :: Vietnamese",
2829
"Programming Language :: Python",
2930
"Programming Language :: Python :: 3",

test_all.py

+52
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,19 @@
127127
"salto": "saulto",
128128
}
129129

130+
french_words_all_correct = {
131+
"ordre": "oldre",
132+
"leger": "lezger",
133+
"cahier": "cnhier",
134+
"saut": "slaut",
135+
"vache": "vacne",
136+
"fromage": "fromae",
137+
"bisous": "biosus",
138+
"possible": "possable",
139+
"position": "posizion",
140+
"populaire": "popularie",
141+
}
142+
130143
single_typos_me = {
131144
"ae",
132145
"ame",
@@ -1009,6 +1022,38 @@
10091022
"cavallo": "cavatlo",
10101023
"poltrona": "poltrola",
10111024
},
1025+
"fr": {
1026+
"disparu": "disparue",
1027+
"atteint": "atteind",
1028+
"croient": "croyent",
1029+
"cicogne": "cygogne",
1030+
"electronique": "électronique",
1031+
"bien": "bein",
1032+
"connexion": "connection",
1033+
"galerie": "gallerie",
1034+
"meilleur": "meiileur",
1035+
"obnubiler": "obnibuler",
1036+
"oculaire": "ocualire",
1037+
"télescope": "teiescope",
1038+
"valeur": "vaelur",
1039+
"vertu": "veltu",
1040+
"salade": "saiade",
1041+
"renommer": "renomner",
1042+
"sibyllin": "sibuliin",
1043+
"successeur": "succaszeur",
1044+
"écologie": "ecoiogie",
1045+
"éloge": "elogie",
1046+
"emménager": "ennenager",
1047+
"cheval": "clreval",
1048+
"permis": "pennis",
1049+
"recueillir": "recuelilir",
1050+
"martel": "manel",
1051+
"veux": "vuex",
1052+
"emploi": "emnloi",
1053+
"pôle": "pole",
1054+
"qui": "uui",
1055+
"experience": "escpehience",
1056+
},
10121057
"vi": {
10131058
"hiếu": "hiéu",
10141059
"hiền": "hién",
@@ -1141,6 +1186,11 @@ def test_italian():
11411186
assert spelltest(spell_it, italian_words_all_correct) == 0
11421187

11431188

1189+
def test_french():
1190+
spell_fr = Speller("fr")
1191+
assert spelltest(spell_fr, french_words_all_correct) == 0
1192+
1193+
11441194
if __name__ == "__main__":
11451195
command = sys.argv[1]
11461196

@@ -1161,6 +1211,8 @@ def test_italian():
11611211
benchmark("spanish words", spell, optional_language_tests["es"])
11621212
spell = Speller("it")
11631213
benchmark("italian words", spell, optional_language_tests["it"])
1214+
spell = Speller("fr")
1215+
benchmark("french words", spell, optional_language_tests["fr"])
11641216
elif command == "find_threshold":
11651217
lang = sys.argv[2]
11661218
test = optional_language_tests[lang]

0 commit comments

Comments
 (0)