1
1
# -*- coding: utf-8 -*-
2
+ from gtts .langs import _main_langs
3
+ from warnings import warn
2
4
import logging
3
5
4
6
__all__ = ['tts_langs' ]
@@ -14,13 +16,13 @@ def tts_langs():
14
16
Returns:
15
17
dict: A dictionary of the type `{ '<lang>': '<name>'}`
16
18
17
- Where `<lang>` is an IETF language tag such as `en` or `pt-br `,
19
+ Where `<lang>` is an IETF language tag such as `en` or `zh-TW `,
18
20
and `<name>` is the full English name of the language, such as
19
- `English` or `Portuguese (Brazil )`.
21
+ `English` or `Chinese (Mandarin/Taiwan )`.
20
22
21
23
The dictionary returned combines languages from two origins:
22
24
23
- - Languages fetched from Google Translate
25
+ - Languages fetched from Google Translate (pre-generated in :mod:`gtts.langs`)
24
26
- Languages that are undocumented variations that were observed to work and
25
27
present different dialects or accents.
26
28
@@ -32,112 +34,65 @@ def tts_langs():
32
34
return langs
33
35
34
36
35
def _extra_langs():
    """Define extra languages.

    These entries complement the main language table, which this module
    imports as `_main_langs` from :mod:`gtts.langs`.

    Returns:
        dict: A dictionary of extra languages manually defined, of the
            type `{'<lang>': '<name>'}`.

            Variations of the ones generated in `_main_langs`,
            observed to provide different dialects or accents or
            just simply accepted by the Google Translate Text-to-Speech API.

    """
    return {
        # Chinese
        'zh-TW': 'Chinese (Mandarin/Taiwan)',
        'zh': 'Chinese (Mandarin)'
    }
103
53
104
54
105
def _fallback_deprecated_lang(lang):
    """Languages Google Text-to-Speech used to support.

    Language tags that don't work anymore, but that can
    fall back to a more general language code to maintain
    compatibility.

    The lookup is case-insensitive. A tag that is already spelled exactly
    like its fallback (e.g. ``zh-CN``) is not deprecated and is returned
    untouched, without any warning.

    Args:
        lang (string): The language tag.

    Returns:
        string: The language tag, as-is if not deprecated,
            or a fallback if it exists.

    Example:
        ``en-GB`` returns ``en``.
        ``en-gb`` returns ``en``.
        ``zh-cn`` returns ``zh-CN``.

    """

    deprecated = {
        # '<fallback>': [<list of deprecated langs>]
        'en': ['en-us', 'en-ca', 'en-uk', 'en-gb', 'en-au', 'en-gh', 'en-in',
               'en-ie', 'en-nz', 'en-ng', 'en-ph', 'en-za', 'en-tz'],
        'fr': ['fr-ca', 'fr-fr'],
        'pt': ['pt-br', 'pt-pt'],
        'es': ['es-es', 'es-us'],
        'zh-CN': ['zh-cn'],
        'zh-TW': ['zh-tw'],
    }

    lowered = lang.lower()

    for fallback_lang, deprecated_langs in deprecated.items():
        # Some fallbacks differ from their deprecated form only by case
        # ('zh-CN' vs 'zh-cn'). Because the comparison is done on the
        # lower-cased tag, the canonical spelling would otherwise be
        # reported as "deprecated, falling back to itself".
        if lang != fallback_lang and lowered in deprecated_langs:
            msg = (
                "'{}' has been deprecated, falling back to '{}'. "
                "This fallback will be removed in a future version."
            ).format(lang, fallback_lang)

            # Surface the deprecation both to the warnings machinery and
            # to the module logger.
            warn(msg, DeprecationWarning)
            log.warning(msg)

            return fallback_lang

    return lang
0 commit comments