|
| 1 | +import regex |
| 2 | + |
# Region suffixes, grouped by language. Combined with a language prefix
# (e.g. "en-" + "GB") they form the locale codes used as keys in the
# per-locale tables of this module.
englishLocales = ['AU', 'GB', 'HK', 'IN', 'NZ', 'ZA', 'ZM']
arabicLocales = [
    'AE', 'BH', 'DZ', 'EG', 'IQ', 'JO', 'KW', 'LB', 'LY',
    'MA', 'QM', 'QA', 'SA', 'SD', 'SY', 'TN', 'YE',
]
farsiLocales = ['IR', 'AF']
bengaliLocales = ['BD', 'IN']

# Full locale codes grouped by decimal-separator convention, as the names
# indicate ("." for dotDecimal, "," for commaDecimal).
dotDecimal = ['ar-EG', 'ar-LB', 'ar-LY']
commaDecimal = [
    'bg-BG', 'cs-CZ', 'da-DK', 'de-DE', 'el-GR', 'en-ZM', 'es-ES', 'fr-CA', 'fr-FR',
    'id-ID', 'it-IT', 'ku-IQ', 'hi-IN', 'hu-HU', 'nb-NO', 'nn-NO', 'nl-NL', 'pl-PL', 'pt-PT',
    'ru-RU', 'kk-KZ', 'si-LK', 'sl-SI', 'sr-RS@latin', 'sr-RS', 'sv-SE', 'tr-TR', 'uk-UA', 'vi-VN',
]
| 13 | + |
# Locale code -> anchored regex pattern (as a string, not compiled) that
# accepts text made up only of that locale's letters.
#
# Many entries list only the uppercase forms of non-ASCII letters, so the
# patterns are meant to be applied case-insensitively.
alpha = {
    'en-US': r"^[A-Za-z]+$",
    'az-AZ': r"^[A-VXYZÇƏĞİıÖŞÜ]+$",
    'bg-BG': r"^[А-Я]+$",
    'cs-CZ': r"^[A-Za-zÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]+$",
    'da-DK': r"^[A-Za-zÆØÅ]+$",
    'de-DE': r"^[A-Za-zÄÖÜß]+$",
    'el-GR': r"^[Α-ώ]+$",
    'es-ES': r"^[A-Za-zÁÉÍÑÓÚÜ]+$",
    'fa-IR': r"^[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی]+$",
    'fi-FI': r"^[A-Za-zÅÄÖ]+$",
    'fr-FR': r"^[A-Za-zÀÂÆÇÉÈÊËÏÎÔŒÙÛÜŸ]+$",
    'it-IT': r"^[A-Za-zÀÉÈÌÎÓÒÙ]+$",
    # Half-width katakana is written as an escaped range (U+FF66-U+FF9F):
    # the literal characters do not survive Unicode normalisation, which
    # had turned the range into a reversed (uncompilable) one.
    'ja-JP': r"^[ぁ-んァ-ヶ\uFF66-\uFF9F一-龠ー・。、]+$",
    'nb-NO': r"^[A-Za-zÆØÅ]+$",
    'nl-NL': r"^[A-Za-zÁÉËÏÓÖÜÚ]+$",
    'nn-NO': r"^[A-Za-zÆØÅ]+$",
    'hu-HU': r"^[A-Za-zÁÉÍÓÖŐÚÜŰ]+$",
    'pl-PL': r"^[A-Za-zĄĆĘŚŁŃÓŻŹ]+$",
    'pt-PT': r"^[A-Za-zÃÁÀÂÄÇÉÊËÍÏÕÓÔÖÚÜ]+$",
    'ru-RU': r"^[А-ЯЁ]+$",
    'kk-KZ': r"^[А-ЯЁ\u04D8\u04B0\u0406\u04A2\u0492\u04AE\u049A\u04E8\u04BA]+$",
    'sl-SI': r"^[A-Za-zČĆĐŠŽ]+$",
    'sk-SK': r"^[A-Za-zÁČĎÉÍŇÓŠŤÚÝŽĹŔĽÄÔ]+$",
    'sr-RS@latin': r"^[A-Za-zČĆŽŠĐ]+$",
    'sr-RS': r"^[А-ЯЂЈЉЊЋЏ]+$",
    'sv-SE': r"^[A-Za-zÅÄÖ]+$",
    'th-TH': r"^[ก-๐\s]+$",
    'tr-TR': r"^[A-Za-zÇĞİıÖŞÜ]+$",
    'uk-UA': r"^[А-ЩЬЮЯЄIЇҐі]+$",
    'vi-VN': r"^[A-Za-zÀÁẠẢÃÂẦẤẬẨẪĂẰẮẶẲẴĐÈÉẸẺẼÊỀẾỆỂỄÌÍỊỈĨÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠÙÚỤỦŨƯỪỨỰỬỮỲÝỴỶỸ]+$",
    # This pattern uses "*" and therefore also accepts the empty string.
    'ko-KR': r"^[ㄱ-ㅎㅏ-ㅣ가-힣]*$",
    'ku-IQ': r"^[ئابپتجچحخدرڕزژسشعغفڤقکگلڵمنوۆھەیێيطؤثآإأكضصةظذ]+$",
    'ar': r"^[ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْٰ]+$",
    'he': r"^[א-ת]+$",
    # NOTE(review): the "fa" and "bn" classes contain literal apostrophes,
    # so "'" is accepted as a letter — confirm whether that is intended.
    'fa': r"^['آاءأؤئبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهةی']+$",
    'bn': r"^['ঀঁংঃঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃৄেৈোৌ্ৎৗড়ঢ়য়ৠৡৢৣৰৱ৲৳৴৵৶৷৸৹৺৻']+$",
    'hi-IN': r"^[\u0900-\u0961]+[\u0972-\u097F]*$",
    'si-LK': r"^[\u0D80-\u0DFF]+$",
} | {
    # Every other English locale ("en-AU", "en-GB", ...) shares the en-US
    # alphabet.
    f"en-{e}": r"^[A-Za-z]+$" for e in englishLocales
}
| 54 | + |
# Locale code -> anchored regex pattern (as a string, not compiled) that
# accepts text made up only of that locale's letters and digits.
#
# Many entries list only the uppercase forms of non-ASCII letters, so the
# patterns are meant to be applied case-insensitively.
alphanumeric = {
    'en-US': r"^[0-9A-Za-z]+$",
    'az-AZ': r"^[0-9A-VXYZÇƏĞİıÖŞÜ]+$",
    'bg-BG': r"^[0-9А-Я]+$",
    'cs-CZ': r"^[0-9A-Za-zÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]+$",
    'da-DK': r"^[0-9A-Za-zÆØÅ]+$",
    'de-DE': r"^[0-9A-Za-zÄÖÜß]+$",
    'el-GR': r"^[0-9Α-ω]+$",
    'es-ES': r"^[0-9A-Za-zÁÉÍÑÓÚÜ]+$",
    'fi-FI': r"^[0-9A-Za-zÅÄÖ]+$",
    'fr-FR': r"^[0-9A-Za-zÀÂÆÇÉÈÊËÏÎÔŒÙÛÜŸ]+$",
    'it-IT': r"^[0-9A-Za-zÀÉÈÌÎÓÒÙ]+$",
    # Full-width digits (U+FF10-U+FF19) and half-width katakana
    # (U+FF66-U+FF9F) are written as escapes: the literal characters do not
    # survive Unicode normalisation, which had collapsed them into a
    # duplicated "0-9" and a reversed (uncompilable) range.
    'ja-JP': r"^[0-9\uFF10-\uFF19ぁ-んァ-ヶ\uFF66-\uFF9F一-龠ー・。、]+$",
    'hu-HU': r"^[0-9A-Za-zÁÉÍÓÖŐÚÜŰ]+$",
    'nb-NO': r"^[0-9A-Za-zÆØÅ]+$",
    'nl-NL': r"^[0-9A-Za-zÁÉËÏÓÖÜÚ]+$",
    'nn-NO': r"^[0-9A-Za-zÆØÅ]+$",
    'pl-PL': r"^[0-9A-Za-zĄĆĘŚŁŃÓŻŹ]+$",
    'pt-PT': r"^[0-9A-Za-zÃÁÀÂÄÇÉÊËÍÏÕÓÔÖÚÜ]+$",
    'ru-RU': r"^[0-9А-ЯЁ]+$",
    'kk-KZ': r"^[0-9А-ЯЁ\u04D8\u04B0\u0406\u04A2\u0492\u04AE\u049A\u04E8\u04BA]+$",
    'sl-SI': r"^[0-9A-Za-zČĆĐŠŽ]+$",
    'sk-SK': r"^[0-9A-Za-zÁČĎÉÍŇÓŠŤÚÝŽĹŔĽÄÔ]+$",
    'sr-RS@latin': r"^[0-9A-Za-zČĆŽŠĐ]+$",
    'sr-RS': r"^[0-9А-ЯЂЈЉЊЋЏ]+$",
    'sv-SE': r"^[0-9A-Za-zÅÄÖ]+$",
    'th-TH': r"^[ก-๙\s]+$",
    'tr-TR': r"^[0-9A-Za-zÇĞİıÖŞÜ]+$",
    'uk-UA': r"^[0-9А-ЩЬЮЯЄIЇҐі]+$",
    # This pattern uses "*" and therefore also accepts the empty string.
    'ko-KR': r"^[0-9ㄱ-ㅎㅏ-ㅣ가-힣]*$",
    'ku-IQ': r"^[٠١٢٣٤٥٦٧٨٩0-9ئابپتجچحخدرڕزژسشعغفڤقکگلڵمنوۆھەیێيطؤثآإأكضصةظذ]+$",
    'vi-VN': r"^[0-9A-Za-zÀÁẠẢÃÂẦẤẬẨẪĂẰẮẶẲẴĐÈÉẸẺẼÊỀẾỆỂỄÌÍỊỈĨÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠÙÚỤỦŨƯỪỨỰỬỮỲÝỴỶỸ]+$",
    'ar': r"^[٠١٢٣٤٥٦٧٨٩0-9ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْٰ]+$",
    'he': r"^[0-9א-ת]+$",
    # NOTE(review): the "fa" and "bn" classes contain literal apostrophes,
    # so "'" is accepted as a valid character — confirm whether intended.
    'fa': r"^['0-9آاءأؤئبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهةی۱۲۳۴۵۶۷۸۹۰']+$",
    'bn': r"^['ঀঁংঃঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃৄেৈোৌ্ৎৗড়ঢ়য়ৠৡৢৣ০১২৩৪৫৬৭৮৯ৰৱ৲৳৴৵৶৷৸৹৺৻']+$",
    'hi-IN': r"^[\u0900-\u0963]+[\u0966-\u097F]*$",
    'si-LK': r"^[0-9\u0D80-\u0DFF]+$",
} | {
    # Every other English locale ("en-AU", "en-GB", ...) shares the en-US
    # character set.
    f"en-{e}": r"^[0-9A-Za-z]+$" for e in englishLocales
}
| 94 | + |
# Locale code -> decimal separator character.
# NOTE: this module-level name shadows the stdlib ``decimal`` module for any
# code in this file that would want to import it.
decimal = {
    'en-US': '.',
    'ar': '٫',
} | {
    # The other English locales reuse the en-US separator, mirroring how the
    # alpha/alphanumeric tables reuse the en-US patterns. The original
    # mapped them to an empty string, which is not a usable separator.
    # NOTE(review): 'en-ZM' is also listed in commaDecimal — confirm that
    # whatever applies commaDecimal overrides this entry.
    f"en-{e}": '.' for e in englishLocales
}
| 99 | + |
def is_alpha(s: str, locale: str = 'en-US', options: "dict | None" = None) -> bool:
    """Return True if *s* contains only letters of *locale*'s alphabet.

    Args:
        s: The text to check. An empty string is never alphabetic, even
            for locales whose pattern would accept it.
        locale: A key of the module-level ``alpha`` table, e.g. ``'en-US'``.
        options: Reserved for an ``ignore`` option; currently accepted but
            not applied (see TODO below).

    Returns:
        True when the whole of *s* matches the locale's pattern, else False.

    Raises:
        ValueError: If *locale* has no entry in ``alpha``.
    """
    # Validate with an exception rather than ``assert`` so the check still
    # runs under ``python -O``. The locale is checked before the empty-string
    # shortcut so an unknown locale is always reported.
    if locale not in alpha:
        raise ValueError(f"Invalid locale {locale}")

    # TODO: ignore options — strip the characters named by options["ignore"]
    # from ``s`` before matching.

    if not s:
        return False

    # * The pattern comes first, the subject string second.
    # * fullmatch() is used because "$" alone would also accept a trailing
    #   newline ("abc\n").
    # * IGNORECASE is needed because many tables list only the uppercase
    #   forms of their non-ASCII letters.
    return regex.fullmatch(alpha[locale], s, regex.IGNORECASE) is not None
0 commit comments