diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 92c67865ae88f..94c289ef3ace7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -60,6 +60,7 @@ Other enhancements - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) +- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index 2d1b1eca55e98..1ca52ce64bd77 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -253,6 +253,10 @@ def _str_isalpha(self): result = pc.utf8_is_alpha(self._pa_array) return self._convert_bool_result(result) + def _str_isascii(self): + result = pc.string_is_ascii(self._pa_array) + return self._convert_bool_result(result) + def _str_isdecimal(self): result = pc.utf8_is_decimal(self._pa_array) return self._convert_bool_result(result) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index e5b434edacc59..d3ccd11281a77 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3415,7 +3415,8 @@ def len(self): # cases: # upper, lower, title, capitalize, swapcase, casefold # boolean: - # isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle + # isalpha, isnumeric isalnum isdigit isdecimal isspace islower + # isupper istitle isascii # _doc_args holds dict of strings to use in substituting casemethod docs _doc_args: dict[str, dict[str, str]] = {} _doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""} @@ -3495,6 +3496,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3518,6 +3520,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3544,6 +3547,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3576,6 +3580,7 @@ def casefold(self): Series.str.isdigit : Check whether all characters are digits. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3601,6 +3606,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3627,6 +3633,7 @@ def casefold(self): Series.str.isdigit : Check whether all characters are digits. Series.str.isdecimal : Check whether all characters are decimal. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3649,6 +3656,7 @@ def casefold(self): Series.str.isdigit : Check whether all characters are digits. Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. @@ -3674,6 +3682,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.istitle : Check whether all characters are titlecase. Examples @@ -3697,6 +3706,7 @@ def casefold(self): Series.str.isdecimal : Check whether all characters are decimal. Series.str.isspace : Check whether all characters are whitespace. Series.str.islower : Check whether all characters are lowercase. + Series.str.isascii : Check whether all characters are ascii. Series.str.isupper : Check whether all characters are uppercase. Examples @@ -3714,11 +3724,40 @@ def casefold(self): 3 False dtype: bool """ + _shared_docs["isascii"] = """ + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.istitle : Check whether all characters are titlecase. + Series.str.isupper : Check whether all characters are uppercase. + + Examples + ------------ + The ``s5.str.isascii`` method checks for whether all characters are ascii + characters, which includes digits 0-9, capital and lowercase letters A-Z, + and some other special characters. + + >>> s5 = pd.Series(['รถ', 'see123', 'hello world', '']) + >>> s5.str.isascii() + 0 False + 1 True + 2 True + 3 True + dtype: bool + """ + _doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"} _doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"} _doc_args["isdigit"] = {"type": "digits", "method": "isdigit"} _doc_args["isspace"] = {"type": "whitespace", "method": "isspace"} _doc_args["islower"] = {"type": "lowercase", "method": "islower"} + _doc_args["isascii"] = {"type": "ascii", "method": "isascii"} _doc_args["isupper"] = {"type": "uppercase", "method": "isupper"} _doc_args["istitle"] = {"type": "titlecase", "method": "istitle"} _doc_args["isnumeric"] = {"type": "numeric", "method": "isnumeric"} @@ -3750,6 +3789,11 @@ def casefold(self): docstring=_shared_docs["ismethods"] % _doc_args["islower"] + _shared_docs["islower"], ) + isascii = _map_and_wrap( + "isascii", + docstring=_shared_docs["ismethods"] % _doc_args["isascii"] + + _shared_docs["isascii"], + ) isupper = _map_and_wrap( "isupper", docstring=_shared_docs["ismethods"] % _doc_args["isupper"] diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index 4ed36f85167c9..78c4f3acbe1aa 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -179,6 +179,10 @@ def _str_isalnum(self): def _str_isalpha(self): pass + @abc.abstractmethod + def _str_isascii(self): + pass + @abc.abstractmethod def _str_isdecimal(self): pass diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 0268194e64d50..a07ab9534f491 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -455,6 +455,9 @@ def _str_isalnum(self): def _str_isalpha(self): return self._str_map(str.isalpha, dtype="bool") + def _str_isascii(self): + return self._str_map(str.isascii, dtype="bool") + def _str_isdecimal(self): return self._str_map(str.isdecimal, dtype="bool") diff --git a/pandas/tests/strings/conftest.py b/pandas/tests/strings/conftest.py index 92b7b16da3c1f..5bcbb16da3be9 100644 --- a/pandas/tests/strings/conftest.py +++ b/pandas/tests/strings/conftest.py @@ -68,6 +68,7 @@ "get_dummies", "isalnum", "isalpha", + "isascii", "isdecimal", "isdigit", "islower", diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index cd3c512328139..c5414022e664b 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -83,6 +83,7 @@ def test_string_array_numeric_integer_array(nullable_string_dtype, method, expec [ ("isdigit", [False, None, True]), ("isalpha", [True, None, False]), + ("isascii", [True, None, True]), ("isalnum", [True, None, True]), ("isnumeric", [False, None, True]), ], diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 75a2007b61640..0598e5f80e6d6 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -159,6 +159,7 @@ def test_empty_str_methods(any_string_dtype): # ismethods should always return boolean (GH 29624) tm.assert_series_equal(empty_bool, empty.str.isalnum()) tm.assert_series_equal(empty_bool, empty.str.isalpha()) + tm.assert_series_equal(empty_bool, empty.str.isascii()) tm.assert_series_equal(empty_bool, empty.str.isdigit()) tm.assert_series_equal(empty_bool, empty.str.isspace()) tm.assert_series_equal(empty_bool, empty.str.islower()) @@ -177,6 +178,7 @@ def test_empty_str_methods(any_string_dtype): @pytest.mark.parametrize( "method, expected", [ + ("isascii", [True, True, True, True, True, True, True, True, True, True]), ("isalnum", [True, True, True, True, True, False, True, True, False, False]), ("isalpha", [True, True, True, False, False, False, True, False, False, False]), (