From a79157cc1414496fb0d3a0ac738ec5b00f75a8fe Mon Sep 17 00:00:00 2001
From: Karthikeya2026 <22p31a0552@acet.ac.in>
Date: Tue, 8 Apr 2025 13:44:39 +0530
Subject: [PATCH 1/3] Fix: Added validation for treated column to ensure boolean type (#440)

---
Notes (ignored by git am):
  * Adds a self-contained test module: the helper under test,
    `_check_treated_column_validity`, is defined in the test file itself.
  * The try/except + pytest.fail wrapper was dropped from the boolean test;
    an unexpected exception already fails a pytest test.
  * Blob ids on the `index` lines are carried over from the original
    submission and do not reflect the reformatted content.

 .../experiments/test_treated_column_valid.py  | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 causalpy/experiments/test_treated_column_valid.py

diff --git a/causalpy/experiments/test_treated_column_valid.py b/causalpy/experiments/test_treated_column_valid.py
new file mode 100644
index 00000000..8e573c82
--- /dev/null
+++ b/causalpy/experiments/test_treated_column_valid.py
@@ -0,0 +1,25 @@
+import pandas as pd
+import pytest
+
+
+def _check_treated_column_validity(df, treated_col_name):
+    """Raise ``ValueError`` unless ``df[treated_col_name]`` has a boolean dtype."""
+    treated_col = df[treated_col_name]
+    if not pd.api.types.is_bool_dtype(treated_col):
+        raise ValueError(
+            f"The '{treated_col_name}' column must be of boolean dtype (True/False)."
+        )
+
+
+def test_treated_column_with_integers():
+    """An integer 0/1 column is rejected with a ``ValueError``."""
+    df = pd.DataFrame({"treated": [0, 1, 0, 1]})
+    with pytest.raises(ValueError, match="treated.*must be of boolean dtype"):
+        _check_treated_column_validity(df, "treated")
+
+
+def test_treated_column_with_booleans():
+    """A True/False column passes validation without raising."""
+    df = pd.DataFrame({"treated": [True, False, True, False]})
+    # An unexpected ValueError propagates and fails the test on its own.
+    _check_treated_column_validity(df, "treated")

From 2670ed195449a418e94a55f39ad0b8082c50a421 Mon Sep 17 00:00:00 2001
From: Karthikeya2026 <22p31a0552@acet.ac.in>
Date: Tue, 8 Apr 2025 14:14:52 +0530
Subject: [PATCH 2/3] Fix: Add validation and tests for treated column in RegressionDiscontinuity

---
Notes (ignored by git am):
  * The check still raises ValueError when the column dtype is not "bool";
    only the comparison idiom, the message spacing ("bool. Please") and the
    line wrapping changed.
  * NOTE(review): the tests in patch 1 exercise a local helper built on
    pd.api.types.is_bool_dtype, not this dtype comparison - confirm the two
    are meant to agree (e.g. for pandas' nullable "boolean" dtype).
  * Context-line indentation was reconstructed; verify against the target
    file before applying.

 causalpy/experiments/regression_discontinuity.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/causalpy/experiments/regression_discontinuity.py b/causalpy/experiments/regression_discontinuity.py
index 1afc5c1a..ebf3b103 100644
--- a/causalpy/experiments/regression_discontinuity.py
+++ b/causalpy/experiments/regression_discontinuity.py
@@ -190,6 +190,11 @@ def input_validation(self):
             raise DataException(
                 """The treated variable should be dummy coded. Consisting of 0's and 1's only."""  # noqa: E501
             )
+        if self.data["treated"].dtype != "bool":
+            raise ValueError(
+                "The 'treated' column must be of type bool. "
+                "Please convert your data accordingly."
+            )
 
     def _is_treated(self, x):
         """Returns ``True`` if `x` is greater than or equal to the treatment threshold.

From d9adb5c3ac7af9742148b5f2cc25859c8a091bd0 Mon Sep 17 00:00:00 2001
From: Karthikeya2026 <22p31a0552@acet.ac.in>
Date: Tue, 8 Apr 2025 14:18:27 +0530
Subject: [PATCH 3/3] Add test for treated column validation in RegressionDiscontinuity

---
Notes (ignored by git am):
  * This patch only appends a blank line at the end of the test file; it
    adds no new test case.

 causalpy/experiments/test_treated_column_valid.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/causalpy/experiments/test_treated_column_valid.py b/causalpy/experiments/test_treated_column_valid.py
index 8e573c82..99397e1e 100644
--- a/causalpy/experiments/test_treated_column_valid.py
+++ b/causalpy/experiments/test_treated_column_valid.py
@@ -23,3 +23,4 @@ def test_treated_column_with_booleans():
     df = pd.DataFrame({"treated": [True, False, True, False]})
     # An unexpected ValueError propagates and fails the test on its own.
     _check_treated_column_validity(df, "treated")
+