# Patch summary (commentary only; text before the first "diff --git" header
# is not part of any hunk).
#
# superset/utils/pandas_postprocessing/histogram.py
#   - Inside histogram(), rows whose target `column` is null are dropped
#     before the column is coerced with to_numeric(..., errors="coerce").
#   - If no rows remain after the drop, the function returns `df` right away
#     and the numeric coercion below is not reached.
#   - NOTE(review): dropna(..., inplace=True) modifies the DataFrame object
#     bound to `df` rather than a copy. `df` is presumably the frame passed in
#     by the caller (the signature is outside this hunk) -- confirm callers do
#     not rely on the null rows still being present afterwards.
#   - NOTE(review): the early return hands back `df` itself, so in the
#     all-null case the result keeps the input's columns, not bin-label
#     columns -- confirm consumers accept that shape.
#
# tests/unit_tests/pandas_postprocessing/test_histogram.py
#   - Adds four tests: {with groupby, without groupby} x {some nulls, all nulls}.
#   - "some nulls": one of ten values in "a" is None; the asserted bin counts
#     sum to nine, i.e. the null row is not counted.
#   - "all nulls": only asserts that the result is empty.
#   - `bins` is referenced but defined outside this hunk; the five expected
#     bin labels suggest it is 5 -- verify against the test module.
#
# Line breaks and indentation below were restored from a whitespace-collapsed
# copy of the patch; the layout matches the hunk line counts in each "@@" header.
diff --git a/superset/utils/pandas_postprocessing/histogram.py b/superset/utils/pandas_postprocessing/histogram.py
index 659eac7b42d1..72dbee855c31 100644
--- a/superset/utils/pandas_postprocessing/histogram.py
+++ b/superset/utils/pandas_postprocessing/histogram.py
@@ -48,6 +48,11 @@ def histogram(
     if groupby is None:
         groupby = []
 
+    # drop empty values from the target column
+    df.dropna(subset=[column], inplace=True)
+    if df.empty:
+        return df
+
     # convert to numeric, coercing errors to NaN
     df[column] = to_numeric(df[column], errors="coerce")
 
diff --git a/tests/unit_tests/pandas_postprocessing/test_histogram.py b/tests/unit_tests/pandas_postprocessing/test_histogram.py
index 9246a06df082..68ffdb0b7734 100644
--- a/tests/unit_tests/pandas_postprocessing/test_histogram.py
+++ b/tests/unit_tests/pandas_postprocessing/test_histogram.py
@@ -140,3 +140,70 @@ def test_histogram_with_some_non_numeric_values():
         histogram(data_with_non_numeric, "a", ["group"], bins)
     except ValueError as e:
         assert str(e) == "Column 'group' contains non-numeric values"  # noqa: PT017
+
+
+def test_histogram_with_groupby_and_some_null_values():
+    data_with_groupby_and_some_nulls = DataFrame(
+        {
+            "group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
+            "a": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
+            "b": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
+        }
+    )
+
+    result = histogram(data_with_groupby_and_some_nulls, "a", ["group"], bins)
+    assert result.shape == (2, bins + 1)
+    assert result.columns.tolist() == [
+        "group",
+        "1.0 - 2.8",
+        "2.8 - 4.6",
+        "4.6 - 6.4",
+        "6.4 - 8.2",
+        "8.2 - 10.0",
+    ]
+    assert result.values.tolist() == [["A", 2, 0, 1, 0, 2], ["B", 0, 2, 0, 2, 0]]
+
+
+def test_histogram_with_no_groupby_and_some_null_values():
+    data_with_no_groupby_and_some_nulls = DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
+            "b": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
+        }
+    )
+
+    result = histogram(data_with_no_groupby_and_some_nulls, "a", [], bins)
+    assert result.shape == (1, bins)
+    assert result.columns.tolist() == [
+        "1.0 - 2.8",
+        "2.8 - 4.6",
+        "4.6 - 6.4",
+        "6.4 - 8.2",
+        "8.2 - 10.0",
+    ]
+    assert result.values.tolist() == [[2, 2, 1, 2, 2]]
+
+
+def test_histogram_with_groupby_and_all_null_values():
+    data_with_groupby_and_all_nulls = DataFrame(
+        {
+            "group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
+            "a": [None, None, None, None, None, None, None, None, None, None],
+            "b": [None, None, None, None, None, None, None, None, None, None],
+        }
+    )
+
+    result = histogram(data_with_groupby_and_all_nulls, "a", ["group"], bins)
+    assert result.empty
+
+
+def test_histogram_with_no_groupby_and_all_null_values():
+    data_with_no_groupby_and_all_nulls = DataFrame(
+        {
+            "a": [None, None, None, None, None, None, None, None, None, None],
+            "b": [None, None, None, None, None, None, None, None, None, None],
+        }
+    )
+
+    result = histogram(data_with_no_groupby_and_all_nulls, "a", [], bins)
+    assert result.empty