update jupyter notebook examples w/ error-parity==0.3.6
AndreFCruz committed Nov 15, 2023
1 parent dd2799e commit a136b4b
Showing 13 changed files with 677 additions and 275 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
22 changes: 22 additions & 0 deletions error_parity/_commons.py
@@ -6,6 +6,28 @@
from scipy.spatial import qhull, ConvexHull


def arrays_are_equal(*arrays: list[np.ndarray]) -> bool:
"""Compares two or more arrays and returns whether they are equal."""
assert len(arrays) >= 2, \
f"At least two arguments must be provided, got {len(arrays)}."

# Reference array
ref_array = arrays[0]
ref_array_np = np.array(ref_array)

for curr_arr in arrays[1:]:
curr_arr_np = np.array(curr_arr)

# Check shape and contents
if (ref_array_np.shape != curr_arr_np.shape
or not np.allclose(ref_array_np, curr_arr_np)
):
return False # arrays are not equal

# All checks passed, return True (arrays are equal)
return True


def join_dictionaries(*dicts) -> dict:
"""Joins a sequence of dictionaries into a single dictionary."""
return reduce(operator.or_, dicts)
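As an aside (not part of the commit), a minimal sketch of how the new `arrays_are_equal` helper behaves, assuming the import path `error_parity._commons` shown above:

    import numpy as np
    from error_parity._commons import arrays_are_equal

    a = np.array([0.0, 0.1, 0.2])
    b = [0.0, 0.1, 0.2]                  # non-array inputs are converted via np.array
    c = np.array([0.0, 0.1, 0.3])

    arrays_are_equal(a, b)      # True: same shape, contents equal within np.allclose tolerance
    arrays_are_equal(a, b, c)   # False: any shape or content mismatch fails the whole comparison
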
2 changes: 1 addition & 1 deletion error_parity/_version.py
@@ -1,3 +1,3 @@
"""File to keep the package version in one place."""
__version__ = "0.3.5"
__version__ = "0.3.6"
__version_info__ = tuple(__version__.split("."))
4 changes: 2 additions & 2 deletions error_parity/classifiers.py
@@ -49,7 +49,7 @@ def __call__(self, X: np.ndarray, group: np.ndarray = None) -> np.ndarray:
y_pred_binary : np.ndarray[int]
The predicted class for each input sample.
"""
return (self.score_predictor(X) >= self.threshold).astype(int)
return (self.score_predictor(X).ravel() >= self.threshold).astype(int)


class BinaryClassifierAtROCDiagonal(Classifier):
@@ -125,7 +125,7 @@ def __call__(self, X: np.ndarray, group: np.ndarray) -> np.ndarray:
to a group-specific classifier for that sample.
"""
if len(X) != len(group):
raise ValueError(f"Invalid input sizes len(X) != len(group)")
raise ValueError(f"Invalid input sizes: len(X) != len(group), {len(X)} != {len(group)}.")

# Array to store predictions
num_samples = len(X)
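For context, a small illustration (not from this commit) of why the added `.ravel()` matters: a score predictor that returns a column vector of shape (n, 1) would otherwise yield 2-D thresholded predictions.

    import numpy as np

    threshold = 0.5
    scores_2d = np.array([[0.2], [0.7], [0.9]])     # shape (3, 1), e.g. a column-vector score output

    (scores_2d >= threshold).astype(int)            # shape (3, 1) -- 2-D predictions
    (scores_2d.ravel() >= threshold).astype(int)    # array([0, 1, 1]) -- flat, as expected downstream
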
6 changes: 5 additions & 1 deletion error_parity/evaluation.py
@@ -252,6 +252,10 @@ def evaluate_predictions(
Will only evaluate fairness if `sensitive_attribute` is provided.
Note
----
The value of `log_loss` may be inaccurate when using `scikit-learn<1.2`.
Parameters
----------
y_true : np.ndarray
@@ -286,7 +290,7 @@
"squared_loss": mean_squared_error(y_true, y_pred_scores),
"log_loss": log_loss(
y_true, y_pred_scores,
# eps=np.finfo(y_pred_scores.dtype).eps,
# eps=np.finfo(y_pred_scores.dtype).eps, # NOTE: for sklearn<1.2

# NOTE: this parameterization of `eps` is no longer useful as
# per sklearn 1.2, and will be removed in sklearn 1.5;
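As an aside, the commented-out `eps` argument above is only needed on `scikit-learn<1.2`; a hypothetical version-independent alternative (not part of this commit) is to clip the scores explicitly before calling `log_loss`:

    import numpy as np
    from sklearn.metrics import log_loss

    y_true = np.array([0, 1, 1, 0])
    y_pred_scores = np.array([0.0, 0.95, 0.80, 0.10])  # contains an exact 0.0 probability

    # Clip away exact 0/1 scores instead of relying on the deprecated `eps` argument.
    eps = np.finfo(y_pred_scores.dtype).eps
    loss = log_loss(y_true, np.clip(y_pred_scores, eps, 1 - eps))
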
52 changes: 42 additions & 10 deletions error_parity/pareto_curve.py
@@ -11,6 +11,7 @@

import os
import logging
import traceback
from functools import partial
from concurrent.futures import ThreadPoolExecutor

@@ -19,7 +20,13 @@

from .threshold_optimizer import RelaxedThresholdOptimizer
from .evaluation import evaluate_predictions, evaluate_predictions_bootstrap
from ._commons import join_dictionaries, get_cost_envelope
from ._commons import join_dictionaries, get_cost_envelope, arrays_are_equal


DEFAULT_TOLERANCE_TICKS = np.hstack((
np.arange(0.0, 0.2, 1e-2), # [0.00, 0.01, 0.02, ..., 0.19]
np.arange(0.2, 1.0, 1e-1), # [0.20, 0.30, 0.40, ...]
))


def fit_and_evaluate_postprocessing(
@@ -157,8 +164,8 @@ def compute_postprocessing_curve(
eval_data: tuple or dict[tuple],
fairness_constraint: str = "equalized_odds",
bootstrap: bool = True,
tolerance_tick_step: float = 1e-2,
tolerance_ticks: list = None,
tolerance_ticks: list = DEFAULT_TOLERANCE_TICKS,
tolerance_tick_step: float = None,
predict_method: str = "predict_proba",
n_jobs: int = None,
**kwargs) -> pd.DataFrame:
@@ -181,15 +188,19 @@
bootstrap : bool, optional
Whether to compute uncertainty estimates via bootstrapping, by default
False.
tolerance_tick_step : float, optional
Distance between constraint tolerances in the adjustment curve, by
default 1e-2.
tolerance_ticks : list, optional
List of constraint tolerances to use when computing adjustment curve.
If not provided, will use `tolerance_tick_step` to construct evenly-
-spaced ticks.
By default will use higher granularity/precision for lower levels of
disparity, and lower granularity for higher levels of disparity.
Should correspond to a sorted list of values between 0 and 1.
Will be ignored if `tolerance_tick_step` is provided.
tolerance_tick_step : float, optional
Distance between constraint tolerances in the adjustment curve.
Will override `tolerance_ticks` if provided!
predict_method : str, optional
Which method to call to obtain predictions out of the given model.
Use `predict_method="__call__"` for a callable predictor, or the default
`predict_method="predict_proba"` for a predictor with sklearn interface.
n_jobs : int, optional
Number of parallel jobs to use, if omitted will use `os.cpu_count()-1`.
@@ -210,7 +221,9 @@ def _func_call(tol: float):
**kwargs)

except Exception as exc:
logging.error(f"FAILED fit_relaxed_postprocessing with `tolerance={tol}`: {exc}")
logging.error(
f"FAILED `fit_and_evaluate_postprocessing(.)` with `tolerance={tol}`; "
f"{''.join(traceback.TracebackException.from_exception(exc).format())}")

return {} # return empty dictionary
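
For readers unfamiliar with the logging change above: `traceback.TracebackException.from_exception(exc).format()` yields the same lines that `traceback.print_exc()` would print, so the full stack trace ends up in the log message. A tiny standalone sketch (not from this commit):

    import traceback

    try:
        1 / 0
    except Exception as exc:
        msg = "".join(traceback.TracebackException.from_exception(exc).format())
        print(msg)   # full "Traceback (most recent call last): ..." text as one string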

@@ -220,7 +233,26 @@ def _func_call(tol: float):
logging.info(f"Using `n_jobs={n_jobs}` to compute adjustment curve.")

from tqdm.auto import tqdm
tolerances = tolerance_ticks if tolerance_ticks is not None else np.arange(0.0, 1.0, tolerance_tick_step)
# Use `tolerance_tick_step` kwarg
if tolerance_tick_step is not None:
tolerances = np.arange(0.0, 1.0, tolerance_tick_step)

if (
# > `tolerance_ticks` was provided
tolerance_ticks is not None
# > and `tolerance_ticks` was set to a non-default value
and not arrays_are_equal(tolerance_ticks, DEFAULT_TOLERANCE_TICKS)
):
logging.error("Please provide only one of `tolerance_ticks` and `tolerance_tick_step`.")

logging.warning("Use of `tolerance_tick_step` overrides the use of `tolerance_ticks`.")

# Use `tolerance_ticks` kwarg
else:
tolerances = tolerance_ticks

# Log tolerances used
logging.info(f"Computing postprocessing for the following constraint tolerances: {tolerances}.")

with ThreadPoolExecutor(max_workers=n_jobs) as executor:
func_call_results = list(
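For context, a short sketch (not part of the commit) of the two tolerance grids involved: the new `DEFAULT_TOLERANCE_TICKS` default versus the evenly spaced grid produced when `tolerance_tick_step` is passed, which now takes precedence and triggers the logged warning above if both are customized.

    import numpy as np

    # New default: fine 0.01 steps below 0.2, coarse 0.1 steps from 0.2 up to (excluding) 1.0.
    default_ticks = np.hstack((
        np.arange(0.0, 0.2, 1e-2),   # 0.00, 0.01, ..., 0.19
        np.arange(0.2, 1.0, 1e-1),   # 0.20, 0.30, ..., 0.90
    ))

    # Equivalent of passing `tolerance_tick_step=0.05`: evenly spaced ticks in [0, 1).
    evenly_spaced_ticks = np.arange(0.0, 1.0, 0.05)
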
153 changes: 115 additions & 38 deletions error_parity/plotting.py
@@ -4,6 +4,7 @@
import pandas as pd

import seaborn as sns
import matplotlib.figure
from matplotlib import pyplot as plt

from .pareto_curve import compute_inner_and_outer_adjustment_ci, get_envelope_of_postprocessing_frontier
@@ -17,20 +18,47 @@ def plot_polygon_edges(polygon_points, **kwargs):


def plot_postprocessing_solution(
*,
postprocessed_clf: RelaxedThresholdOptimizer,
plot_roc_curves: bool = False,
plot_roc_hulls: bool = True,
plot_group_optima: bool = True,
plot_group_triangulation: bool = True,
plot_global_optimum: bool = True,
plot_diagonal: bool = True,
plot_relaxation: bool = False,
group_name_map: dict = None,
figure=None,
**fig_kwargs,
):
"""Plots the group-specific solutions found by this predictor."""
*,
postprocessed_clf: RelaxedThresholdOptimizer,
plot_roc_curves: bool = False,
plot_roc_hulls: bool = True,
plot_group_optima: bool = True,
plot_group_triangulation: bool = True,
plot_global_optimum: bool = True,
plot_diagonal: bool = True,
plot_relaxation: bool = False,
group_name_map: dict = None,
figure: matplotlib.figure.Figure = None,
**fig_kwargs,
):
"""Plots the group-specific solutions found for this predictor.
Parameters
----------
postprocessed_clf : RelaxedThresholdOptimizer
A postprocessed classifier already fitted on some data.
plot_roc_curves : bool, optional
Whether to plot the global ROC curves, by default False.
plot_roc_hulls : bool, optional
Whether to plot the global ROC convex hulls, by default True.
plot_group_optima : bool, optional
Whether to plot the group-specific optima, by default True.
plot_group_triangulation : bool, optional
Whether to plot the triangulation of a group-specific solution, when
such triangulation is needed to achieve a target ROC point.
plot_global_optimum : bool, optional
Whether to plot the global optimum ROC point, by default True.
plot_diagonal : bool, optional
Whether to plot the ROC diagonal with FPR=TPR, by default True.
plot_relaxation : bool, optional
Whether to plot the constraint relaxation bounding box, by default False.
group_name_map : dict, optional
A dictionary mapping each group's value to an appropriate name to show
in the plot legend, by default None.
figure : matplotlib.figure.Figure, optional
A matplotlib figure to use when plotting, by default will generate a new
figure for plotting.
"""
postprocessed_clf._check_fit_status()

from matplotlib import pyplot as plt
@@ -44,7 +72,8 @@ def plot_postprocessing_solution(
global_color = palette[0]
all_group_colors = palette[1:]

fig = figure if figure is not None else plt.figure(**fig_kwargs)
if figure is None:
figure = plt.figure(**fig_kwargs)

# For each group `idx`
for idx in range(n_groups):
@@ -175,31 +204,75 @@

def plot_postprocessing_frontier(
postproc_results_df: pd.DataFrame,
*,
perf_metric: str,
disp_metric: str,
show_data_type: str,
model_name: str,
constant_clf_perf: float,
model_name: str = None,
color: str = "black",
):
"""Helper to plot the given post-processing frontier results with confidence intervals."""
"""Helper to plot the given post-processing frontier results.
Will use bootstrapped results if available, including plotting confidence
intervals.
Parameters
----------
postproc_results_df : pd.DataFrame
The DataFrame containing postprocessing results.
This should be the output of a call to `compute_postprocessing_curve(.)`.
perf_metric : str
Which performance metric to plot (horizontal axis).
disp_metric : str
Which disparity metric to plot (vertical axis).
show_data_type : str
The type of data to show results for; usually this will be "test".
constant_clf_perf : float
Performance achieved by the constant classifier; this is the point of
lowest performance and lowest disparity achievable by postprocessing.
model_name : str, optional
Shown in the plot legend. Name of the model to be postprocessed.
color : str, optional
Which color to use for plotting the postprocessing curve, by default "black".
"""

# Get relevant column names
perf_col = f"{perf_metric}_mean_{show_data_type}"
disp_col = f"{disp_metric}_mean_{show_data_type}"

# Check if bootstrap means are available
has_bootstrap_results = perf_col in postproc_results_df.columns

if not has_bootstrap_results:
perf_col = f"{perf_metric}_{show_data_type}"
disp_col = f"{disp_metric}_{show_data_type}"

assert perf_col in postproc_results_df.columns, (
f"Could not find the column '{perf_col}' for the perf. metric "
f"'{perf_metric}' on data type '{show_data_type}'.")
assert disp_col in postproc_results_df.columns, (
f"Could not find the column '{disp_col}' for the disp. metric "
f"'{disp_metric}' on data type '{show_data_type}'.")

# Get envelope of postprocessing adjustment frontier
postproc_frontier = get_envelope_of_postprocessing_frontier(
postproc_results_df,
perf_col=f"{perf_metric}_mean_{show_data_type}",
disp_col=f"{disp_metric}_mean_{show_data_type}",
perf_col=perf_col,
disp_col=disp_col,
constant_clf_perf=constant_clf_perf,
)

# Get inner and outer confidence intervals
postproc_frontier_xticks, interior_frontier_yticks, outer_frontier_yticks = \
compute_inner_and_outer_adjustment_ci(
postproc_results_df,
perf_metric=perf_metric,
disp_metric=disp_metric,
data_type=show_data_type,
constant_clf_perf=constant_clf_perf,
)
if has_bootstrap_results:
postproc_frontier_xticks, interior_frontier_yticks, outer_frontier_yticks = \
compute_inner_and_outer_adjustment_ci(
postproc_results_df,
perf_metric=perf_metric,
disp_metric=disp_metric,
data_type=show_data_type,
constant_clf_perf=constant_clf_perf,
)

# Draw upper right portion of the line (dominated but not feasible)
upper_right_frontier = np.array([
@@ -219,19 +292,23 @@
sns.lineplot(
x=postproc_frontier[:, 0],
y=postproc_frontier[:, 1],
label=f"post-processing of {model_name}",
label=(
"post-processing" if model_name is None
else f"post-processing of {model_name}"
),
linestyle="-.",
color=color,
)

# Draw confidence intervals (shaded area)
ax = plt.gca()
ax.fill_between(
x=postproc_frontier_xticks,
y1=interior_frontier_yticks,
y2=outer_frontier_yticks,
interpolate=True,
color=color,
alpha=0.1,
label=r"$95\%$ conf. interv.",
)
if has_bootstrap_results:
ax = plt.gca()
ax.fill_between(
x=postproc_frontier_xticks,
y1=interior_frontier_yticks,
y2=outer_frontier_yticks,
interpolate=True,
color=color,
alpha=0.1,
label=r"$95\%$ conf. interv.",
)
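
Finally, a rough usage sketch for the two plotting helpers changed above; the fitted postprocessed classifier, results DataFrame, and metric names below are placeholders inferred from the docstrings, not part of this commit.

    from matplotlib import pyplot as plt
    from error_parity.plotting import (
        plot_postprocessing_solution,
        plot_postprocessing_frontier,
    )

    # `postprocessed_clf` is a fitted RelaxedThresholdOptimizer (placeholder).
    plot_postprocessing_solution(postprocessed_clf=postprocessed_clf, figsize=(5, 5))
    plt.show()

    # `postproc_results_df` is the DataFrame returned by compute_postprocessing_curve(.) (placeholder).
    # With bootstrapping, the "<metric>_mean_test" columns are used and a 95% CI band is drawn;
    # without it, the plain "<metric>_test" columns are used and no band is shown.
    plot_postprocessing_frontier(
        postproc_results_df,
        perf_metric="accuracy",
        disp_metric="equalized_odds_diff",   # assumed metric/column name
        show_data_type="test",
        constant_clf_perf=0.5,
        model_name="LightGBM",               # optional; legend falls back to "post-processing"
    )
    plt.show()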