implement different l-p norm relaxations

Co-authored-by: Kajetan Schweighofer <[email protected]>
socialfoundations · Apr 23, 2024 · d78b46b · d78b46b
1 parent e68f613
commit d78b46b
Show file tree

Hide file tree

Showing 8 changed files with 245 additions and 54 deletions.
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# error-parity
+# error-parity    <!-- omit in toc -->
 
 ![Tests status](https://github.com/socialfoundations/error-parity/actions/workflows/python-tests.yml/badge.svg)
 ![PyPI status](https://github.com/socialfoundations/error-parity/actions/workflows/python-publish.yml/badge.svg)
@@ -20,6 +20,14 @@ which can be useful to compare ML models at equal fairness levels.
 
 Package documentation available [here](https://socialfoundations.github.io/error-parity/).
 
+Contents:
+- [Installing](#installing)
+- [Getting started](#getting-started)
+- [How it works](#how-it-works)
+- [Fairness constraints](#fairness-constraints)
+  - [Equalized odds relaxations](#equalized-odds-relaxations)
+- [Citing](#citing)
+
 
 ## Installing
 
@@ -65,13 +73,13 @@ y_pred_test = fair_clf(X=X_test, group=group_test)
 
 Given a callable score-based predictor (i.e., `y_pred = predictor(X)`), and some `(X, Y, S)` data to fit, `RelaxedThresholdOptimizer` will:
 1. Compute group-specific ROC curves and their convex hulls;
-2. Compute the `r`-relaxed optimal solution for the chosen fairness criterion (using [cvxpy](https://www.cvxpy.org));
+2. Compute the $r$-relaxed optimal solution for the chosen fairness criterion (using [cvxpy](https://www.cvxpy.org));
 3. Find the set of group-specific binary classifiers that match the optimal solution found.
     - each group-specific classifier is made up of (possibly randomized) group-specific thresholds over the given predictor;
     - if a group's ROC point is in the interior of its ROC curve, partial randomization of its predictions may be necessary.
 
 
-## Available fairness constraints
+## Fairness constraints
 
 You can choose specific fairness constraints via the `constraint` key-word argument to
 the `RelaxedThresholdOptimizer` constructor.
@@ -83,6 +91,7 @@ Currently implemented fairness constraints:
   - i.e., equal group-specific TPR and FPR;
   - use `constraint="equalized_odds"`;
   - $\max_{a, b \in \mathcal{S}} \max_{y \in \{0, 1\}} \left( \mathbb{P}[\hat{Y}=1 | S=a, Y=y] - \mathbb{P}[\hat{Y}=1 | S=b, Y=y] \right) \leq r$
+  - [other relaxations available](#equalized-odds-relaxations) by changing the `l_p_norm` parameter;
 - [x] equal opportunity;
   - i.e., equal group-specific TPR;
   - use `constraint="true_positive_rate_parity"`;
@@ -96,7 +105,26 @@ Currently implemented fairness constraints:
   - use `constraint="demographic_parity"`;
   - $\max_{a, b \in \mathcal{S}} \left( \mathbb{P}[\hat{Y}=1 | S=a] - \mathbb{P}[\hat{Y}=1 | S=b] \right) \leq r$
 
-We welcome community contributions for [cvxpy](https://www.cvxpy.org) implementations of other fairness constraints.
+> We welcome community contributions for [cvxpy](https://www.cvxpy.org) implementations of other fairness constraints.
+
+### Equalized odds relaxations
+
+When using `constraint="equalized_odds"` (the default), different relaxations
+can be chosen by altering the `l_p_norm` parameter.
+
+A few useful values:
+- `l_p_norm="inf"` **[default]** evaluates equalized-odds as the maximum
+between group-wise TPR and FPR differences (as shown above).
+- `l_p_norm=1` evaluates equalized-odds as the sum of the
+absolute differences in group-wise TPR and FPR.
+  - this equals twice the metric known as `average_abs_odds_difference`.
+- `l_p_norm=p` for any other positive integer $p$: computes the distance between group-wise ROC
+points using the specified l-p norm.
+
+The actual equalized odds constraint implemented is:
+
+$\max_{a, b \in \mathcal{S}} \left\lVert ROC_a - ROC_b \right\rVert_p \leq r,$ where $ROC_a$ is the ROC point of group $S=a$ and $ROC_b$ is the ROC point of group $S=b$.
+
 
 
 ## Citing

diff --git a/error_parity/_version.py b/error_parity/_version.py
@@ -1,3 +1,3 @@
 """File to keep the package version in one place."""
-__version__ = "0.3.10"
+__version__ = "0.3.11"
 __version_info__ = tuple(__version__.split("."))
diff --git a/error_parity/cvxpy_utils.py b/error_parity/cvxpy_utils.py
@@ -237,6 +237,7 @@ def compute_fair_optimum(   # noqa: C901
     global_prevalence: float,
     false_positive_cost: float = 1.0,
     false_negative_cost: float = 1.0,
+    l_p_norm: int | str = np.inf,
 ) -> tuple[np.ndarray, np.ndarray]:
     """Computes the solution to finding the optimal fair (equal odds) classifier.
 
@@ -275,6 +276,15 @@ def compute_fair_optimum(   # noqa: C901
     false_negative_cost : float, optional
         The cost of a FALSE NEGATIVE error, by default 1.
 
+    l_p_norm : int | str, optional
+        The type of l-p norm to use when computing the distance between two ROC
+        points. Used only for the "equalized_odds" constraint. By default uses
+        `np.inf` (l-infinity distance): the maximum between groups' TPR and FPR
+        differences. Using `l_p_norm=1` gives the sum of absolute TPR and FPR
+        differences, i.e., twice the `average_abs_odds_difference`.
+        See the following link for more information on this parameter:
+        https://www.cvxpy.org/api_reference/cvxpy.atoms.other_atoms.html#norm
+
     Returns
     -------
     (groupwise_roc_points, global_roc_point) : tuple[np.ndarray, np.ndarray]
@@ -314,10 +324,14 @@ def compute_fair_optimum(   # noqa: C901
     # NOTE: feature request: compatibility with multiple constraints simultaneously
 
     # If "equalized_odds"
-    # > i.e., constrain l-inf distance between any two groups' ROCs being less than `tolerance`
+    # - i.e., l-p distance between any two groups' ROC points must be at most `tolerance`;
+    # - DEFAULT: l-infinity distance (the larger of the absolute TPR and FPR differences between two groups);
     if fairness_constraint == "equalized_odds":
         constraints += [
-            cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j])
+            cp.norm(
+                groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j],
+                p=l_p_norm,
+            )
             <= tolerance
             for i, j in product(range(n_groups), range(n_groups))
             if i < j

diff --git a/error_parity/evaluation.py b/error_parity/evaluation.py
@@ -221,19 +221,39 @@ def group_metric_name(metric_name, group_name):
         diff_name = f"{metric_name}_diff"
         results[diff_name] = max(curr_metric_results) - min(curr_metric_results)
 
-    # Equal odds: maximum constraint violation for TPR and FPR equality
-    # i.e., the smallest ratio
+    # ** Equalized odds **
+    # default value: use maximum constraint violation for TPR and FPR equality
     results["equalized_odds_ratio"] = min(
         results["fnr_ratio"],
         results["fpr_ratio"],
     )
-
-    # or the largest absolute difference
     results["equalized_odds_diff"] = max(
         results["tpr_diff"],  # same as FNR diff
         results["fpr_diff"],  # same as TNR diff
     )
 
+    # Evaluate equalized odds using other l-p norms
+    # (default value corresponds to l-infinity norm)
+    available_norms = [1, 2, np.inf]
+    for norm in available_norms:
+        metric_name = f"equalized_odds_diff_l{norm}"
+        results[metric_name] = max(
+            np.linalg.norm(
+                [
+                    # TPR diff
+                    groupwise_metrics[group_metric_name("tpr", group_a)]
+                    - groupwise_metrics[group_metric_name("tpr", group_b)],
+
+                    # FPR diff
+                    groupwise_metrics[group_metric_name("fpr", group_a)]
+                    - groupwise_metrics[group_metric_name("fpr", group_b)],
+                ],
+                ord=norm,
+            )
+            for group_a, group_b in product(unique_groups, unique_groups)
+            if group_a < group_b
+        )
+
     # Optionally, return group-wise metrics as well
     if return_groupwise_metrics:
         results.update(groupwise_metrics)

diff --git a/error_parity/threshold_optimizer.py b/error_parity/threshold_optimizer.py
@@ -39,9 +39,9 @@ def __init__(
         tolerance: float = 0.0,
         false_pos_cost: float = 1.0,
         false_neg_cost: float = 1.0,
+        l_p_norm: int = np.inf,
         max_roc_ticks: int = 1000,
         seed: int = 42,
-        # distance: str = 'max',    # TODO: add option to use l_1 or l_inf distances
     ):
         """Initializes the relaxed equal odds wrapper.
 
@@ -60,6 +60,11 @@ def __init__(
             The cost of a FALSE POSITIVE error, by default 1.0.
         false_neg_cost : float, optional
             The cost of a FALSE NEGATIVE error, by default 1.0.
+        l_p_norm : int, optional
+            The l-p norm to use when computing distances between group ROC points.
+            Used only for the "equalized odds" constraint (different l-p norms
+            lead to different equalized-odds relaxations).
+            By default np.inf, which corresponds to the l-inf norm.
         max_roc_ticks : int, optional
             The maximum number of ticks (points) in each group's ROC, when
             computing the optimal fair classifier, by default 1000.
@@ -73,13 +78,25 @@ def __init__(
         self.tolerance = tolerance
         self.false_pos_cost = false_pos_cost
         self.false_neg_cost = false_neg_cost
+        self.l_p_norm = l_p_norm
         self.max_roc_ticks = max_roc_ticks
         self.seed = seed
 
         # Validate constraint
         if self.constraint not in ALL_CONSTRAINTS:
             raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE)
 
+        if self.l_p_norm != np.inf and self.constraint != "equalized_odds":
+            raise ValueError(
+                f"l-p norm is only supported for the 'equalized_odds' constraint. "
+                f"Got constraint='{self.constraint}' and l_p_norm={self.l_p_norm}."
+            )
+
+        if not (isinstance(self.l_p_norm, int) or self.l_p_norm == np.inf):
+            raise ValueError(
+                f"Invalid l-p norm={self.l_p_norm}. Must be an integer or np.inf."
+            )
+
         # Initialize instance variables
         self._groupwise_roc_data: dict = None
         self._groupwise_roc_hulls: dict = None
@@ -154,7 +171,11 @@ def cost(
             false_neg_cost=false_neg_cost or self.false_neg_cost,
         )
 
-    def constraint_violation(self, constraint_name: str = None) -> float:
+    def constraint_violation(
+        self,
+        constraint_name: str = None,
+        l_p_norm: int = None,
+    ) -> float:
         """Theoretical constraint violation of the LP solution found.
 
         Parameters
@@ -163,26 +184,39 @@ def constraint_violation(self, constraint_name: str = None) -> float:
             Optionally, may provide another constraint name that will be used
             instead of this classifier's self.constraint;
 
+        l_p_norm : int, optional
+            Which l-p norm to use when computing distances between group ROC
+            points. Used only for the "equalized odds" constraint.
+
         Returns
         -------
         float
             The fairness constraint violation.
         """
         self._check_fit_status()
 
-        if constraint_name is not None:
+        # Warn if provided a different constraint
+        constraint_name = constraint_name or self.constraint
+        if constraint_name != self.constraint:
             logging.warning(
                 f"Calculating constraint violation for {constraint_name} constraint;\n"
                 f"Note: this classifier was fitted with a {self.constraint} constraint;"
             )
-        else:
-            constraint_name = self.constraint
 
+        # Warn if provided a different l-p norm
+        l_p_norm = l_p_norm or self.l_p_norm
+        if l_p_norm != self.l_p_norm:
+            logging.warning(
+                f"Calculating constraint violation with l-{l_p_norm} norm;\n"
+                f"Note: this classifier was fitted with l-{self.l_p_norm} norm;"
+            )
+
+        # Validate constraint
         if constraint_name not in ALL_CONSTRAINTS:
             raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE)
 
         if constraint_name == "equalized_odds":
-            return self.equalized_odds_violation()
+            return self.equalized_odds_violation(l_p_norm=l_p_norm)
 
         elif constraint_name.endswith("rate_parity"):
             constraint_to_error_type = {
@@ -230,30 +264,46 @@ def error_rate_parity_constraint_violation(self, error_type: str) -> float:
 
         roc_idx_of_interest = 0 if error_type == "fp" else 1
 
-        return self._max_l_inf_between_points(
+        return self._max_l_p_between_points(
             points=[
                 np.reshape(     # NOTE: must pass an array object, not scalars
                     roc_point[roc_idx_of_interest],  # use only FPR or TPR (whichever was constrained)
                     newshape=(1,))
                 for roc_point in self.groupwise_roc_points
             ],
+            l_p_norm=np.inf,
         )
 
-    def equalized_odds_violation(self) -> float:
+    def equalized_odds_violation(self, l_p_norm: int = None) -> float:
         """Computes the theoretical violation of the equal odds constraint
         (i.e., the maximum l-p distance between the ROC points of any pair
         of groups; uses the classifier's own `l_p_norm` unless one is given).
 
+        Parameters
+        ----------
+        l_p_norm : int, optional
+            Which l-p norm to use when computing distances between group ROC
+            points.
+
         Returns
         -------
         float
             The equal-odds constraint violation.
         """
         self._check_fit_status()
 
-        # Compute l-inf distance between each pair of groups
-        return self._max_l_inf_between_points(
+        # Warn if provided a different l-p norm
+        l_p_norm = l_p_norm or self.l_p_norm
+        if l_p_norm != self.l_p_norm:
+            logging.warning(
+                f"Calculating constraint violation with l-{l_p_norm} norm;\n"
+                f"Note: this classifier was fitted with l-{self.l_p_norm} norm;"
+            )
+
+        # Compute l-p distance between each pair of groups
+        return self._max_l_p_between_points(
             points=self.groupwise_roc_points,
+            l_p_norm=l_p_norm,
         )
 
     def demographic_parity_violation(self) -> float:
@@ -270,7 +320,7 @@ def demographic_parity_violation(self) -> float:
         self._check_fit_status()
 
         # Compute groups' PPR (positive prediction rate)
-        return self._max_l_inf_between_points(
+        return self._max_l_p_between_points(
             points=[
                 # NOTE: must pass an array object, not scalars
                 np.reshape(
@@ -279,16 +329,20 @@ def demographic_parity_violation(self) -> float:
                 )
                 for (group_fpr, group_tpr), group_prev in zip(self.groupwise_roc_points, self.groupwise_prevalence)
             ],
+            l_p_norm=np.inf,
         )
 
     @staticmethod
-    def _max_l_inf_between_points(points: list[float | np.ndarray]) -> float:
+    def _max_l_p_between_points(
+        points: list[float | np.ndarray],
+        l_p_norm: int,
+    ) -> float:
         # Number of points (should correspond to the number of groups)
         n_points = len(points)
 
         # Compute l-p distance between each pair of groups
         l_inf_constraint_violation = [
-            (np.linalg.norm(points[i] - points[j], ord=np.inf), (i, j))
+            (np.linalg.norm(points[i] - points[j], ord=l_p_norm), (i, j))
             for i, j in product(range(n_points), range(n_points))
             if i < j
         ]
@@ -423,6 +477,7 @@ def fit(
             global_prevalence=self.global_prevalence,
             false_positive_cost=self.false_pos_cost,
             false_negative_cost=self.false_neg_cost,
+            l_p_norm=self.l_p_norm,
         )
 
         # Construct each group-specific classifier

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -38,6 +38,12 @@ def fairness_constraint(request) -> float:
     return request.param
 
 
+@pytest.fixture(params=[1, 2, np.inf])
+def l_p_norm(request) -> int:
+    """Fixture for the l-p norm to test."""
+    return request.param
+
+
 @pytest.fixture(params=[1_000, 10_000, 100_000])
 def num_samples(request) -> int:
     return request.param
@@ -48,6 +54,24 @@ def y_pred_scores(num_samples: int, rng) -> np.ndarray:
     return rng.random(size=num_samples)
 
 
+# TODO: eventually add other predictors as different fixture instantiations (?)
+@pytest.fixture
+def predictor(y_pred_scores: np.ndarray):
+    """Predictor function: predicts the generated scores from the sample indices."""
+    def predictor_func(idx):
+        return y_pred_scores[idx]
+
+    return predictor_func
+
+
+@pytest.fixture
+def X_features(num_samples: int) -> np.ndarray:
+    """The sample features are the sample indices.
+    This must match the `predictor` fixture functionality.
+    """
+    return np.arange(num_samples)
+
+
 @pytest.fixture(
     params=[
         0.2,