diff --git a/README.md b/README.md index 500cf06..9cf4b4c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# error-parity +# error-parity ![Tests status](https://github.com/socialfoundations/error-parity/actions/workflows/python-tests.yml/badge.svg) ![PyPI status](https://github.com/socialfoundations/error-parity/actions/workflows/python-publish.yml/badge.svg) @@ -20,6 +20,14 @@ which can be useful to compare ML models at equal fairness levels. Package documentation available [here](https://socialfoundations.github.io/error-parity/). +Contents: +- [Installing](#installing) +- [Getting started](#getting-started) +- [How it works](#how-it-works) +- [Fairness constraints](#fairness-constraints) + - [Equalized odds relaxations](#equalized-odds-relaxations) +- [Citing](#citing) + ## Installing @@ -65,13 +73,13 @@ y_pred_test = fair_clf(X=X_test, group=group_test) Given a callable score-based predictor (i.e., `y_pred = predictor(X)`), and some `(X, Y, S)` data to fit, `RelaxedThresholdOptimizer` will: 1. Compute group-specific ROC curves and their convex hulls; -2. Compute the `r`-relaxed optimal solution for the chosen fairness criterion (using [cvxpy](https://www.cvxpy.org)); +2. Compute the $r$-relaxed optimal solution for the chosen fairness criterion (using [cvxpy](https://www.cvxpy.org)); 3. Find the set of group-specific binary classifiers that match the optimal solution found. - each group-specific classifier is made up of (possibly randomized) group-specific thresholds over the given predictor; - if a group's ROC point is in the interior of its ROC curve, partial randomization of its predictions may be necessary. -## Available fairness constraints +## Fairness constraints You can choose specific fairness constraints via the `constraint` key-word argument to the `RelaxedThresholdOptimizer` constructor. 
@@ -83,6 +91,7 @@ Currently implemented fairness constraints: - i.e., equal group-specific TPR and FPR; - use `constraint="equalized_odds"`; - $\max_{a, b \in \mathcal{S}} \max_{y \in \{0, 1\}} \left( \mathbb{P}[\hat{Y}=1 | S=a, Y=y] - \mathbb{P}[\hat{Y}=1 | S=b, Y=y] \right) \leq r$ + - [other relaxations available](#equalized-odds-relaxations) by changing the `l_p_norm` parameter; - [x] equal opportunity; - i.e., equal group-specific TPR; - use `constraint="true_positive_rate_parity"`; @@ -96,7 +105,26 @@ Currently implemented fairness constraints: - use `constraint="demographic_parity"`; - $\max_{a, b \in \mathcal{S}} \left( \mathbb{P}[\hat{Y}=1 | S=a] - \mathbb{P}[\hat{Y}=1 | S=b] \right) \leq r$ -We welcome community contributions for [cvxpy](https://www.cvxpy.org) implementations of other fairness constraints. +> We welcome community contributions for [cvxpy](https://www.cvxpy.org) implementations of other fairness constraints. + +### Equalized odds relaxations + +When using `constraint="equalized_odds"` (the default), different relaxations +can be chosen by altering the `l_p_norm` parameter. + +A few useful values: +- `l_p_norm=np.inf` **[default]** evaluates equalized-odds as the maximum +between group-wise TPR and FPR differences (as shown above). +- `l_p_norm=1` evaluates equalized-odds as the sum of the +absolute differences in group-wise TPR and FPR. + - this equals twice the average absolute difference, also known as `average_abs_odds_difference`. +- `l_p_norm=p` for any other positive integer $p$: computes the distance between group-wise ROC +points using the specified l-p norm. + +The actual equalized odds constraint implemented is: + +$\max_{a, b \in \mathcal{S}} \left\lVert ROC_a - ROC_b \right\rVert_p \leq r,$ where $ROC_a$ is the ROC point of group $S=a$ and $ROC_b$ is the ROC point of group $S=b$. 
+ ## Citing diff --git a/error_parity/_version.py b/error_parity/_version.py index bcb0a64..783a634 100644 --- a/error_parity/_version.py +++ b/error_parity/_version.py @@ -1,3 +1,3 @@ """File to keep the package version in one place.""" -__version__ = "0.3.10" +__version__ = "0.3.11" __version_info__ = tuple(__version__.split(".")) diff --git a/error_parity/cvxpy_utils.py b/error_parity/cvxpy_utils.py index 92a7ec9..364abe9 100644 --- a/error_parity/cvxpy_utils.py +++ b/error_parity/cvxpy_utils.py @@ -237,6 +237,7 @@ def compute_fair_optimum( # noqa: C901 global_prevalence: float, false_positive_cost: float = 1.0, false_negative_cost: float = 1.0, + l_p_norm: int | str = np.inf, ) -> tuple[np.ndarray, np.ndarray]: """Computes the solution to finding the optimal fair (equal odds) classifier. @@ -275,6 +276,15 @@ def compute_fair_optimum( # noqa: C901 false_negative_cost : float, optional The cost of a FALSE NEGATIVE error, by default 1. + l_p_norm : int | str, optional + The type of l-p norm to use when computing the distance between two ROC + points. Used only for the "equalized_odds" constraint. By default uses + `np.inf` (l-infinity distance): the maximum between groups' TPR and FPR + differences. Using `l_p_norm=1` will correspond to the + `average_abs_odds_difference`. 
+ See the following link for more information on this parameter: + https://www.cvxpy.org/api_reference/cvxpy.atoms.other_atoms.html#norm + Returns ------- (groupwise_roc_points, global_roc_point) : tuple[np.ndarray, np.ndarray] @@ -314,10 +324,14 @@ def compute_fair_optimum( # noqa: C901 # NOTE: feature request: compatibility with multiple constraints simultaneously # If "equalized_odds" - # > i.e., constrain l-inf distance between any two groups' ROCs being less than `tolerance` + # - i.e., l-p distance between any two groups' ROC points must be less than `tolerance`; + # - DEFAULT: l-infinity distance (max distance between any two points in the ROC curve); if fairness_constraint == "equalized_odds": constraints += [ - cp.norm_inf(groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j]) + cp.norm( + groupwise_roc_points_vars[i] - groupwise_roc_points_vars[j], + p=l_p_norm, + ) <= tolerance for i, j in product(range(n_groups), range(n_groups)) if i < j diff --git a/error_parity/evaluation.py b/error_parity/evaluation.py index 34481bb..00f749f 100644 --- a/error_parity/evaluation.py +++ b/error_parity/evaluation.py @@ -221,19 +221,39 @@ def group_metric_name(metric_name, group_name): diff_name = f"{metric_name}_diff" results[diff_name] = max(curr_metric_results) - min(curr_metric_results) - # Equal odds: maximum constraint violation for TPR and FPR equality - # i.e., the smallest ratio + # ** Equalized odds ** + # default value: use maximum constraint violation for TPR and FPR equality results["equalized_odds_ratio"] = min( results["fnr_ratio"], results["fpr_ratio"], ) - - # or the largest absolute difference results["equalized_odds_diff"] = max( results["tpr_diff"], # same as FNR diff results["fpr_diff"], # same as TNR diff ) + # Evaluate equalized odds using other l-p norms + # (default value corresponds to l-infinity norm) + available_norms = [1, 2, np.inf] + for norm in available_norms: + metric_name = f"equalized_odds_diff_l{norm}" + results[metric_name] = 
max( + np.linalg.norm( + [ + # TPR diff + groupwise_metrics[group_metric_name("tpr", group_a)] + - groupwise_metrics[group_metric_name("tpr", group_b)], + + # FPR diff + groupwise_metrics[group_metric_name("fpr", group_a)] + - groupwise_metrics[group_metric_name("fpr", group_b)], + ], + ord=norm, + ) + for group_a, group_b in product(unique_groups, unique_groups) + if group_a < group_b + ) + # Optionally, return group-wise metrics as well if return_groupwise_metrics: results.update(groupwise_metrics) diff --git a/error_parity/threshold_optimizer.py b/error_parity/threshold_optimizer.py index ec4a5cf..4c095f1 100644 --- a/error_parity/threshold_optimizer.py +++ b/error_parity/threshold_optimizer.py @@ -39,9 +39,9 @@ def __init__( tolerance: float = 0.0, false_pos_cost: float = 1.0, false_neg_cost: float = 1.0, + l_p_norm: int = np.inf, max_roc_ticks: int = 1000, seed: int = 42, - # distance: str = 'max', # TODO: add option to use l_1 or l_inf distances ): """Initializes the relaxed equal odds wrapper. @@ -60,6 +60,11 @@ def __init__( The cost of a FALSE POSITIVE error, by default 1.0. false_neg_cost : float, optional The cost of a FALSE NEGATIVE error, by default 1.0. + l_p_norm : int, optional + The l-p norm to use when computing distances between group ROC points. + Used only for the "equalized odds" constraint (different l-p norms + lead to different equalized-odds relaxations). + By default np.inf, which corresponds to the l-inf norm. max_roc_ticks : int, optional The maximum number of ticks (points) in each group's ROC, when computing the optimal fair classifier, by default 1000. 
@@ -73,6 +78,7 @@ def __init__( self.tolerance = tolerance self.false_pos_cost = false_pos_cost self.false_neg_cost = false_neg_cost + self.l_p_norm = l_p_norm self.max_roc_ticks = max_roc_ticks self.seed = seed @@ -80,6 +86,17 @@ def __init__( if self.constraint not in ALL_CONSTRAINTS: raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) + if self.l_p_norm != np.inf and self.constraint != "equalized_odds": + raise ValueError( + f"l-p norm is only supported for the 'equalized_odds' constraint. " + f"Got constraint='{self.constraint}' and l_p_norm={self.l_p_norm}." + ) + + if not (isinstance(self.l_p_norm, int) or self.l_p_norm == np.inf): + raise ValueError( + f"Invalid l-p norm={self.l_p_norm}. Must be an integer or np.inf." + ) + # Initialize instance variables self._groupwise_roc_data: dict = None self._groupwise_roc_hulls: dict = None @@ -154,7 +171,11 @@ def cost( false_neg_cost=false_neg_cost or self.false_neg_cost, ) - def constraint_violation(self, constraint_name: str = None) -> float: + def constraint_violation( + self, + constraint_name: str = None, + l_p_norm: int = None, + ) -> float: """Theoretical constraint violation of the LP solution found. Parameters @@ -163,6 +184,10 @@ def constraint_violation(self, constraint_name: str = None) -> float: Optionally, may provide another constraint name that will be used instead of this classifier's self.constraint; + l_p_norm : int, optional + Which l-p norm to use when computing distances between group ROC + points. Used only for the "equalized odds" constraint. 
+ Returns ------- float @@ -170,19 +195,28 @@ def constraint_violation(self, constraint_name: str = None) -> float: """ self._check_fit_status() - if constraint_name is not None: + # Warn if provided a different constraint + constraint_name = constraint_name or self.constraint + if constraint_name != self.constraint: logging.warning( f"Calculating constraint violation for {constraint_name} constraint;\n" f"Note: this classifier was fitted with a {self.constraint} constraint;" ) - else: - constraint_name = self.constraint + # Warn if provided a different l-p norm + l_p_norm = l_p_norm or self.l_p_norm + if l_p_norm != self.l_p_norm: + logging.warning( + f"Calculating constraint violation with l-{l_p_norm} norm;\n" + f"Note: this classifier was fitted with l-{self.l_p_norm} norm;" + ) + + # Validate constraint if constraint_name not in ALL_CONSTRAINTS: raise ValueError(NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE) if constraint_name == "equalized_odds": - return self.equalized_odds_violation() + return self.equalized_odds_violation(l_p_norm=l_p_norm) elif constraint_name.endswith("rate_parity"): constraint_to_error_type = { @@ -230,20 +264,27 @@ def error_rate_parity_constraint_violation(self, error_type: str) -> float: roc_idx_of_interest = 0 if error_type == "fp" else 1 - return self._max_l_inf_between_points( + return self._max_l_p_between_points( points=[ np.reshape( # NOTE: must pass an array object, not scalars roc_point[roc_idx_of_interest], # use only FPR or TPR (whichever was constrained) newshape=(1,)) for roc_point in self.groupwise_roc_points ], + l_p_norm=np.inf, ) - def equalized_odds_violation(self) -> float: + def equalized_odds_violation(self, l_p_norm: int = None) -> float: """Computes the theoretical violation of the equal odds constraint (i.e., the maximum l-inf distance between the ROC point of any pair of groups). + Parameters + ---------- + l_p_norm : int, optional + Which l-p norm to use when computing distances between group ROC + points. 
+ Returns ------- float @@ -251,9 +292,18 @@ def equalized_odds_violation(self) -> float: """ self._check_fit_status() - # Compute l-inf distance between each pair of groups - return self._max_l_inf_between_points( + # Warn if provided a different l-p norm + l_p_norm = l_p_norm or self.l_p_norm + if l_p_norm != self.l_p_norm: + logging.warning( + f"Calculating constraint violation with l-{l_p_norm} norm;\n" + f"Note: this classifier was fitted with l-{self.l_p_norm} norm;" + ) + + # Compute l-p distance between each pair of groups + return self._max_l_p_between_points( points=self.groupwise_roc_points, + l_p_norm=l_p_norm, ) def demographic_parity_violation(self) -> float: @@ -270,7 +320,7 @@ def demographic_parity_violation(self) -> float: self._check_fit_status() # Compute groups' PPR (positive prediction rate) - return self._max_l_inf_between_points( + return self._max_l_p_between_points( points=[ # NOTE: must pass an array object, not scalars np.reshape( @@ -279,16 +329,20 @@ def demographic_parity_violation(self) -> float: ) for (group_fpr, group_tpr), group_prev in zip(self.groupwise_roc_points, self.groupwise_prevalence) ], + l_p_norm=np.inf, ) @staticmethod - def _max_l_inf_between_points(points: list[float | np.ndarray]) -> float: + def _max_l_p_between_points( + points: list[float | np.ndarray], + l_p_norm: int, + ) -> float: # Number of points (should correspond to the number of groups) n_points = len(points) # Compute l-inf distance between each pair of groups l_inf_constraint_violation = [ - (np.linalg.norm(points[i] - points[j], ord=np.inf), (i, j)) + (np.linalg.norm(points[i] - points[j], ord=l_p_norm), (i, j)) for i, j in product(range(n_points), range(n_points)) if i < j ] @@ -423,6 +477,7 @@ def fit( global_prevalence=self.global_prevalence, false_positive_cost=self.false_pos_cost, false_negative_cost=self.false_neg_cost, + l_p_norm=self.l_p_norm, ) # Construct each group-specific classifier diff --git a/tests/conftest.py b/tests/conftest.py index 
aa1b30b..8e787aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,12 @@ def fairness_constraint(request) -> float: return request.param +@pytest.fixture(params=[1, 2, np.inf]) +def l_p_norm(request) -> int: + """Fixture for the l-p norm to test.""" + return request.param + + @pytest.fixture(params=[1_000, 10_000, 100_000]) def num_samples(request) -> int: return request.param @@ -48,6 +54,24 @@ def y_pred_scores(num_samples: int, rng) -> np.ndarray: return rng.random(size=num_samples) +# TODO: eventually add other predictors as different fixture instantiations (?) +@pytest.fixture +def predictor(y_pred_scores: np.ndarray): + """Predictor function: predicts the generated scores from the sample indices.""" + def predictor_func(idx): + return y_pred_scores[idx] + + return predictor_func + + +@pytest.fixture +def X_features(num_samples: int) -> np.ndarray: + """The sample features are the sample indices. + This must match the `predictor` fixture functionality. + """ + return np.arange(num_samples) + + @pytest.fixture( params=[ 0.2, diff --git a/tests/test_constraints.py b/tests/test_constraints.py index 85c399d..0a01d50 100644 --- a/tests/test_constraints.py +++ b/tests/test_constraints.py @@ -38,7 +38,7 @@ def get_metric_abs_tolerance(group_size: int) -> float: """Reasonable value for metric fulfillment given the inherent randomization of predictions and the size of the group over which the metric is computed. 
""" - return (0.1 * group_size) ** (-1 / 1.5) + return (0.5 * group_size) ** (-1 / 2) # return group_size ** (-1/2) @@ -62,30 +62,47 @@ def test_invalid_constraint_name(): ) -def test_constraint_fulfillment( +def test_equalized_odds_lp_relaxation( + X_features: np.ndarray, y_true: np.ndarray, - y_pred_scores: np.ndarray, sensitive_attribute: np.ndarray, - fairness_constraint: str, + predictor: callable, constraint_slack: float, + l_p_norm: int, random_seed: int, ): - """Tests fairness constraint fulfillment at the given slack level.""" - # Dataset metadata - num_samples = len(y_true) - unique_groups = np.unique( - sensitive_attribute - ) # return is sorted in ascending order - label_prevalence = np.mean(y_true) + """Tests different l-p relaxations for the "equalized-odds" constraint.""" + clf = RelaxedThresholdOptimizer( + predictor=predictor, + constraint="equalized_odds", + tolerance=constraint_slack, + false_pos_cost=1, + false_neg_cost=1, + seed=random_seed, + l_p_norm=l_p_norm, + ) - # Predictor function - # # > predicts the generated scores from the sample indices - def predictor(idx): - return y_pred_scores[idx] + # Fit postprocessing to data + clf.fit(X=X_features, y=y_true, group=sensitive_attribute) + + check_constraint_fulfillment( + X_features=X_features, + y_true=y_true, + sensitive_attribute=sensitive_attribute, + postprocessed_clf=clf, + ) - # Hence, for this example, the features are the sample indices - X_features = np.arange(num_samples) +def test_constraint_fulfillment( + X_features: np.ndarray, + y_true: np.ndarray, + sensitive_attribute: np.ndarray, + predictor: callable, + fairness_constraint: str, + constraint_slack: float, + random_seed: int, +): + """Tests fairness constraint fulfillment on the given synthetic data.""" clf = RelaxedThresholdOptimizer( predictor=predictor, constraint=fairness_constraint, @@ -98,15 +115,39 @@ def predictor(idx): # Fit postprocessing to data clf.fit(X=X_features, y=y_true, group=sensitive_attribute) - # Check 
that theoretical solution fulfills relaxed constraint - assert clf.constraint_violation() <= constraint_slack + SOLUTION_TOLERANCE, ( + check_constraint_fulfillment( + X_features=X_features, + y_true=y_true, + sensitive_attribute=sensitive_attribute, + postprocessed_clf=clf, + ) + + +def check_constraint_fulfillment( + X_features: np.ndarray, + y_true: np.ndarray, + sensitive_attribute: np.ndarray, + postprocessed_clf: RelaxedThresholdOptimizer, +): + """Checks that the postprocessed classifier fulfills its target constraint.""" + + # Dataset metadata + unique_groups = np.unique( + sensitive_attribute + ) # return is sorted in ascending order + label_prevalence = np.mean(y_true) + + fairness_constraint = postprocessed_clf.constraint + + # Check that theoretical solution fulfills constraint tolerance + assert postprocessed_clf.constraint_violation() <= postprocessed_clf.tolerance + SOLUTION_TOLERANCE, ( f"Solution fails to fulfill the '{fairness_constraint}' inequality; " - f"got: {clf.constraint_violation()}; " - f"expected less than {constraint_slack};" + f"got: {postprocessed_clf.constraint_violation()}; " + f"expected less than {postprocessed_clf.tolerance};" ) # Optimal binarized predictions - y_pred_binary = clf(X_features, group=sensitive_attribute) + y_pred_binary = postprocessed_clf(X_features, group=sensitive_attribute) # Check realized group-specific ROC points actual_group_roc_points = np.vstack( @@ -125,7 +166,7 @@ def predictor(idx): g_label_prevalence = np.mean(y_true[g_filter]) actual_fpr, actual_tpr = actual_group_roc_points[g] - target_fpr, target_tpr = clf.groupwise_roc_points[g] + target_fpr, target_tpr = postprocessed_clf.groupwise_roc_points[g] # Check group FPR check_metric_tolerance( @@ -159,7 +200,15 @@ def predictor(idx): empirical_constraint_violation: float if fairness_constraint == "equalized_odds": - empirical_constraint_violation = empirical_fairness_results["equalized_odds_diff"] + l_p_norm = postprocessed_clf.l_p_norm + if l_p_norm 
== 1: + empirical_constraint_violation = empirical_fairness_results["equalized_odds_diff_l1"] + elif l_p_norm == 2: + empirical_constraint_violation = empirical_fairness_results["equalized_odds_diff_l2"] + elif l_p_norm == np.inf: + empirical_constraint_violation = empirical_fairness_results["equalized_odds_diff"] + else: + raise NotImplementedError(f"Tests not implemented for eq. odds with l-p norm {l_p_norm}") elif fairness_constraint in {"true_positive_rate_parity", "false_negative_rate_parity"}: empirical_constraint_violation = empirical_fairness_results["tpr_diff"] @@ -176,14 +225,14 @@ def predictor(idx): # Assert realized constraint violation is close to theoretical solution found check_metric_tolerance( # NOTE: it's fine if actual violation is below slack (and not fine if above) - empirical_val=max(empirical_constraint_violation - constraint_slack, 0), + empirical_val=max(empirical_constraint_violation - postprocessed_clf.tolerance, 0), theory_val=0.0, group_size=smallest_denominator, metric_name=f"{fairness_constraint} violation above slack", ) # Check realized global ROC point - target_fpr, target_tpr = clf.global_roc_point + target_fpr, target_tpr = postprocessed_clf.global_roc_point actual_fpr, actual_tpr = compute_roc_point_from_predictions( y_true=y_true, y_pred_binary=y_pred_binary, @@ -206,19 +255,19 @@ def predictor(idx): ) # Check realized classification loss - theoretical_cost = clf.cost() + theoretical_cost = postprocessed_clf.cost() actual_cost = calc_cost_of_point( fpr=actual_fpr, fnr=1 - actual_tpr, prevalence=label_prevalence, - false_pos_cost=clf.false_pos_cost, - false_neg_cost=clf.false_neg_cost, + false_pos_cost=postprocessed_clf.false_pos_cost, + false_neg_cost=postprocessed_clf.false_neg_cost, ) check_metric_tolerance( theoretical_cost, actual_cost, - group_size=num_samples, + group_size=len(y_true), metric_name="classification loss", ) diff --git a/tests/test_optimizer_instantiation.py b/tests/test_optimizer_instantiation.py new file 
mode 100644 index 0000000..1c114e8 --- /dev/null +++ b/tests/test_optimizer_instantiation.py @@ -0,0 +1 @@ +"""Test safe-guards for the instantiation of a `RelaxedThresholdOptimizer` object."""