added tests for l-p relaxations
Co-authored-by: Kajetan Schweighofer <[email protected]>
AndreFCruz and kschweig committed Apr 24, 2024
1 parent fd1c707 commit f3c4c9f
Showing 5 changed files with 118 additions and 41 deletions.
9 changes: 5 additions & 4 deletions README.md
@@ -114,12 +114,13 @@ can be chosen by altering the `l_p_norm` parameter.

A few useful values:
- `l_p_norm="inf"` **[default]** evaluates equalized-odds as the maximum
between group-wise TPR and FPR differences (as shown above).
between group-wise TPR and FPR differences (as shown above);
- `l_p_norm=1` evaluates equalized-odds as the average of the
absolute difference in group-wise TPR and FPR.
- this is also known as `average_abs_odds_difference`.
absolute difference in group-wise TPR and FPR;
- this is also known as `average_abs_odds_difference`;
- specifically, the l-1 distance is *twice* the "average absolute odds" metric, so change `tolerance` accordingly;
- `l_p_norm=p` for any other positive integer $p$: computes the distance between group-wise ROC
points using the specified l-p norm.
points using the specified l-p norm;
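
As a quick illustration (not part of this commit), here is a minimal usage sketch; it assumes the `RelaxedThresholdOptimizer` constructor arguments and `fit`/call signatures exercised by the tests below, and the toy data and score function are made up:

```python
import numpy as np
from error_parity import RelaxedThresholdOptimizer

# Made-up toy data: scores correlated with labels, two sensitive groups.
rng = np.random.default_rng(42)
n = 1_000
group = rng.integers(0, 2, size=n)
y = rng.binomial(1, 0.4, size=n)
X = (y + rng.normal(scale=0.8, size=n)).reshape(-1, 1)
predictor = lambda X: 1.0 / (1.0 + np.exp(-X[:, 0]))  # any callable returning scores in [0, 1]

clf = RelaxedThresholdOptimizer(
    predictor=predictor,
    constraint="equalized_odds",
    tolerance=0.05,
    l_p_norm=np.inf,  # default; l_p_norm=1 constrains the l-1 (average-abs-odds style) distance
)
clf.fit(X=X, y=y, group=group)
y_pred = clf(X, group=group)  # post-processed binary predictions
```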

The actual equalized odds constraint implemented is:

4 changes: 2 additions & 2 deletions tests/conftest.py
@@ -26,7 +26,7 @@ def rng(random_seed: int) -> np.random.Generator:
return np.random.default_rng(random_seed)


@pytest.fixture(params=[0.01, 0.02, 0.05, 0.1, 0.2, 1.0])
@pytest.fixture(params=[0, 0.01, 0.02, 0.05, 0.1, 0.2, 1.0])
def constraint_slack(request) -> float:
"""Fixture for constraint slack/violation (fairness tolerance)."""
return request.param
@@ -135,7 +135,7 @@ def y_true(
n_samples = len(sensitive_attribute)

# Different levels of gaussian noise per group
group_noise = [0.2 + rng.random() / 2 for _ in range(n_groups)]
group_noise = [0.1 + rng.random() * 0.5 for _ in range(n_groups)]

# Generate predictions
label_prevalence = 0.2 + (rng.random() * 0.6) # in [0.2, 0.8]
48 changes: 24 additions & 24 deletions tests/test_constraints.py
@@ -12,6 +12,8 @@
from error_parity.roc_utils import compute_roc_point_from_predictions
from error_parity.evaluation import evaluate_fairness

from .utils import check_metric_tolerance


def test_synthetic_data_generation(
y_true: np.ndarray,
@@ -34,26 +36,6 @@ def test_synthetic_data_generation(
f"Synthetic data generated has group-{g} AUC of {group_auc}"


def get_metric_abs_tolerance(group_size: int) -> float:
"""Reasonable value for metric fulfillment given the inherent randomization
of predictions and the size of the group over which the metric is computed.
"""
return (0.5 * group_size) ** (-1 / 2)
# return group_size ** (-1/2)


def check_metric_tolerance(
theory_val: float, empirical_val, group_size: int, metric_name: str = ""
) -> bool:
"""Checks that the empirical value is within a reasonable tolerance of the expected theoretical value."""
assert np.isclose(
theory_val,
empirical_val,
atol=get_metric_abs_tolerance(group_size),
rtol=0.01,
), f"> '{metric_name}' mismatch; expected {theory_val:.3}; got {empirical_val:.3};"


def test_invalid_constraint_name():
with pytest.raises(ValueError):
_ = RelaxedThresholdOptimizer(
@@ -62,7 +44,7 @@ def test_invalid_constraint_name():
)


def test_equalized_odds_lp_relaxation(
def test_equalized_odds_lp_relaxation_fulfillment(
X_features: np.ndarray,
y_true: np.ndarray,
sensitive_attribute: np.ndarray,
@@ -146,6 +128,23 @@ def check_constraint_fulfillment(
f"expected less than {postprocessed_clf.tolerance};"
)

# # NOTE: This test is disabled as it is too strict for some cases
# # Check that, for tight constraints, theoretical solution is near the constraint boundary
# # > i.e., if constraint tolerance is small, then the theoretical solution found should have
# # constraint violation very close to that tolerance, as lower violation would mean a
# # lower-cost solution could've been found.
# TIGHT_CONSTRAINT_THRESHOLD = 0.04
# UNDERSHOOT_TOLERANCE = 0.01
# if postprocessed_clf.tolerance <= TIGHT_CONSTRAINT_THRESHOLD:
# assert (
# postprocessed_clf.constraint_violation()
# >= postprocessed_clf.tolerance - UNDERSHOOT_TOLERANCE
# ), (
# f"Solution violates tight '{fairness_constraint}_l{postprocessed_clf.l_p_norm}' constraint; "
# f"got: {postprocessed_clf.constraint_violation()}; "
# f"expected near {postprocessed_clf.tolerance};"
# )

# Optimal binarized predictions
y_pred_binary = postprocessed_clf(X_features, group=sensitive_attribute)

@@ -224,11 +223,12 @@ def check_constraint_fulfillment(

# Assert realized constraint violation is close to theoretical solution found
check_metric_tolerance(
# NOTE: it's fine if actual violation is below slack (and not fine if above)
empirical_val=max(empirical_constraint_violation - postprocessed_clf.tolerance, 0),
theory_val=0.0,
empirical_val=empirical_constraint_violation,
# NOTE: we're more lenient for tolerances close to 0, as 0 is literally impossible to achieve
theory_val=max(postprocessed_clf.constraint_violation(), 0.01),
group_size=smallest_denominator,
metric_name=f"{fairness_constraint} violation above slack",
less_or_equal=True, # constraint violation below slack is acceptable
)

# Check realized global ROC point
55 changes: 44 additions & 11 deletions tests/test_evaluation.py
@@ -2,6 +2,10 @@

import logging
import pytest
import numpy as np

from error_parity import RelaxedThresholdOptimizer
from error_parity.cvxpy_utils import SOLUTION_TOLERANCE
from error_parity.evaluation import _safe_division


@@ -26,14 +30,43 @@ def test_valid_safe_division(caplog, rng):
assert "error" not in caplog.text.lower()


def test_equalized_odds_measure():
pass
# # Check realized constraint violation
# groupwise_differences = [
# np.linalg.norm(
# actual_group_roc_points[i] - actual_group_roc_points[j],
# ord=np.inf,
# )
# for i, j in product(unique_groups, unique_groups)
# if i < j
# ]
def test_equalized_odds_relaxation_costs(
X_features: np.ndarray,
y_true: np.ndarray,
sensitive_attribute: np.ndarray,
predictor: callable,
constraint_slack: float,
random_seed: int,
):
"""Tests whether l-p norms follow standard orders (lower p -> higher norm)."""

results = {}
sorted_p_norms = (1, 2, 3, 10, np.inf)
for norm in sorted_p_norms:
# Fit postprocessing to data
clf = RelaxedThresholdOptimizer(
predictor=predictor,
constraint="equalized_odds",
tolerance=constraint_slack,
false_pos_cost=1,
false_neg_cost=1,
seed=random_seed,
l_p_norm=norm,
)
clf.fit(X=X_features, y=y_true, group=sensitive_attribute)

# Store results
results[norm] = clf.cost()

# Check that l-p norms with lower p yield higher costs (tighter constraints, lower unfairness)
for idx in range(1, len(sorted_p_norms)):

lower_p_norm = sorted_p_norms[idx - 1]
higher_p_norm = sorted_p_norms[idx]

lower_p_cost = results[lower_p_norm]
higher_p_cost = results[higher_p_norm]

# Assert lower-p costs are higher (accuracy is lower)
assert lower_p_cost > higher_p_cost - SOLUTION_TOLERANCE, \
f"l-{lower_p_norm} cost: {lower_p_cost} < l-{higher_p_norm} cost: {higher_p_cost}"
43 changes: 43 additions & 0 deletions tests/utils.py
@@ -0,0 +1,43 @@
import numpy as np


def get_metric_abs_tolerance(group_size: int) -> float:
"""Reasonable value for metric fulfillment given the inherent randomization
of predictions and the size of the group over which the metric is computed.
"""
return (0.1 * group_size) ** (-1 / 1.7) # tighter for larger groups, less tight for smaller groups
# return group_size ** (-1/2)


def check_metric_tolerance(
theory_val: float,
empirical_val: float,
group_size: int,
metric_name: str = "",
less_or_equal: bool = False,
) -> bool:
"""Checks that empirical value approximately matches theoretical value.
Parameters
----------
theory_val : float
The theoretical value to fulfill for the metrics.
empirical_val : float
The actual realized value for the metric.
group_size : int
The smallest group size over which the metric is evaluated.
metric_name : str, optional
The metric's name, by default "". This is used for debugging purposes.
less_or_equal : bool, optional
Whether a lower empirical value compared to theory is fine, by default
False.
"""
if less_or_equal and empirical_val <= theory_val:
return True

assert np.isclose(
theory_val,
empirical_val,
atol=get_metric_abs_tolerance(group_size),
rtol=0.01,
), f"> '{metric_name}' mismatch; expected {theory_val:.3}; got {empirical_val:.3};"
