Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,16 @@ After completing a milestone, create a pull request with your changes for review

## PR7: Model Evaluation & Interpretation

- [ ] Create performance metrics calculator
- [ ] Implement confusion matrix for classification
- [ ] Add ROC curve generator for classification
- [ ] Create precision-recall curve for classification
- [ ] Implement actual vs predicted plots for regression
- [ ] Add residual plot generator for regression
- [ ] Create feature importance visualization
- [ ] Implement SHAP value calculator and visualizer
- [ ] Write tests for all model evaluation metrics
- [ ] Test visualization of model interpretability features
- [x] Create performance metrics calculator
- [x] Implement confusion matrix for classification
- [x] Add ROC curve generator for classification
- [x] Create precision-recall curve for classification
- [x] Implement actual vs predicted plots for regression
- [x] Add residual plot generator for regression
- [x] Create feature importance visualization
- [x] Implement SHAP value calculator and visualizer
- [x] Write tests for all model evaluation metrics
- [x] Test visualization of model interpretability features

## PR8: Prediction & Export Functionality

Expand Down
64 changes: 64 additions & 0 deletions tests/test_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestRegressor

from utils import eval as evaluation
from utils import viz


def sample_classification():
    """Build a small synthetic classification dataset for the tests.

    Returns:
        A ``(features, target)`` pair: a 50-row DataFrame with columns
        ``f0`` .. ``f3`` and a Series holding the generated labels. The
        generator is seeded, so every call yields the same data.
    """
    raw_features, raw_target = make_classification(
        n_samples=50, n_features=4, random_state=0
    )
    column_names = [f"f{i}" for i in range(4)]
    feature_frame = pd.DataFrame(raw_features, columns=column_names)
    return feature_frame, pd.Series(raw_target)


def sample_regression():
    """Build a small synthetic regression dataset for the tests.

    Returns:
        A ``(features, target)`` pair: a 50-row DataFrame with columns
        ``f0`` .. ``f3`` and a Series holding the generated targets. The
        generator is seeded, so every call yields the same data.
    """
    raw_features, raw_target = make_regression(
        n_samples=50, n_features=4, noise=0.1, random_state=0
    )
    column_names = [f"f{i}" for i in range(4)]
    feature_frame = pd.DataFrame(raw_features, columns=column_names)
    return feature_frame, pd.Series(raw_target)


def test_performance_metrics_classification():
    """Classification metrics expose exactly the four expected keys."""
    features, target = sample_classification()
    model = LogisticRegression(max_iter=100)
    model.fit(features, target)
    predicted = model.predict(features)

    result = evaluation.performance_metrics(
        target, predicted, problem_type="classification"
    )

    expected_keys = {"accuracy", "precision", "recall", "f1"}
    assert set(result) == expected_keys


def test_performance_metrics_regression():
    """Regression metrics expose exactly the four expected keys."""
    features, target = sample_regression()
    model = LinearRegression()
    model.fit(features, target)
    predicted = model.predict(features)

    result = evaluation.performance_metrics(
        target, predicted, problem_type="regression"
    )

    expected_keys = {"mae", "mse", "rmse", "r2"}
    assert set(result) == expected_keys


def test_confusion_matrix_and_curves():
    """Confusion matrix is square and each classification plot has traces."""
    features, target = sample_classification()
    model = LogisticRegression(max_iter=50)
    model.fit(features, target)
    predicted = model.predict(features)
    # Probability of the second class column, used as the curve score.
    positive_score = model.predict_proba(features)[:, 1]

    matrix = evaluation.confusion_matrix(target, predicted)
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    assert n_rows == n_cols

    matrix_fig = viz.confusion_matrix_plot(target, predicted)
    assert matrix_fig.data

    roc_fig = viz.roc_curve_plot(target, positive_score)
    pr_fig = viz.precision_recall_curve_plot(target, positive_score)
    assert roc_fig.data
    assert pr_fig.data


def test_regression_plots_and_importance():
    """Regression plots and the importance chart all contain traces."""
    features, target = sample_regression()
    forest = RandomForestRegressor(n_estimators=10, random_state=0)
    forest.fit(features, target)
    predicted = forest.predict(features)

    figures = [
        viz.actual_vs_predicted_plot(target, predicted),
        viz.residual_plot(target, predicted),
        viz.feature_importance_plot(forest, list(features.columns)),
    ]

    for figure in figures:
        assert figure.data


def test_shap_summary_plot():
    """The SHAP summary helper returns an object exposing ``axes``."""
    features, target = sample_regression()
    forest = RandomForestRegressor(n_estimators=5, random_state=0)
    forest.fit(features, target)

    # Only the first rows (DataFrame.head default) are explained.
    first_rows = features.head()
    figure = viz.shap_summary_plot(forest, first_rows)

    assert hasattr(figure, "axes")
3 changes: 2 additions & 1 deletion utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
from . import eda
from . import viz
from . import model
from . import eval

__all__ = ["config", "data", "eda", "viz", "model"]
__all__ = ["config", "data", "eda", "viz", "model", "eval"]
64 changes: 64 additions & 0 deletions utils/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Model evaluation utilities."""

from __future__ import annotations

from typing import Dict, Iterable

import numpy as np
import pandas as pd
from sklearn.metrics import (
accuracy_score,
precision_score,
recall_score,
f1_score,
mean_absolute_error,
mean_squared_error,
r2_score,
confusion_matrix as sk_confusion_matrix,
roc_curve,
precision_recall_curve,
)


def performance_metrics(
    y_true: Iterable,
    y_pred: Iterable,
    *,
    problem_type: str,
) -> Dict[str, float]:
    """Compute the set of scores that matches the given task.

    Args:
        y_true: Ground-truth targets.
        y_pred: Predictions to score against ``y_true``.
        problem_type: Either ``"classification"`` or ``"regression"``.

    Returns:
        For classification, a dict with ``accuracy`` and weighted-average
        ``precision``, ``recall`` and ``f1`` (``zero_division=0`` is passed
        to the weighted scores). For regression, a dict with ``mae``,
        ``mse``, ``rmse`` and ``r2``.

    Raises:
        ValueError: If ``problem_type`` is not one of the two supported
            values.
    """
    if problem_type == "classification":
        # Shared settings for the three averaged scores.
        weighted = {"average": "weighted", "zero_division": 0}
        scores = {"accuracy": accuracy_score(y_true, y_pred)}
        scores["precision"] = precision_score(y_true, y_pred, **weighted)
        scores["recall"] = recall_score(y_true, y_pred, **weighted)
        scores["f1"] = f1_score(y_true, y_pred, **weighted)
        return scores

    if problem_type == "regression":
        # MSE is computed once and reused for the RMSE entry.
        squared_error = mean_squared_error(y_true, y_pred)
        absolute_error = mean_absolute_error(y_true, y_pred)
        root_error = float(np.sqrt(squared_error))
        return {
            "mae": absolute_error,
            "mse": squared_error,
            "rmse": root_error,
            "r2": r2_score(y_true, y_pred),
        }

    raise ValueError(f"Unknown problem_type: {problem_type}")


def confusion_matrix(y_true: Iterable, y_pred: Iterable) -> pd.DataFrame:
    """Compute the confusion matrix and wrap it in a DataFrame.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.

    Returns:
        The matrix produced by scikit-learn's ``confusion_matrix`` as a
        DataFrame with default integer row and column labels.
    """
    return pd.DataFrame(sk_confusion_matrix(y_true, y_pred))


def roc_curve_data(y_true: Iterable, y_score: Iterable) -> pd.DataFrame:
    """Tabulate the ROC curve for a set of scores.

    Args:
        y_true: Ground-truth labels.
        y_score: Scores passed to scikit-learn's ``roc_curve``.

    Returns:
        A DataFrame with columns ``fpr``, ``tpr`` and ``threshold``, one row
        per point returned by ``roc_curve``.
    """
    false_pos_rate, true_pos_rate, thresholds = roc_curve(y_true, y_score)
    table = pd.DataFrame(
        {
            "fpr": false_pos_rate,
            "tpr": true_pos_rate,
            "threshold": thresholds,
        }
    )
    return table


def precision_recall_curve_data(y_true: Iterable, y_score: Iterable) -> pd.DataFrame:
    """Tabulate the precision-recall curve for a set of scores.

    Args:
        y_true: Ground-truth labels.
        y_score: Scores passed to scikit-learn's ``precision_recall_curve``.

    Returns:
        A DataFrame with columns ``precision``, ``recall`` and ``threshold``.
        The threshold column is padded with one trailing NaN so it lines up
        with the precision and recall columns.
    """
    precision_vals, recall_vals, thresholds = precision_recall_curve(y_true, y_score)
    # One NaN is appended so the threshold column has the same length as
    # the precision/recall arrays when the frame is built.
    padded_thresholds = np.append(thresholds, np.nan)
    columns = {
        "precision": precision_vals,
        "recall": recall_vals,
        "threshold": padded_thresholds,
    }
    return pd.DataFrame(columns)

88 changes: 88 additions & 0 deletions utils/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import (
roc_curve,
precision_recall_curve,
confusion_matrix as sk_confusion_matrix,
)
import matplotlib.pyplot as plt
import shap


def histogram(
Expand Down Expand Up @@ -101,3 +109,83 @@ def heatmap(
def export_figure(fig: go.Figure, path: Path) -> None:
    """Write ``fig`` to ``path`` as an HTML file.

    Args:
        fig: Figure to export.
        path: Destination; converted to ``str`` before being handed to
            ``fig.write_html``.
    """
    destination = str(path)
    fig.write_html(destination)


def confusion_matrix_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure:
    """Render the confusion matrix as an annotated heatmap.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.
        title: Optional figure title.

    Returns:
        A heatmap figure using the "Blues" colour scale with cell values
        shown, x axis titled "Predicted" and y axis titled "Actual".
    """
    matrix = sk_confusion_matrix(y_true, y_pred)
    heatmap_fig = px.imshow(
        matrix,
        text_auto=True,
        color_continuous_scale="Blues",
        title=title,
    )
    axis_titles = (
        (heatmap_fig.update_xaxes, "Predicted"),
        (heatmap_fig.update_yaxes, "Actual"),
    )
    for update_axis, axis_title in axis_titles:
        update_axis(title=axis_title)
    return heatmap_fig


def roc_curve_plot(y_true, y_score, *, title: Optional[str] = None) -> go.Figure:
    """Draw the ROC curve with a dashed diagonal reference line.

    Args:
        y_true: Ground-truth labels.
        y_score: Scores passed to scikit-learn's ``roc_curve``.
        title: Optional figure title; "ROC Curve" is used when falsy.

    Returns:
        A figure with one line trace named "ROC" and a dashed line shape
        from (0, 0) to (1, 1).
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_score)
    roc_trace = go.Scatter(x=false_pos_rate, y=true_pos_rate, mode="lines", name="ROC")

    figure = go.Figure()
    figure.add_trace(roc_trace)
    # Diagonal from the origin to (1, 1) as a visual reference.
    figure.add_shape(type="line", x0=0, y0=0, x1=1, y1=1, line=dict(dash="dash"))

    heading = title if title else "ROC Curve"
    figure.update_layout(
        title=heading,
        xaxis_title="False Positive Rate",
        yaxis_title="True Positive Rate",
    )
    return figure


def precision_recall_curve_plot(y_true, y_score, *, title: Optional[str] = None) -> go.Figure:
    """Draw the precision-recall curve.

    Args:
        y_true: Ground-truth labels.
        y_score: Scores passed to scikit-learn's ``precision_recall_curve``.
        title: Optional figure title; "Precision-Recall Curve" is used when
            falsy.

    Returns:
        A figure with one line trace named "PR", recall on the x axis and
        precision on the y axis.
    """
    precision_vals, recall_vals, _thresholds = precision_recall_curve(y_true, y_score)
    pr_trace = go.Scatter(x=recall_vals, y=precision_vals, mode="lines", name="PR")

    figure = go.Figure()
    figure.add_trace(pr_trace)

    heading = title if title else "Precision-Recall Curve"
    figure.update_layout(
        title=heading,
        xaxis_title="Recall",
        yaxis_title="Precision",
    )
    return figure


def actual_vs_predicted_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure:
    """Scatter actual against predicted values with a dashed identity line.

    Args:
        y_true: Observed target values (x axis, labelled "Actual").
        y_pred: Predicted values (y axis, labelled "Predicted").
        title: Optional figure title.

    Returns:
        A scatter figure with a dashed line shape running along ``y = x``
        between the smallest and largest value found in either input.
    """
    axis_labels = {"x": "Actual", "y": "Predicted"}
    figure = px.scatter(x=y_true, y=y_pred, labels=axis_labels, title=title)

    # The reference line spans the combined range of both inputs.
    lower = min(np.min(y_true), np.min(y_pred))
    upper = max(np.max(y_true), np.max(y_pred))
    figure.add_shape(
        type="line",
        x0=lower,
        y0=lower,
        x1=upper,
        y1=upper,
        line=dict(dash="dash"),
    )
    return figure


def residual_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure:
    """Scatter residuals against predictions with a dashed zero line.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values (x axis, labelled "Predicted").
        title: Optional figure title.

    Returns:
        A scatter figure of ``y_true - y_pred`` (labelled "Residual") with a
        dashed horizontal line shape at zero spanning the prediction range.
    """
    # Both inputs are converted to arrays so the subtraction is positional.
    errors = np.array(y_true) - np.array(y_pred)
    axis_labels = {"x": "Predicted", "y": "Residual"}
    figure = px.scatter(x=y_pred, y=errors, labels=axis_labels, title=title)

    left_edge = np.min(y_pred)
    right_edge = np.max(y_pred)
    figure.add_shape(
        type="line",
        x0=left_edge,
        y0=0,
        x1=right_edge,
        y1=0,
        line=dict(dash="dash"),
    )
    return figure


def feature_importance_plot(model, feature_names: list[str], *, title: Optional[str] = None) -> go.Figure:
    """Return a bar chart of feature importances, largest first.

    Importances come from ``model.feature_importances_`` when that attribute
    exists; otherwise the absolute values of ``model.coef_`` are used. When
    ``coef_`` has more than one dimension (one row per class or target), the
    absolute coefficients are averaged across rows so every class/target
    contributes, rather than reporting the first row only. For a single-row
    ``coef_`` this is the same as taking that row.

    Args:
        model: Fitted estimator exposing ``feature_importances_`` or
            ``coef_``.
        feature_names: Names for the features, in the same order as the
            model's importance values.
        title: Optional figure title; "Feature Importance" is used when
            falsy.

    Returns:
        A bar figure with ``feature`` on the x axis and ``importance`` on
        the y axis, sorted by descending importance.

    Raises:
        ValueError: If the model exposes neither attribute, or if the number
            of importance values differs from ``len(feature_names)``.
    """
    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
    elif hasattr(model, "coef_"):
        importances = np.abs(model.coef_)
        if importances.ndim > 1:
            # Collapse the per-class / per-target rows into one value per
            # feature instead of silently discarding all rows but the first.
            importances = importances.mean(axis=0)
    else:
        raise ValueError("Model has no feature importances")

    if len(importances) != len(feature_names):
        raise ValueError(
            f"Expected {len(importances)} feature names, got {len(feature_names)}"
        )

    df = pd.DataFrame({"feature": feature_names, "importance": importances})
    df = df.sort_values("importance", ascending=False)
    return px.bar(df, x="feature", y="importance", title=title or "Feature Importance")


def shap_summary_plot(model, X: pd.DataFrame, *, title: Optional[str] = None):
    """Return a SHAP beeswarm summary plot as a Matplotlib figure.

    Args:
        model: Fitted model to explain; passed to ``shap.Explainer``.
        X: Data used both to build the explainer and as the rows to explain.
        title: Optional figure-level title applied with ``suptitle``.

    Returns:
        The Matplotlib figure that holds the beeswarm plot.
    """
    explainer = shap.Explainer(model, X)
    values = explainer(X)
    # Open a fresh figure first: the plot is picked up afterwards through
    # pyplot's current-figure state, so without this a second call (or any
    # figure the caller already had open) would be drawn over.
    plt.figure()
    shap.plots.beeswarm(values, show=False)
    fig = plt.gcf()
    if title:
        fig.suptitle(title)
    return fig