"""Tests for the evaluation helpers in ``utils.eval`` and the plots in ``utils.viz``."""

import math

import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestRegressor

from utils import eval as evaluation
from utils import viz


def _as_pandas(X, y):
    """Wrap a feature matrix and target vector as ``(DataFrame, Series)``.

    Columns are named ``f0 .. f{k-1}`` so the plots have feature names to show.
    """
    columns = [f"f{i}" for i in range(X.shape[1])]
    return pd.DataFrame(X, columns=columns), pd.Series(y)


def _assert_value_error(func, *args, **kwargs):
    """Call ``func`` and fail unless it raises ``ValueError``; return the error."""
    try:
        func(*args, **kwargs)
    except ValueError as exc:
        return exc
    raise AssertionError(f"{func.__name__} did not raise ValueError")


def sample_classification():
    """Return a small, seeded classification data set (50 rows, 4 features)."""
    X, y = make_classification(n_samples=50, n_features=4, random_state=0)
    return _as_pandas(X, y)


def sample_regression():
    """Return a small, seeded regression data set (50 rows, 4 features)."""
    X, y = make_regression(n_samples=50, n_features=4, noise=0.1, random_state=0)
    return _as_pandas(X, y)


def test_performance_metrics_classification():
    X, y = sample_classification()
    clf = LogisticRegression(max_iter=100).fit(X, y)
    preds = clf.predict(X)
    metrics = evaluation.performance_metrics(y, preds, problem_type="classification")
    assert set(metrics) == {"accuracy", "precision", "recall", "f1"}
    # All four scores are ratios, so anything outside [0, 1] is a bug.
    assert all(0.0 <= value <= 1.0 for value in metrics.values())


def test_performance_metrics_regression():
    X, y = sample_regression()
    reg = LinearRegression().fit(X, y)
    preds = reg.predict(X)
    metrics = evaluation.performance_metrics(y, preds, problem_type="regression")
    assert set(metrics) == {"mae", "mse", "rmse", "r2"}
    assert metrics["mae"] >= 0.0
    # rmse is derived from mse inside performance_metrics; keep them in sync.
    assert math.isclose(metrics["rmse"] ** 2, metrics["mse"], rel_tol=1e-9)


def test_performance_metrics_unknown_problem_type():
    _, y = sample_classification()
    error = _assert_value_error(
        evaluation.performance_metrics, y, y, problem_type="clustering"
    )
    assert "clustering" in str(error)


def test_confusion_matrix_and_curves():
    X, y = sample_classification()
    clf = LogisticRegression(max_iter=50).fit(X, y)
    preds = clf.predict(X)
    prob = clf.predict_proba(X)[:, 1]
    cm = evaluation.confusion_matrix(y, preds)
    assert cm.shape[0] == cm.shape[1]
    # Every sample lands in exactly one cell of the matrix.
    assert int(cm.to_numpy().sum()) == len(y)
    fig = viz.confusion_matrix_plot(y, preds)
    assert fig.data
    roc = viz.roc_curve_plot(y, prob)
    pr = viz.precision_recall_curve_plot(y, prob)
    assert roc.data and pr.data


def test_curve_data_frames():
    X, y = sample_classification()
    prob = LogisticRegression(max_iter=100).fit(X, y).predict_proba(X)[:, 1]

    roc = evaluation.roc_curve_data(y, prob)
    assert list(roc.columns) == ["fpr", "tpr", "threshold"]
    assert roc["fpr"].between(0, 1).all()
    assert roc["tpr"].between(0, 1).all()

    pr = evaluation.precision_recall_curve_data(y, prob)
    assert list(pr.columns) == ["precision", "recall", "threshold"]
    # The helper pads the threshold column with a single trailing NaN.
    assert pd.isna(pr["threshold"].iloc[-1])
    assert pr["threshold"].iloc[:-1].notna().all()


def test_regression_plots_and_importance():
    X, y = sample_regression()
    reg = RandomForestRegressor(n_estimators=10, random_state=0).fit(X, y)
    preds = reg.predict(X)
    avp = viz.actual_vs_predicted_plot(y, preds)
    residual = viz.residual_plot(y, preds)
    imp = viz.feature_importance_plot(reg, list(X.columns))
    assert avp.data and residual.data and imp.data


def test_feature_importance_requires_importances():
    # A bare object has neither ``feature_importances_`` nor ``coef_``.
    _assert_value_error(viz.feature_importance_plot, object(), ["f0"])


def test_shap_summary_plot():
    X, y = sample_regression()
    reg = RandomForestRegressor(n_estimators=5, random_state=0).fit(X, y)
    fig = viz.shap_summary_plot(reg, X.head())
    assert hasattr(fig, "axes")
import eval -__all__ = ["config", "data", "eda", "viz", "model"] +__all__ = ["config", "data", "eda", "viz", "model", "eval"] diff --git a/utils/eval.py b/utils/eval.py new file mode 100644 index 0000000..d2d595a --- /dev/null +++ b/utils/eval.py @@ -0,0 +1,64 @@ +"""Model evaluation utilities.""" + +from __future__ import annotations + +from typing import Dict, Iterable + +import numpy as np +import pandas as pd +from sklearn.metrics import ( + accuracy_score, + precision_score, + recall_score, + f1_score, + mean_absolute_error, + mean_squared_error, + r2_score, + confusion_matrix as sk_confusion_matrix, + roc_curve, + precision_recall_curve, +) + + +def performance_metrics( + y_true: Iterable, + y_pred: Iterable, + *, + problem_type: str, +) -> Dict[str, float]: + """Return performance metrics based on problem type.""" + if problem_type == "classification": + return { + "accuracy": accuracy_score(y_true, y_pred), + "precision": precision_score(y_true, y_pred, average="weighted", zero_division=0), + "recall": recall_score(y_true, y_pred, average="weighted", zero_division=0), + "f1": f1_score(y_true, y_pred, average="weighted", zero_division=0), + } + if problem_type == "regression": + mse = mean_squared_error(y_true, y_pred) + return { + "mae": mean_absolute_error(y_true, y_pred), + "mse": mse, + "rmse": float(np.sqrt(mse)), + "r2": r2_score(y_true, y_pred), + } + raise ValueError(f"Unknown problem_type: {problem_type}") + + +def confusion_matrix(y_true: Iterable, y_pred: Iterable) -> pd.DataFrame: + """Return confusion matrix as DataFrame.""" + cm = sk_confusion_matrix(y_true, y_pred) + return pd.DataFrame(cm) + + +def roc_curve_data(y_true: Iterable, y_score: Iterable) -> pd.DataFrame: + """Return false positive rate, true positive rate, and thresholds.""" + fpr, tpr, thresh = roc_curve(y_true, y_score) + return pd.DataFrame({"fpr": fpr, "tpr": tpr, "threshold": thresh}) + + +def precision_recall_curve_data(y_true: Iterable, y_score: Iterable) -> pd.DataFrame: + 
"""Return precision-recall curve values.""" + precision, recall, thresh = precision_recall_curve(y_true, y_score) + return pd.DataFrame({"precision": precision, "recall": recall, "threshold": np.append(thresh, np.nan)}) + diff --git a/utils/viz.py b/utils/viz.py index 1087f4c..24d9b31 100644 --- a/utils/viz.py +++ b/utils/viz.py @@ -8,6 +8,14 @@ import pandas as pd import plotly.express as px import plotly.graph_objects as go +import numpy as np +from sklearn.metrics import ( + roc_curve, + precision_recall_curve, + confusion_matrix as sk_confusion_matrix, +) +import matplotlib.pyplot as plt +import shap def histogram( @@ -101,3 +109,83 @@ def heatmap( def export_figure(fig: go.Figure, path: Path) -> None: """Export a figure to an HTML file.""" fig.write_html(str(path)) + + +def confusion_matrix_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure: + """Return a confusion matrix heatmap.""" + cm = sk_confusion_matrix(y_true, y_pred) + fig = px.imshow(cm, text_auto=True, color_continuous_scale="Blues", title=title) + fig.update_xaxes(title="Predicted") + fig.update_yaxes(title="Actual") + return fig + + +def roc_curve_plot(y_true, y_score, *, title: Optional[str] = None) -> go.Figure: + """Return ROC curve figure.""" + fpr, tpr, _ = roc_curve(y_true, y_score) + fig = go.Figure() + fig.add_trace(go.Scatter(x=fpr, y=tpr, mode="lines", name="ROC")) + fig.add_shape(type="line", x0=0, y0=0, x1=1, y1=1, line=dict(dash="dash")) + fig.update_layout( + title=title or "ROC Curve", + xaxis_title="False Positive Rate", + yaxis_title="True Positive Rate", + ) + return fig + + +def precision_recall_curve_plot(y_true, y_score, *, title: Optional[str] = None) -> go.Figure: + """Return precision-recall curve figure.""" + precision, recall, _ = precision_recall_curve(y_true, y_score) + fig = go.Figure() + fig.add_trace(go.Scatter(x=recall, y=precision, mode="lines", name="PR")) + fig.update_layout( + title=title or "Precision-Recall Curve", + xaxis_title="Recall", + 
yaxis_title="Precision", + ) + return fig + + +def actual_vs_predicted_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure: + """Return actual vs predicted scatter plot.""" + fig = px.scatter(x=y_true, y=y_pred, labels={"x": "Actual", "y": "Predicted"}, title=title) + min_val = min(np.min(y_true), np.min(y_pred)) + max_val = max(np.max(y_true), np.max(y_pred)) + fig.add_shape(type="line", x0=min_val, y0=min_val, x1=max_val, y1=max_val, line=dict(dash="dash")) + return fig + + +def residual_plot(y_true, y_pred, *, title: Optional[str] = None) -> go.Figure: + """Return residual plot.""" + residuals = np.array(y_true) - np.array(y_pred) + fig = px.scatter(x=y_pred, y=residuals, labels={"x": "Predicted", "y": "Residual"}, title=title) + fig.add_shape(type="line", x0=np.min(y_pred), y0=0, x1=np.max(y_pred), y1=0, line=dict(dash="dash")) + return fig + + +def feature_importance_plot(model, feature_names: list[str], *, title: Optional[str] = None) -> go.Figure: + """Return feature importance bar chart.""" + if hasattr(model, "feature_importances_"): + importances = model.feature_importances_ + elif hasattr(model, "coef_"): + importances = np.abs(model.coef_) + if importances.ndim > 1: + importances = importances[0] + else: + raise ValueError("Model has no feature importances") + df = pd.DataFrame({"feature": feature_names, "importance": importances}) + df = df.sort_values("importance", ascending=False) + fig = px.bar(df, x="feature", y="importance", title=title or "Feature Importance") + return fig + + +def shap_summary_plot(model, X: pd.DataFrame, *, title: Optional[str] = None): + """Return SHAP summary plot as a Matplotlib figure.""" + explainer = shap.Explainer(model, X) + values = explainer(X) + shap.plots.beeswarm(values, show=False) + fig = plt.gcf() + if title: + fig.suptitle(title) + return fig