scikit-fingerprints · Thematiq · Feb 25, 2026 · Copilot · Feb 25, 2026 · Copilot
@@ -24,6 +24,11 @@
     "strong_interpretation_raw",
 ]
 
+# Names of the interpretation modes a caller can choose between.
+InterpretationTypes = Literal[
+    "weak",
+    "strong",
+]
+
 ALL_PROPERTIES_COLUMNS: list[ReportedPropertyColumnType] = list(
     get_args(ReportedPropertyColumnType)
 )
@@ -7,15 +7,19 @@
 import pandas as pd
 from tqdm.auto import tqdm
 
-from .const import UNNAMED_COLUMNS_WARNING_TEMPLATE
-
 ALG1_COL = 2
 ALG2_COL = 3
 TIE_COL = 4
 
 logger = log.getLogger(__name__)
 
 
+# Warning text about unnamed algorithm columns. The ``{algorithms_names}`` and
+# ``{dataset_col}`` placeholders are meant to be filled in before the message
+# is shown (presumably via ``str.format`` -- confirm against the caller).
+UNNAMED_COLUMNS_WARNING_TEMPLATE = """Some algorithm names are unnamed. This may lead to issues in the win table construction.
+Algorithm names extracted: {algorithms_names}
+Dataset column: {dataset_col}
+"""
+
+
 def _gen_pairs(no_algs: int) -> Generator[tuple[int, int, int], None, None]:
     k = 0
     for i in range(no_algs):

@@ -0,0 +1,3 @@
+from ._critical_difference import plot_cdd_diagram
+
+__all__ = ["plot_cdd_diagram"]
@@ -0,0 +1,196 @@
+import warnings
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import pandas as pd
+
+# Message of the UserWarning issued by ``_plot_cdd_diagram`` when it is called
+# with an empty list of bars.
+NO_EQUIVALENCE_CLIQUEST_WARNING_TEMPLATE = """No groups of equivalent algorithms were found in the posterior table.
+CDD plot will not contain any equivalence bars."""
+
+
+def get_bars_for_ccd(
+    posterior_df: pd.DataFrame,
+    models_df: pd.DataFrame,
+    interpretation_col: str,
+) -> list[tuple[int, int]]:
+    """Calculate equivalence bars using the equivalence cliques in the posterior table.
+
+    An undirected graph is built with one edge for every posterior row whose
+    ``interpretation_col`` value equals ``"="``. Each maximal clique of that
+    graph becomes one bar spanning the smallest and the largest ``pos`` of the
+    models it contains.
+
+    Parameters
+    ----------
+    posterior_df : pd.DataFrame
+        Pairwise comparison table with ``left_model``, ``right_model`` and
+        ``interpretation_col`` columns.
+    models_df : pd.DataFrame
+        Table with a ``model`` column (model names) and a ``pos`` column.
+    interpretation_col : str
+        Name of the column of ``posterior_df`` holding the comparison outcome.
+
+    Returns
+    -------
+    list[tuple[int, int]]
+        One ``(min_pos, max_pos)`` pair per clique, in the order in which
+        ``networkx.find_cliques`` reports the cliques. The values are whatever
+        ``Series.min`` / ``Series.max`` return for the ``pos`` column.
+    """
+    # Select the equivalent pairs with a boolean mask instead of walking the
+    # frame row by row with ``iterrows`` (slow, and it upcasts row dtypes).
+    is_equivalent = posterior_df[interpretation_col] == "="
+    equivalent_pairs = posterior_df.loc[is_equivalent, ["left_model", "right_model"]]
+
+    graph = nx.Graph()
+    graph.add_edges_from(
+        zip(equivalent_pairs["left_model"], equivalent_pairs["right_model"])
+    )
+
+    # Map every clique to the span of positions covered by its members
+    bars = []
+    for clique in nx.find_cliques(graph):
+        clique_pos = models_df.loc[models_df["model"].isin(clique), "pos"]
+        bars.append((clique_pos.min(), clique_pos.max()))
+
+    return bars
+
+
+def assign_bar_position(
+    bars: list[tuple[int, int]], min_distance: int = 1
+) -> list[int]:
+    """Order the bars vertically to minimize the size of the plot.
+
+    Every bar is widened by ``min_distance`` on both ends and then placed on
+    the first row whose last bar ends strictly before the widened bar starts;
+    a new row is opened when no such row exists. Bars are visited from left to
+    right (by start position), so the number of rows does not depend on the
+    order in which ``bars`` is given.
+
+    Parameters
+    ----------
+    bars : list[tuple[int, int]]
+        ``(start, end)`` pairs, in any order.
+    min_distance : int, optional
+        Padding added to both ends of every bar before rows are compared.
+        Defaults to 1.
+
+    Returns
+    -------
+    list[int]
+        Row id of every bar, aligned with the order of ``bars``. An empty
+        input yields an empty list.
+    """
+    # First-fit only yields the minimal number of rows when the bars are
+    # processed by increasing start, so sort the indices rather than the bars
+    # to keep the result aligned with the caller's ordering.
+    visit_order = sorted(range(len(bars)), key=lambda idx: bars[idx][0])
+
+    row_ends: list[int] = []  # padded end of the last bar placed on each row
+    rows_assignments = [0] * len(bars)
+
+    for bar_idx in visit_order:
+        start, end = bars[bar_idx]
+        padded_start = start - min_distance
+        padded_end = end + min_distance
+
+        for row_id, row_end in enumerate(row_ends):
+            if row_end < padded_start:
+                # This row is available
+                row_ends[row_id] = padded_end
+                rows_assignments[bar_idx] = row_id
+                break
+        else:
+            # No rows are available, create a new one
+            rows_assignments[bar_idx] = len(row_ends)
+            row_ends.append(padded_end)
+
+    return rows_assignments
+
+
+def _plot_cdd_diagram(
+    models_df: pd.DataFrame,
+    bars: list[tuple[int, int]],
+    bars_positions: list[int],
+    bar_y_spacing: float = 0.12,
+    ax: plt.Axes | None = None,
+    xlabel_spacing: int = 5,
+    draw_equivalence_lines_to_axis: bool = True,
+) -> plt.Axes:
+    """Draw a critical difference diagram on a matplotlib Axes.
+
+    The diagram consists of a horizontal ruler at ``y = 0`` with one labelled
+    tick per model, the equivalence bars below the ruler, rank numbers and a
+    "worse / better" legend. Positions are mirrored so that position 1 ends up
+    on the right-hand side.
+
+    Parameters
+    ----------
+    models_df : pd.DataFrame
+        Table with ``model`` and ``pos`` columns; one tick is drawn per row.
+    bars : list[tuple[int, int]]
+        ``(min_pos, max_pos)`` spans of the equivalence bars.
+    bars_positions : list[int]
+        Row index of each bar, aligned with ``bars``.
+    bar_y_spacing : float, optional
+        Vertical distance between two consecutive bar rows. Defaults to 0.12.
+    ax : plt.Axes | None, optional
+        Axes to draw on; a new figure and axes are created when None.
+    xlabel_spacing : int, optional
+        Step between the intermediate rank numbers under the ruler. Defaults to 5.
+    draw_equivalence_lines_to_axis : bool, optional
+        When True, thin lines connect both ends of every bar to ``y = -0.25``;
+        otherwise short caps are drawn at the bar ends. Defaults to True.
+
+    Returns
+    -------
+    plt.Axes
+        The axes the diagram was drawn on.
+    """
+    if ax is None:
+        ax = plt.subplots()[1]
+
+    ruler_y = 0
+    rank_label_y = ruler_y - 0.1
+    first_bar_row_y = ruler_y - 0.4
+    rank_label_style = {"ha": "center", "va": "top", "fontsize": 8}
+    n_models = len(models_df)
+
+    # The ruler itself
+    ax.hlines(ruler_y, 0.5, n_models + 0.5, color="black", linewidth=2)
+
+    # One tick and one rotated name label per model
+    for _, model_row in models_df.iterrows():
+        pos, name = model_row["pos"], model_row["model"]
+        # Mirror the position so that rank 1 sits on the right
+        tick_x = n_models - pos + 1
+
+        ax.vlines(tick_x, ruler_y, ruler_y + 0.15, color="black", linewidth=1.2)
+        ax.text(
+            tick_x,
+            ruler_y + 0.2,
+            name,
+            ha="left",
+            va="bottom",
+            fontsize=8,
+            rotation=45,
+        )
+
+    if len(bars) > 0:
+        max_bar_pos = max(bars_positions)
+        for bar_idx, (min_pos, max_pos) in enumerate(bars):
+            bar_y = first_bar_row_y - bars_positions[bar_idx] * bar_y_spacing
+
+            # Mirroring swaps the ends: the largest position is the left end
+            left_x = n_models - max_pos + 1
+            right_x = n_models - min_pos + 1
+
+            ax.hlines(bar_y, left_x, right_x, color="black", linewidth=2.5)
+
+            if draw_equivalence_lines_to_axis:
+                end_line_top, end_line_width = -0.25, 0.5
+            else:
+                end_line_top, end_line_width = bar_y + 0.05, 1.5
+            for end_x in (left_x, right_x):
+                ax.vlines(
+                    end_x, bar_y, end_line_top, color="black", linewidth=end_line_width
+                )
+    else:
+        warnings.warn(NO_EQUIVALENCE_CLIQUEST_WARNING_TEMPLATE, UserWarning)
+        max_bar_pos = 0
+
+    # Rank numbers: the two extremes are always shown, in bold
+    ax.text(1, rank_label_y, str(n_models), fontweight="bold", **rank_label_style)
+    ax.text(n_models, rank_label_y, "1", fontweight="bold", **rank_label_style)
+
+    # Intermediate rank numbers every ``xlabel_spacing`` positions
+    for rank in range(xlabel_spacing + 1, n_models, xlabel_spacing):
+        ax.text(n_models - rank + 1, rank_label_y, str(rank), **rank_label_style)
+
+    # Fit the view to the drawn content and hide the matplotlib frame
+    lowest_bar_y = first_bar_row_y - max_bar_pos * bar_y_spacing
+    ax.set_xlim(0, n_models + 1)
+    ax.set_ylim(lowest_bar_y - 0.3, 2.5)
+    ax.axis("off")
+
+    # Direction legend below the lowest bar row
+    ax.text(
+        0.5,
+        lowest_bar_y - 0.1,
+        "← worse                                    better →",
+        fontsize=8,
+        style="italic",
+    )
+
+    return ax
+
+
+def plot_cdd_diagram(
+    models_df: pd.DataFrame,
+    posterior_df: pd.DataFrame,
+    interpretation_col: str,
+    ax: plt.Axes | None = None,
+    **kwargs,
+) -> plt.Axes:
+    """Plot a critical difference diagram.
+
+    Derives the equivalence bars from ``posterior_df`` with
+    ``get_bars_for_ccd``, stacks them with ``assign_bar_position`` and hands
+    everything to ``_plot_cdd_diagram``. Extra keyword arguments are forwarded
+    to ``_plot_cdd_diagram`` unchanged.
+    """
+    equivalence_bars = get_bars_for_ccd(
+        posterior_df=posterior_df,
+        models_df=models_df,
+        interpretation_col=interpretation_col,
+    )
+    bar_rows = assign_bar_position(equivalence_bars)
+
+    return _plot_cdd_diagram(
+        models_df=models_df,
+        bars=equivalence_bars,
+        bars_positions=bar_rows,
+        ax=ax,
+        **kwargs,
+    )
-    )
+    )
+
+
+# ---------------------------------------------------------------------------
+# Internal tests / self-checks
+# ---------------------------------------------------------------------------
+
+def test_get_bars_for_ccd_basic() -> None:
+    """Check the bar produced for a single three-model equivalence clique.
+
+    A, B and C are pairwise equivalent while D is equivalent to nothing, so
+    exactly one bar covering positions 1 to 3 is expected.
+    """
+    models_df = pd.DataFrame(
+        {"model": ["A", "B", "C", "D"], "pos": [1, 2, 3, 4]},
+    )
+
+    comparisons = [
+        ("A", "B", "="),
+        ("B", "C", "="),
+        ("A", "C", "="),  # closes the A-B-C clique
+        ("C", "D", "<"),  # D stays outside of every equivalence group
+    ]
+    posterior_df = pd.DataFrame(
+        comparisons, columns=["left_model", "right_model", "interp"]
+    )
+
+    bars = get_bars_for_ccd(
+        posterior_df=posterior_df,
+        models_df=models_df,
+        interpretation_col="interp",
+    )
+
+    # A (pos 1), B (pos 2) and C (pos 3) form the only group
+    assert len(bars) == 1
+    assert bars[0] == (1, 3)
+
+
+def test_assign_bar_position_non_overlapping() -> None:
+    """Disjoint bars are expected to share a single row (row 0)."""
+    disjoint_bars = [(0, 1), (2, 3), (4, 5)]
+
+    rows = assign_bar_position(disjoint_bars, min_distance=0)
+
+    # One row id per bar, and every one of them is the first row
+    assert len(rows) == len(disjoint_bars)
+    assert set(rows) == {0}
+
+
+def test_assign_bar_position_overlapping() -> None:
+    """Overlapping bars must end up on different rows."""
+    # A chain of overlaps: bar 0 with bar 1, and bar 1 with bar 2
+    chained_bars = [(0, 3), (2, 5), (4, 7)]
+
+    rows = assign_bar_position(chained_bars, min_distance=0)
+
+    assert len(rows) == len(chained_bars)
+    # A single row cannot hold overlapping intervals
+    assert max(rows) >= 1
+    # Compare every unordered pair of bars exactly once
+    for i, (s1, e1) in enumerate(chained_bars):
+        for j, (s2, e2) in enumerate(chained_bars[i + 1 :], start=i + 1):
+            overlapping = e1 > s2 and e2 > s1
+            if overlapping:
+                assert rows[i] != rows[j]
+
+
+def test_plot_cdd_diagram_smoke() -> None:
+    """Smoke test: ``_plot_cdd_diagram`` runs on minimal input and returns an Axes."""
+    models_df = pd.DataFrame(
+        {
+            "model": ["A", "B", "C"],
+            "pos": [1, 2, 3],
+            "mean": [0.1, 0.2, 0.3],
+        }
+    )
+    # One bar over all three models, placed on the first row
+    single_bar = [(1, 3)]
+    single_bar_rows = [0]
+
+    fig, ax = plt.subplots()
+    try:
+        result_ax = _plot_cdd_diagram(
+            models_df=models_df,
+            bars=single_bar,
+            bars_positions=single_bar_rows,
+            ax=ax,
+        )
+    finally:
+        # Always release the figure, even when plotting fails
+        plt.close(fig)
+
+    assert isinstance(result_ax, plt.Axes)
-    )
+    )
+
+
+# ---------------------------------------------------------------------------
+# Internal tests / self-checks
+# ---------------------------------------------------------------------------
+
+def test_get_bars_for_ccd_basic() -> None:
+    """Check the bar produced for a single three-model equivalence clique.
+
+    A, B and C are pairwise equivalent while D is equivalent to nothing, so
+    exactly one bar covering positions 1 to 3 is expected.
+    """
+    models_df = pd.DataFrame(
+        {"model": ["A", "B", "C", "D"], "pos": [1, 2, 3, 4]},
+    )
+
+    comparisons = [
+        ("A", "B", "="),
+        ("B", "C", "="),
+        ("A", "C", "="),  # closes the A-B-C clique
+        ("C", "D", "<"),  # D stays outside of every equivalence group
+    ]
+    posterior_df = pd.DataFrame(
+        comparisons, columns=["left_model", "right_model", "interp"]
+    )
+
+    bars = get_bars_for_ccd(
+        posterior_df=posterior_df,
+        models_df=models_df,
+        interpretation_col="interp",
+    )
+
+    # A (pos 1), B (pos 2) and C (pos 3) form the only group
+    assert len(bars) == 1
+    assert bars[0] == (1, 3)
+
+
+def test_assign_bar_position_non_overlapping() -> None:
+    """Disjoint bars are expected to share a single row (row 0)."""
+    disjoint_bars = [(0, 1), (2, 3), (4, 5)]
+
+    rows = assign_bar_position(disjoint_bars, min_distance=0)
+
+    # One row id per bar, and every one of them is the first row
+    assert len(rows) == len(disjoint_bars)
+    assert set(rows) == {0}
+
+
+def test_assign_bar_position_overlapping() -> None:
+    """Overlapping bars must end up on different rows."""
+    # A chain of overlaps: bar 0 with bar 1, and bar 1 with bar 2
+    chained_bars = [(0, 3), (2, 5), (4, 7)]
+
+    rows = assign_bar_position(chained_bars, min_distance=0)
+
+    assert len(rows) == len(chained_bars)
+    # A single row cannot hold overlapping intervals
+    assert max(rows) >= 1
+    # Compare every unordered pair of bars exactly once
+    for i, (s1, e1) in enumerate(chained_bars):
+        for j, (s2, e2) in enumerate(chained_bars[i + 1 :], start=i + 1):
+            overlapping = e1 > s2 and e2 > s1
+            if overlapping:
+                assert rows[i] != rows[j]
+
+
+def test_plot_cdd_diagram_smoke() -> None:
+    """Smoke test: ``_plot_cdd_diagram`` runs on minimal input and returns an Axes."""
+    models_df = pd.DataFrame(
+        {
+            "model": ["A", "B", "C"],
+            "pos": [1, 2, 3],
+            "mean": [0.1, 0.2, 0.3],
+        }
+    )
+    # One bar over all three models, placed on the first row
+    single_bar = [(1, 3)]
+    single_bar_rows = [0]
+
+    fig, ax = plt.subplots()
+    try:
+        result_ax = _plot_cdd_diagram(
+            models_df=models_df,
+            bars=single_bar,
+            bars_positions=single_bar_rows,
+            ax=ax,
+        )
+    finally:
+        # Always release the figure, even when plotting fails
+        plt.close(fig)
+
+    assert isinstance(result_ax, plt.Axes)
@@ -1,18 +1,20 @@
 from collections.abc import Iterable, Sequence
-from typing import Literal
 
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ._types import (
     ALL_PROPERTIES_COLUMNS,
     HyperPriorType,
+    InterpretationTypes,
     ReportedPropertyColumnType,
     TieSolverType,
 )
 from ._utils import _validate_params
 from .alg import _construct_win_table, _get_pwin, _hdi
 from .model import _mcmcbbt_pymc
+from .plots import plot_cdd_diagram
 
 
 class PyBBT:
@@ -111,6 +113,16 @@ def _check_if_fitted(self):
         if not self._fitted:
             raise RuntimeError("The model must be fitted before accessing this method.")
 
+    @staticmethod
+    def _get_interpretation_columns(
+        interpretation: InterpretationTypes,
+    ) -> ReportedPropertyColumnType:
+        """Return the name of the column holding the requested interpretation.
+
+        ``"weak"`` selects ``"weak_interpretation_raw"``; every other value
+        selects ``"strong_interpretation_raw"``.
+        """
+        if interpretation == "weak":
+            return "weak_interpretation_raw"
+        return "strong_interpretation_raw"
+
     @property
     def fitted(self):
         """Whether the model has been fitted."""
@@ -163,6 +175,24 @@ def fit(
 
         return self
 
+    @property
+    def beta_ranking(self) -> dict[str, float]:
+        r"""
+        Posterior mean of $\beta$ for every model.
+
+        The values allow ranking the models globally from best to worst (a higher beta indicates better performance).
+        Unlike the pairwise probabilities of the posterior table, they carry no direct probabilistic interpretation.
+
+        Returns
+        -------
+        dict[str, float]
+            Dictionary mapping model names to their posterior mean beta values.
+
+        Raises
+        ------
+        RuntimeError
+            If the model has not been fitted yet.
+        """
+        self._check_if_fitted()
+        samples = self._fit_posterior.posterior["beta"].to_numpy()
+        # Fold every leading axis into one; the last axis is the one paired
+        # with the algorithm names below.
+        n_models = samples.shape[-1]
+        per_model_mean = samples.reshape(-1, n_models).mean(axis=0)
+        return dict(zip(self._algorithms, per_model_mean, strict=True))
+
     def posterior_table(
         self,
         rope_value: tuple[float, float] = (0.45, 0.55),
@@ -275,7 +305,7 @@ def rope_comparison_control_table(
         rope_values: Sequence[tuple[float, float]],
         control_model: str,
         selected_models: Sequence[str] | None = None,
-        interpretation: Literal["weak", "strong"] = "weak",
+        interpretation: InterpretationTypes = "weak",
         return_as_array: bool = False,
         join_char: str = ", ",
     ) -> pd.DataFrame:
@@ -307,6 +337,7 @@ def rope_comparison_control_table(
         """
         self._check_if_fitted()
         records = []
+        interpretation_col = self._get_interpretation_columns(interpretation)
         for rope in rope_values:
             posterior_df = self.posterior_table(
                 rope_value=rope,
@@ -324,11 +355,6 @@ def rope_comparison_control_table(
             worse_models: list[str] = []
             unknown_models: list[str] = []
             for _, row in posterior_df.iterrows():
-                interpretation_col = (
-                    "weak_interpretation_raw"
-                    if interpretation == "weak"
-                    else "strong_interpretation_raw"
-                )
                 non_control_model = (
                     row["right_model"]
                     if row["left_model"] == control_model
@@ -374,3 +400,62 @@ def rope_comparison_control_table(
                 )
         result_df = pd.DataFrame.from_records(records)
         return result_df
+
+    @_validate_params
+    def plot_cdd_diagram(
+        self,
+        rope_value: tuple[float, float] = (0.45, 0.55),
+        interpretation: InterpretationTypes = "weak",
+        ax: plt.Axes | None = None,
+        **kwargs,
+    ) -> plt.Axes:
+        """
+        Plot the Critical Difference Diagram (CDD) based on the fitted BBT model.
+
+        Critical Difference Diagram visualizes the global ranking of the models along
+        with the equivalence bars connecting models that are considered equivalent based on the specified BBT interpretation.
+        The global ranking is determined based on the posterior mean beta values for each model.
+
+        Parameters
+        ----------
+        rope_value : tuple[float, float], optional
+            Region of Practical Equivalence (ROPE) used to determine ties in the posterior table. Defaults to (0.45, 0.55).
+        interpretation : {"weak", "strong"}, optional
+            Type of interpretation to use for determining equivalence bars. Defaults to "weak".
+        ax : plt.Axes | None, optional
+            Matplotlib Axes to plot on. If None, a new figure and axes are created. Defaults to None.
+        **kwargs
+            Additional keyword arguments passed to the underlying plotting function. See `plot_cdd_diagram`.
+
+        Returns
+        -------
+        plt.Axes
+            Matplotlib Axes containing the CDD plot.
+
+        Raises
+        ------
+        RuntimeError
+            If the model has not been fitted yet.
+        """
+        self._check_if_fitted()
+        interpretation_col = self._get_interpretation_columns(interpretation)
+
+        model_ranking = self.beta_ranking
+        models_df = pd.DataFrame(
+            {
+                "model": list(model_ranking.keys()),
+                "beta": list(model_ranking.values()),
+            }
+        )
+        # ``rank`` yields floats; cast so that positions (and the bar spans
+        # derived from them) are integer ranks. ``method="first"`` breaks ties
+        # by order of appearance, so every model gets a distinct position.
+        models_df["pos"] = (
+            models_df["beta"].rank(ascending=False, method="first").astype(int)
+        )
+        models_df = models_df.sort_values("pos").reset_index(drop=True)
+        posterior_df = self.posterior_table(
+            rope_value=rope_value,
+            columns=(
+                "left_model",
+                "right_model",
+                interpretation_col,
+            ),
+        )
+        # Inside the method body this name resolves to the module-level
+        # plotting function, not to this method.
+        return plot_cdd_diagram(
+            models_df=models_df,
+            posterior_df=posterior_df,
+            interpretation_col=interpretation_col,
+            ax=ax,
+            **kwargs,
+        )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from ._critical_difference import plot_cdd_diagram

		__all__ = ["plot_cdd_diagram"]