diff --git a/pdm.lock b/pdm.lock index 914f146..a2ef3b2 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:e273b4e2a24a70e4b0bbcb98ac1f7cec78e390d595ac221ab0c85eb21c5edc29" +content_hash = "sha256:7979ae553052f5db03a354a1a2a85fdf5b3e319aa60803091a97c43e62e46dc1" [[metadata.targets]] requires_python = ">=3.12" @@ -281,6 +281,31 @@ files = [ {file = "json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559"}, ] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +requires_python = ">=3.8" +summary = "Python port of markdown-it. Markdown parsing, done right!" +groups = ["default"] +dependencies = [ + "mdurl~=0.1", +] +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +requires_python = ">=3.7" +summary = "Markdown URL utilities" +groups = ["default"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mock" version = "5.1.0" @@ -354,6 +379,17 @@ files = [ {file = "polars-1.20.0.tar.gz", hash = "sha256:e8e9e3156fae02b58e276e5f2c16a5907a79b38617a9e2d731b533d87798f451"}, ] +[[package]] +name = "pygments" +version = "2.19.1" +requires_python = ">=3.8" +summary = "Pygments is a syntax highlighting package written in Python." 
+groups = ["default"] +files = [ + {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, + {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, +] + [[package]] name = "pympler" version = "1.1" @@ -482,6 +518,39 @@ files = [ {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] +[[package]] +name = "rich" +version = "13.9.4" +requires_python = ">=3.8.0" +summary = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +groups = ["default"] +dependencies = [ + "markdown-it-py>=2.2.0", + "pygments<3.0.0,>=2.13.0", + "typing-extensions<5.0,>=4.0.0; python_version < \"3.11\"", +] +files = [ + {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"}, + {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"}, +] + +[[package]] +name = "rich-click" +version = "1.8.5" +requires_python = ">=3.7" +summary = "Format click help output nicely with rich" +groups = ["default"] +dependencies = [ + "click>=7", + "importlib-metadata; python_version < \"3.8\"", + "rich>=10.7", + "typing-extensions>=4", +] +files = [ + {file = "rich_click-1.8.5-py3-none-any.whl", hash = "sha256:0fab7bb5b66c15da17c210b4104277cd45f3653a7322e0098820a169880baee0"}, + {file = "rich_click-1.8.5.tar.gz", hash = "sha256:a3eebe81da1c9da3c32f3810017c79bd687ff1b3fa35bfc9d8a3338797f1d1a1"}, +] + [[package]] name = "soupsieve" version = "2.6" @@ -520,7 +589,7 @@ name = "typing-extensions" version = "4.12.2" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["test"] +groups = ["default", "test"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, diff --git a/pyproject.toml b/pyproject.toml index 7cfe6da..a21a2da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "File oriented ASV benchmark comparer" authors = [ {name = "Rohit Goswami", email = "rgoswami@ieee.org"}, ] -dependencies = ["asv @ git+https://github.com/airspeed-velocity/asv/", "click>=8.1.8", "asv-runner>=0.2.1", "polars>=1.20.0"] +dependencies = ["asv @ git+https://github.com/airspeed-velocity/asv/", "click>=8.1.8", "asv-runner>=0.2.1", "polars>=1.20.0", "rich-click>=1.8.5"] requires-python = ">=3.12" readme = "README.md" license = {text = "MIT"} diff --git a/src/asv_spyglass/_asv_ro.py b/src/asv_spyglass/_asv_ro.py index 8e249e4..7e8e001 100644 --- a/src/asv_spyglass/_asv_ro.py +++ b/src/asv_spyglass/_asv_ro.py @@ -1,8 +1,6 @@ import itertools -import json import re from pathlib import Path -from typing import Iterator, Union from asv.util import load_json as asv_json_load @@ -14,19 +12,21 @@ class ReadOnlyASVBenchmarks: api_version = 2 - def __init__(self, benchmarks_file: Path, regex: Union[str, list[str]] = None): + def __init__(self, benchmarks_file: Path, regex: str | list[str] = None): """ Initialize and load benchmarks from a JSON file, optionally filtering them. Args: benchmarks_file (Path): Path to the benchmarks JSON file. - regex (Union[str, list[str]], optional): Regular expression(s) to filter benchmarks. 
- Defaults to None (all benchmarks included). + regex (Union[str, list[str]], optional): Regular expression(s) to + filter benchmarks. Defaults to None (all benchmarks included). """ d = asv_json_load(getstrform(benchmarks_file), api_version=self.api_version) self._base_benchmarks = {} # Store all benchmarks here self._benchmark_selection = {} # Track selected parameter combinations - self.filtered_benchmarks = {} # Store selected benchmarks here after parameter expansion + self.filtered_benchmarks = ( + {} + ) # Store selected benchmarks here after parameter expansion if not regex: regex = [] diff --git a/src/asv_spyglass/_num.py b/src/asv_spyglass/_num.py new file mode 100644 index 0000000..0dfdf4d --- /dev/null +++ b/src/asv_spyglass/_num.py @@ -0,0 +1,90 @@ +import math +from dataclasses import dataclass + + +@dataclass +class BenchNum: + val: float + err: float | None + unit: str | None + + +class Ratio: + """ + Represents the ratio between two numeric values (t2 / t1), handling + potential issues like division by zero, NaN, and None values. It also + supports marking a ratio as insignificant. + + Attributes: + _t1 (float | None): The first value (denominator). + _t2 (float | None): The second value (numerator). + val (float): The calculated ratio, which can be: + - The actual ratio if both _t1 and _t2 are valid numbers and _t1 is + not zero. + - math.inf if either _t1 or _t2 is None or NaN, or if _t1 is zero. + is_insignificant (bool): A flag indicating whether the ratio should be + considered statistically insignificant. Defaults to False. + + Methods: + __init__(self, t1: float | None, t2: float | None): + Initializes a Ratio object, calculating the ratio if possible. + + __repr__(self): + Returns a string representation of the ratio: + - "n/a" if the ratio is undefined (val is math.inf). + - "~" followed by the formatted ratio if is_insignificant is True. + - A formatted string with 2 decimal places otherwise. + + _is_invalid(self, t1, t2): + A private helper method to check if the inputs are invalid for ratio + calculation. + """ + + def __init__(self, t1: float | None, t2: float | None): + """ + Initializes a Ratio object. + + Args: + t1 (float | None): The first value (denominator). + t2 (float | None): The second value (numerator). + """ + self._t1 = t1 + self._t2 = t2 + self.is_insignificant = False + self.val = None + + if self._is_invalid(t1, t2): + self.val = math.inf + else: + try: + self.val = t2 / t1 + except ZeroDivisionError: + self.val = math.inf + + def __repr__(self): + """ + Returns a string representation of the ratio. + + If val is math.inf, returns "n/a". + If is_insignificant is True, returns "~" followed by the formatted ratio. + Otherwise, returns the ratio formatted to 2 decimal places. + """ + if self.val == math.inf: + return "n/a" + elif self.is_insignificant: + return "~" + f"{self.val:6.2f}".strip() + else: + return f"{self.val:6.2f}" + + def _is_invalid(self, t1, t2): + """ + Checks if the inputs are invalid for ratio calculation. + + Args: + t1 (float | None): The first value. + t2 (float | None): The second value. + + Returns: + bool: True if either t1 or t2 is None or NaN, False otherwise. 
+ """ + return t1 is None or t2 is None or math.isnan(t1) or math.isnan(t2) diff --git a/src/asv_spyglass/changes.py b/src/asv_spyglass/changes.py new file mode 100644 index 0000000..2b5a28f --- /dev/null +++ b/src/asv_spyglass/changes.py @@ -0,0 +1,95 @@ +import enum +from dataclasses import dataclass + + +class ResultColor(enum.StrEnum): + DEFAULT = "white" + GREEN = enum.auto() + RED = enum.auto() + LIGHTGREY = "light_grey" + + +class ResultMark(enum.StrEnum): + BETTER = "-" + WORSE = "+" + FAILURE = "!" + FIXED = "*" + INCOMPARABLE = "x" + UNCHANGED = " " + INSIGNIFICANT = "~" + + +class AfterIs(enum.Enum): + LUKEWARM = 0 + WORSE = -1 + BETTER = 1 + + +@dataclass +class ASVChangeInfo: + mark: ResultMark + color: ResultColor + description: str + state: AfterIs + before: str + after: str + + +@dataclass +class Incomparable(ASVChangeInfo): + mark: ResultMark = ResultMark.INCOMPARABLE + color: ResultColor = ResultColor.LIGHTGREY + description: str = "Not comparable" + state: AfterIs = AfterIs.LUKEWARM + before: str = "" + after: str = "" + + +@dataclass +class Failure(ASVChangeInfo): + mark: ResultMark = ResultMark.FAILURE + color: ResultColor = ResultColor.RED + description: str = "Introduced a failure" + state: AfterIs = AfterIs.WORSE + before: str = "Succeeded" + after: str = "Failed" + + +@dataclass +class Fixed(ASVChangeInfo): + mark: ResultMark = ResultMark.FIXED + color: ResultColor = ResultColor.GREEN + description: str = "Fixed a failure" + state: AfterIs = AfterIs.BETTER + before: str = "Failed" + after: str = "Succeeded" + + +@dataclass +class NoChange(ASVChangeInfo): + mark: ResultMark = ResultMark.UNCHANGED + color: ResultColor = ResultColor.DEFAULT + description: str = "Both failed or either was skipped or no significant change" + state: AfterIs = AfterIs.LUKEWARM + before: str = "" + after: str = "" + + +@dataclass +class Better(ASVChangeInfo): + mark: ResultMark = ResultMark.BETTER + color: ResultColor = ResultColor.GREEN + description: str = "Relative improvement" + state: AfterIs = AfterIs.BETTER + before: str = "Worse" + after: str = "Better" + + +@dataclass +class Worse(ASVChangeInfo): + mark: ResultMark = ResultMark.WORSE + color: ResultColor = ResultColor.RED + description: str = "Relatively worse" + state: AfterIs = AfterIs.WORSE + before: str = "Better" + after: str = "Worse" diff --git a/src/asv_spyglass/cli.py b/src/asv_spyglass/cli.py index c26f6f9..910a50a 100644 --- a/src/asv_spyglass/cli.py +++ b/src/asv_spyglass/cli.py @@ -3,13 +3,17 @@ import click import polars as pl from asv import results +from rich import box +from rich.console import Console +from rich.table import Table +from rich_click import RichCommand, RichGroup from asv_spyglass._asv_ro import ReadOnlyASVBenchmarks -from asv_spyglass._aux import getstrform +from asv_spyglass.changes import ResultMark from asv_spyglass.compare import ResultPreparer, do_compare -@click.group() +@click.group(cls=RichGroup) def cli(): """ Command-line interface for ASV benchmark analysis. 
@@ -17,7 +21,7 @@ def cli(): pass -@cli.command() +@cli.command(cls=RichCommand) @click.argument("b1", type=click.Path(exists=True), required=True) @click.argument("b2", type=click.Path(exists=True), required=True) @click.argument("bconf", type=click.Path(exists=True), required=True) @@ -44,14 +48,65 @@ def cli(): show_default=True, help="Sort output by change, ratio, or name.", ) -def compare(b1, b2, bconf, factor, split, only_changed, sort): # Renamed to 'compare' +def compare(b1, b2, bconf, factor, split, only_changed, sort): """ Compare two ASV result files. """ - print(do_compare(b1, b2, bconf, factor, split, only_changed, sort)) + all_tables = do_compare(b1, b2, bconf, factor, split, only_changed, sort) + console = Console() -@cli.command() + for key, table_data in all_tables.items(): + if not only_changed: + console.print("") + console.print(table_data["title"], style="bold") + console.print("") + + table = Table( + title=table_data["title"], + show_header=True, + header_style="bold magenta", + box=box.SIMPLE, + ) + + for header in table_data["headers"]: + table.add_column( + header, justify="right" if header != "Benchmark (Parameter)" else "left" + ) + + for row in table_data["table_data"]: + change_mark = row[0] + row_style = "" + + # Determine row style based on change_mark + if change_mark == ResultMark.BETTER: + row_style = "green" + elif change_mark == ResultMark.WORSE: + row_style = "red" + elif change_mark == ResultMark.FAILURE: + row_style = "red" + elif change_mark == ResultMark.FIXED: + row_style = "green" + elif change_mark == ResultMark.INCOMPARABLE: + row_style = "light_grey" + elif change_mark == ResultMark.UNCHANGED: + row_style = "white" + elif change_mark == ResultMark.INSIGNIFICANT: + row_style = "white" + + table.add_row(*row, style=row_style) + + console.print(table) + + # Print summary of worsened/improved status + if not split: + if (av_x := sum([x.value for x in table_data["states"]])) > 0: + console.print("[bold green]Net Improvement![/]") + elif av_x < 0: + console.print("[bold red]Net Regression![/]") + + +@cli.command(cls=RichCommand) @click.argument("bres", type=click.Path(exists=True), required=True) @click.argument("bdat", type=click.Path(exists=True), required=True) @click.option( diff --git a/src/asv_spyglass/compare.py b/src/asv_spyglass/compare.py index ab91c4c..30e43e2 100644 --- a/src/asv_spyglass/compare.py +++ b/src/asv_spyglass/compare.py @@ -1,17 +1,23 @@ -import math from pathlib import Path import polars as pl -import tabulate from asv import results from asv.commands.compare import _is_result_better, _isna, unroll_result -from asv.console import log from asv.util import human_value -from asv_runner.console import color_print -from asv_runner.statistics import get_err from asv_spyglass._asv_ro import ReadOnlyASVBenchmarks -from asv_spyglass.results import PreparedResult, result_iter +from asv_spyglass._num import Ratio +from asv_spyglass.changes import ( + ASVChangeInfo, + Better, + Failure, + Fixed, + Incomparable, + NoChange, + ResultMark, + Worse, +) +from asv_spyglass.results import ASVBench, PreparedResult, result_iter class ResultPreparer: @@ -88,229 +94,241 @@ def prepare(self, result_data): ) -def do_compare( - b1, - b2, - bdat, - factor=1.1, - split=False, - only_changed=False, - sort="default", - machine=None, - env_spec=None, - use_stats=True, -): - # Load results - res_1 = results.Results.load(b1) - res_2 = results.Results.load(b2) +def _get_change_info( + asv1: ASVBench, asv2: ASVBench, factor, use_stats +) -> ASVChangeInfo: + 
if ( + asv1.version is not None + and asv2.version is not None + and asv1.version != asv2.version + ): + # not comparable + return Incomparable() + elif asv1.time is not None and asv2.time is None: + # introduced a failure + return Failure() + elif asv1.time is None and asv2.time is not None: + # fixed a failure + return Fixed() + elif asv1.time is None and asv2.time is None: + # both failed + return NoChange() + elif _isna(asv1.time) or _isna(asv2.time): + # either one was skipped + return NoChange() + elif _is_result_better( + asv2.time, + asv1.time, + asv2.stats_n_samples, + asv1.stats_n_samples, + factor, + use_stats=use_stats, + ): + return Better() + elif _is_result_better( + asv1.time, + asv2.time, + asv1.stats_n_samples, + asv2.stats_n_samples, + factor, + use_stats=use_stats, + ): + return Worse() + else: + return NoChange() - # Initialize benchmarks - benchmarks = ReadOnlyASVBenchmarks(Path(bdat)).benchmarks - # Prepare results using the ResultPreparer class - preparer = ResultPreparer(benchmarks) - prepared_results_1 = preparer.prepare(res_1) - prepared_results_2 = preparer.prepare(res_2) - # Kanged from compare.py - - # Extract data from prepared results - results_1 = prepared_results_1.results - results_2 = prepared_results_2.results - ss_1 = prepared_results_1.stats - ss_2 = prepared_results_2.stats - versions_1 = prepared_results_1.versions - versions_2 = prepared_results_2.versions - units = prepared_results_1.units - - machine_env_names = set() - mname_1 = f"{prepared_results_1.machine_name}/{prepared_results_1.env_name}" - mname_2 = f"{prepared_results_2.machine_name}/{prepared_results_2.env_name}" - machine_env_names.add(mname_1) - machine_env_names.add(mname_2) - - benchmarks_1 = set(results_1.keys()) - benchmarks_2 = set(results_2.keys()) - joint_benchmarks = sorted(list(benchmarks_1 | benchmarks_2)) - bench = {} +def _create_comparison_dataframe( + pr1: PreparedResult, + pr2: PreparedResult, + factor: float, + only_changed: bool, + use_stats: bool, +): + """ + Creates a Polars DataFrame comparing results from two PreparedResult objects. - if split: - bench["green"] = [] - bench["red"] = [] - bench["lightgrey"] = [] - bench["default"] = [] - else: - bench["all"] = [] + Args: + pr1 (PreparedResult): The first PreparedResult object. + pr2 (PreparedResult): The second PreparedResult object. + factor (float): The factor used for determining significance. + only_changed (bool): Whether to only include changed benchmarks. + use_stats (bool): Whether to use statistical significance. - worsened = False - improved = False + Returns: + pl.DataFrame: A DataFrame with comparison data. 
+ """ + data = [] + machine_env_names = { + f"{pr1.machine_name}/{pr1.env_name}", + f"{pr2.machine_name}/{pr2.env_name}", + } + + for benchmark in sorted(set(pr1.results.keys()) | set(pr2.results.keys())): + asv1 = ASVBench(benchmark, pr1) + asv2 = ASVBench(benchmark, pr2) + + ratio = Ratio(asv1.time, asv2.time) + diffinfo = _get_change_info( + asv1, + asv2, + factor, + use_stats, + ) - for benchmark in joint_benchmarks: - if benchmark in results_1: - time_1 = results_1[benchmark] - else: - time_1 = math.nan + if isinstance(diffinfo, NoChange): + # Mark statistically insignificant results + if _is_result_better( + asv1.time, asv2.time, None, None, factor + ) or _is_result_better(asv2.time, asv1.time, None, None, factor): + ratio.is_insignificant = True + + if only_changed and diffinfo.mark in ( + ResultMark.UNCHANGED, + ResultMark.INCOMPARABLE, + ResultMark.FIXED, + ): + continue - if benchmark in results_2: - time_2 = results_2[benchmark] + # Determine benchmark name format + if len(machine_env_names) > 1: + benchmark_name = ( + f"{benchmark} [{pr1.machine_name}/{pr1.env_name}" + f" -> {pr2.machine_name}/{pr2.env_name}]" + ) else: - time_2 = math.nan + benchmark_name = benchmark - if benchmark in ss_1 and ss_1[benchmark][0]: - err_1 = get_err(time_1, ss_1[benchmark][0]) - else: - err_1 = None + assert asv1.unit == asv2.unit, "Units for benchmark must match" + + data.append( + { + "benchmark": benchmark_name, + "mark": diffinfo.mark, + "before": asv1.time, + "after": asv2.time, + "ratio": ratio.val, + "is_insignificant": ratio.is_insignificant, + "err_before": asv1.err, + "err_after": asv2.err, + "unit": asv1.unit or asv2.unit, # Use unit from asv1 or asv2 + "color": diffinfo.color.name.lower(), + "state": diffinfo.state, + } + ) - if benchmark in ss_2 and ss_2[benchmark][0]: - err_2 = get_err(time_2, ss_2[benchmark][0]) - else: - err_2 = None + return pl.DataFrame(data) - version_1 = versions_1.get(benchmark) - version_2 = versions_2.get(benchmark) - if _isna(time_1) or _isna(time_2): - ratio = "n/a" - ratio_num = 1e9 - else: - try: - ratio_num = time_2 / time_1 - ratio = f"{ratio_num:6.2f}" - except ZeroDivisionError: - ratio_num = 1e9 - ratio = "n/a" - - if version_1 is not None and version_2 is not None and version_1 != version_2: - # not comparable - color = "lightgrey" - mark = "x" - elif time_1 is not None and time_2 is None: - # introduced a failure - color = "red" - mark = "!" - worsened = True - elif time_1 is None and time_2 is not None: - # fixed a failure - color = "green" - mark = " " - improved = True - elif time_1 is None and time_2 is None: - # both failed - color = "default" - mark = " " - elif _isna(time_1) or _isna(time_2): - # either one was skipped - color = "default" - mark = " " - elif _is_result_better( - time_2, - time_1, - ss_2.get(benchmark), - ss_1.get(benchmark), - factor, - use_stats=use_stats, - ): - color = "green" - mark = "-" - improved = True - elif _is_result_better( - time_1, - time_2, - ss_1.get(benchmark), - ss_2.get(benchmark), - factor, - use_stats=use_stats, - ): - color = "red" - mark = "+" - worsened = True - else: - color = "default" - mark = " " +def _format_comparison_tables( + df: pl.DataFrame, sort: str, name_1: str, name_2: str, split: bool +): + """ + Formats a comparison DataFrame into tables for display. 
- # Mark statistically insignificant results - if _is_result_better( - time_1, time_2, None, None, factor - ) or _is_result_better(time_2, time_1, None, None, factor): - ratio = "~" + ratio.strip() + Args: + df (pl.DataFrame): The comparison DataFrame. + sort (str): The sorting method ("ratio" or "name"). + name_1 (str): The name of the first commit/result. + name_2 (str): The name of the second commit/result. - if only_changed and mark in (" ", "x"): - continue + Returns: + dict: A dictionary of formatted tables. + """ - unit = units[benchmark] + # Sort the DataFrame + if sort == "ratio": + df = df.sort("ratio", descending=True) + elif sort == "name": + df = df.sort("benchmark") - details = "{0:1s} {1:>15s} {2:>15s} {3:>8s} ".format( - mark, - human_value(time_1, unit, err=err_1), - human_value(time_2, unit, err=err_2), - ratio, - ) - split_line = details.split() - if len(machine_env_names) > 1: - benchmark_name = f"{benchmark} [{mname_1} -> {mname_2}]" - else: - benchmark_name = benchmark - if len(split_line) == 4: - split_line += [benchmark_name] - else: - split_line = [" "] + split_line + [benchmark_name] - if split: - bench[color].append(split_line) - else: - bench["all"].append(split_line) + # Construct the table data for each category + all_tables = {} if split: - keys = ["green", "default", "red", "lightgrey"] + colors = ["green", "default", "red", "lightgrey"] else: - keys = ["all"] + colors = ["all"] + df = df.with_columns(pl.lit("all").alias("color")) - titles = {} - titles["green"] = "Benchmarks that have improved:" - titles["default"] = "Benchmarks that have stayed the same:" - titles["red"] = "Benchmarks that have got worse:" - titles["lightgrey"] = "Benchmarks that are not comparable:" - titles["all"] = "All benchmarks:" + for color in colors: + if color != "all": + filtered_df = df.filter(pl.col("color") == color) + else: + filtered_df = df + + if not filtered_df.is_empty(): + table_data = [] + for row in filtered_df.iter_rows(named=True): + table_data.append( + [ + str(row["mark"]), + human_value(row["before"], row["unit"], err=row["err_before"]), + human_value(row["after"], row["unit"], err=row["err_after"]), + str(Ratio(row["before"], row["after"])), + row["benchmark"], + ] + ) + if color == "all": + title = "All benchmarks:" + else: + title = { + "green": "Benchmarks that have improved:", + "default": "Benchmarks that have stayed the same:", + "red": "Benchmarks that have got worse:", + "lightgrey": "Benchmarks that are not comparable:", + }[color] + + all_tables[color] = { + "title": title, + "headers": [ + "Change", + f"Before {name_1}", + f"After {name_2}", + "Ratio", + "Benchmark (Parameter)", + ], + "table_data": table_data, + "states": filtered_df.select(pl.col("state")).to_series().to_list(), + } + + return all_tables - log.flush() - for key in keys: - if len(bench[key]) == 0: - continue +def do_compare( + b1, + b2, + bdat, + factor=1.1, + split=False, + only_changed=False, + sort="default", + machine=None, + env_spec=None, + use_stats=True, +): + # Load results + res_1 = results.Results.load(b1) + res_2 = results.Results.load(b2) - if not only_changed: - color_print("") - color_print(titles[key]) - color_print("") + # Initialize benchmarks + benchmarks = ReadOnlyASVBenchmarks(Path(bdat)).benchmarks - name_1 = False # commit_names.get(hash_1) - if name_1: - name_1 = f"<{name_1}>" - else: - name_1 = "" + # Prepare results + preparer = ResultPreparer(benchmarks) + pr1 = preparer.prepare(res_1) + pr2 = preparer.prepare(res_2) - name_2 = False # 
commit_names.get(hash_2) - if name_2: - name_2 = f"<{name_2}>" - else: - name_2 = "" - - if sort == "default": - pass - elif sort == "ratio": - bench[key].sort(key=lambda v: v[3], reverse=True) - elif sort == "name": - bench[key].sort(key=lambda v: v[2]) - else: - raise ValueError("Unknown 'sort'") - - print(worsened, improved) - return tabulate.tabulate( - bench[key], - headers=[ - "Change", - f"Before {name_1}", - f"After {name_2}", - "Ratio", - "Benchmark (Parameter)", - ], - tablefmt="github", - ) + # Create the comparison DataFrame + df = _create_comparison_dataframe(pr1, pr2, factor, only_changed, use_stats) + + # Get commit names or use empty strings + name_1 = "" # commit_names.get(hash_1, "") + name_2 = "" # commit_names.get(hash_2, "") + name_1 = f"<{name_1}>" if name_1 else "" + name_2 = f"<{name_2}>" if name_2 else "" + + # Format the DataFrame into tables + all_tables = _format_comparison_tables(df, sort, name_1, name_2, split) + + return all_tables diff --git a/src/asv_spyglass/results.py b/src/asv_spyglass/results.py index 826f41a..9e2c812 100644 --- a/src/asv_spyglass/results.py +++ b/src/asv_spyglass/results.py @@ -1,11 +1,11 @@ import dataclasses +import math import re from collections import namedtuple +from dataclasses import dataclass, field import polars as pl -from asv import results - -from asv_spyglass._asv_ro import ReadOnlyASVBenchmarks +from asv_runner.statistics import get_err ASVResult = namedtuple( "ASVResult", @@ -41,7 +41,7 @@ def result_iter(bdot): ) -@dataclasses.dataclass +@dataclass class PreparedResult: """Augmented with information from the benchmarks.json""" @@ -54,8 +54,8 @@ class PreparedResult: param_names: list def __iter__(self): - for field in dataclasses.fields(self): - yield getattr(self, field.name) + for _field in dataclasses.fields(self): + yield getattr(self, _field.name) def to_df(self): """ @@ -111,3 +111,23 @@ def to_df(self): data.append(row) return pl.DataFrame(data) + + +@dataclass +class ASVBench: + name: str + _pr: PreparedResult + time: float = field(init=False) + stats_n_samples: tuple = field(init=False) + err: float | None = field(init=False) + version: str | None = field(init=False) + unit: str | None = field(init=False) + + def __post_init__(self): + self.time = self._pr.results.get(self.name, math.nan) + self.stats_n_samples = self._pr.stats.get(self.name, (None,)) + self.err = None + if self.name in self._pr.stats and self.stats_n_samples[0]: + self.err = get_err(self.time, self.stats_n_samples[0]) + self.version = self._pr.versions.get(self.name) + self.unit = self._pr.units.get(self.name) diff --git a/tests/approved_files/test_results.test_do_compare.approved.txt b/tests/approved_files/test_results.test_do_compare.approved.txt index 3f502e9..0d762d7 100644 --- a/tests/approved_files/test_results.test_do_compare.approved.txt +++ b/tests/approved_files/test_results.test_do_compare.approved.txt @@ -1,3 +1,14 @@ -| Change | Before | After | Ratio | Benchmark (Parameter) | -|----------|-----------|------------|---------|-------------------------------------------------------------------------------------------------------| -| - | 94.8±30μs | 28.4±0.2μs | 0.3 | benchmarks.TimeSuite.time_add_arr [rgx1gen11/conda-py3.11-numpy -> rgx1gen11/virtualenv-py3.12-numpy] | +{'all': {'headers': ['Change', + 'Before ', + 'After ', + 'Ratio', + 'Benchmark (Parameter)'], + 'states': [], + 'table_data': [['-', + '94.8±30μs', + '28.4±0.2μs', + ' 0.30', + 'benchmarks.TimeSuite.time_add_arr ' + '[rgx1gen11/conda-py3.11-numpy -> ' + 
'rgx1gen11/virtualenv-py3.12-numpy]']], + 'title': 'All benchmarks:'}} diff --git a/tests/test_num.py b/tests/test_num.py new file mode 100644 index 0000000..a3e69bc --- /dev/null +++ b/tests/test_num.py @@ -0,0 +1,58 @@ +import math + +from asv_spyglass._num import Ratio + + +def test_ratio_normal_calculation(): + ratio = Ratio(t1=10, t2=5) + assert ratio.val == 0.5 + assert str(ratio) == " 0.50" + + +def test_ratio_t2_bigger(): + ratio = Ratio(t1=5, t2=10) + assert ratio.val == 2.0 + assert str(ratio) == " 2.00" + + +def test_ratio_division_by_zero(): + ratio = Ratio(t1=0, t2=10) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_t1_is_nan(): + ratio = Ratio(t1=float("nan"), t2=10) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_t2_is_nan(): + ratio = Ratio(t1=10, t2=float("nan")) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_both_nan(): + ratio = Ratio(t1=float("nan"), t2=float("nan")) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_t1_none(): + ratio = Ratio(t1=None, t2=10) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_t2_none(): + ratio = Ratio(t1=10, t2=None) + assert ratio.val == math.inf + assert str(ratio) == "n/a" + + +def test_ratio_is_insignificant(): + ratio = Ratio(t1=10, t2=500) + ratio.is_insignificant = True + assert ratio.val == 50 + assert str(ratio) == '~50.00' diff --git a/tests/test_results.py b/tests/test_results.py index d9e80db..2aad977 100644 --- a/tests/test_results.py +++ b/tests/test_results.py @@ -21,10 +21,12 @@ def test_result_iter(shared_datadir): def test_do_compare(shared_datadir): verify( - do_compare( - getstrform(shared_datadir / "a0f29428-conda-py3.11-numpy.json"), - getstrform(shared_datadir / "a0f29428-virtualenv-py3.12-numpy.json"), - shared_datadir / "asv_samples_a0f29428_benchmarks.json", + pp.pformat( + do_compare( + getstrform(shared_datadir / "a0f29428-conda-py3.11-numpy.json"), + getstrform(shared_datadir / "a0f29428-virtualenv-py3.12-numpy.json"), + shared_datadir / "asv_samples_a0f29428_benchmarks.json", + ) ) ) diff --git a/tests/test_ro.py b/tests/test_ro.py index c35afbd..0bf29ea 100644 --- a/tests/test_ro.py +++ b/tests/test_ro.py @@ -1,5 +1,4 @@ import pprint as pp -from pathlib import Path from approvaltests.approvals import verify
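
Usage sketch (illustrative, not part of the patch): the new Ratio helper in src/asv_spyglass/_num.py and the change dataclasses in src/asv_spyglass/changes.py are self-contained and can be exercised directly. The timings mirror the values in the approved comparison output; the snippet assumes this branch of asv-spyglass is importable.

    # Illustrative only -- exercises the helpers introduced in this patch.
    from asv_spyglass._num import Ratio
    from asv_spyglass.changes import Better, NoChange, ResultMark, Worse

    # Ratio computes t2 / t1 and guards against None, NaN, and zero denominators.
    print(Ratio(94.8e-06, 28.4e-06))   # "  0.30" -- the improvement in the approved output
    print(Ratio(0.0, 28.4e-06))        # "n/a"    -- zero denominator
    print(Ratio(None, 28.4e-06))       # "n/a"    -- missing first measurement

    # The comparison flags near-identical timings as insignificant.
    r = Ratio(94.8e-06, 95.0e-06)
    r.is_insignificant = True
    print(r)                           # "~1.00"

    # Each change class carries its mark/colour/state defaults, so callers
    # only pick the right class to instantiate.
    for info in (Better(), Worse(), NoChange()):
        print(str(info.mark), info.color, info.description)
    assert Better().mark is ResultMark.BETTER

The same defaults drive the CLI rendering: do_compare() now returns a mapping of table groups (title, headers, table_data, states), and cli.py turns each ResultMark into a rich row style and sums the AfterIs states to report a net improvement or regression.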