Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "File oriented ASV benchmark comparer"
authors = [
{name = "Rohit Goswami", email = "[email protected]"},
]
dependencies = ["asv @ git+https://github.com/airspeed-velocity/asv/", "click>=8.1.8", "asv-runner>=0.2.1", "polars>=1.20.0"]
dependencies = ["asv @ git+https://github.com/airspeed-velocity/asv/", "click>=8.1.8", "asv-runner>=0.2.1", "polars>=1.20.0", "rich-click>=1.8.5"]
requires-python = ">=3.12"
readme = "README.md"
license = {text = "MIT"}
Expand Down
12 changes: 6 additions & 6 deletions src/asv_spyglass/_asv_ro.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import itertools
import json
import re
from pathlib import Path
from typing import Iterator, Union

from asv.util import load_json as asv_json_load

Expand All @@ -14,19 +12,21 @@ class ReadOnlyASVBenchmarks:

api_version = 2

def __init__(self, benchmarks_file: Path, regex: Union[str, list[str]] = None):
def __init__(self, benchmarks_file: Path, regex: str | list[str] = None):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing None in the type

Suggested change
def __init__(self, benchmarks_file: Path, regex: str | list[str] = None):
def __init__(self, benchmarks_file: Path, regex: str | list[str] | None = None):

"""
Initialize and load benchmarks from a JSON file, optionally filtering them.

Args:
benchmarks_file (Path): Path to the benchmarks JSON file.
regex (Union[str, list[str]], optional): Regular expression(s) to filter benchmarks.
Defaults to None (all benchmarks included).
regex (Union[str, list[str]], optional): Regular expression(s) to
filter benchmarks. Defaults to None (all benchmarks included).
"""
d = asv_json_load(getstrform(benchmarks_file), api_version=self.api_version)
self._base_benchmarks = {} # Store all benchmarks here
self._benchmark_selection = {} # Track selected parameter combinations
self.filtered_benchmarks = {} # Store selected benchmarks here after parameter expansion
self.filtered_benchmarks = (
{}
) # Store selected benchmarks here after parameter expansion

if not regex:
regex = []
Expand Down
90 changes: 90 additions & 0 deletions src/asv_spyglass/_num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import math
from dataclasses import dataclass


@dataclass
class BenchNum:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BenchNum doesn't seem to be used anywhere, we can delete it.

val: float
err: float | None
unit: str | None


class Ratio:
"""
Represents the ratio between two numeric values (t2 / t1), handling
potential issues like division by zero, NaN, and None values. It also
supports marking a ratio as insignificant.
Attributes:
_t1 (float | None): The first value (denominator).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to the logic, but the documenting side of things - I use VSCode and I've never used PyCharm, so I don't know if I'm not maybe jsut missing out on something.

Do you use an IDE where the attributes / methods are documented when using this notation for parameters / methods?

As an example, since I use VSCode (as Github codespace in this case), I don't get hints on hover.

Screenshot 2025-05-30 at 07 03 43

To get hints on hover on VSCode, the attributes / methods need to have their docstrings set:

Screenshot 2025-05-30 at 07 06 48

Or like this

Screenshot 2025-05-30 at 07 05 57

Or maybe there is a VSCode plugin I'm not familiar with?

_t2 (float | None): The second value (numerator).
val (float): The calculated ratio, which can be:
- The actual ratio if both _t1 and _t2 are valid numbers and _t1 is
not zero.
- math.inf if either _t1 or _t2 is None or NaN, or if _t1 is zero.
is_insignificant (bool): A flag indicating whether the ratio should be
considered statistically insignificant. Defaults to False.
Methods:
__init__(self, t1: float | None, t2: float | None):
Initializes a Ratio object, calculating the ratio if possible.
__repr__(self):
Returns a string representation of the ratio:
- "n/a" if the ratio is undefined (val is math.inf).
- "~" followed by the formatted ratio if is_insignificant is True.
- A formatted string with 2 decimal places otherwise.
_is_invalid(self, t1, t2):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, just opinion, but documenting the private fields on the class docstring seems redundant - IMO makes more sense not to mention it here, so we have the freedom to remove _is_invalid as needed, without having peopele being reliant on it too much.

A private helper method to check if the inputs are invalid for ratio
calculation.
"""

def __init__(self, t1: float | None, t2: float | None):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw more generally, does it make sense to support None? I see two uses of this class in production code:

In the first case Mypy says that the values will always be float (math.nan is the default).

Can't tell what the types are for the dataframe in the second case for the values "before" / "after". But in the case that even there the defaults are math.nan (or similar), and there are no None values, we could simplify the signature to:

def __init__(self, t1: float, t2: float):

"""
Initializes a Ratio object.
Args:
t1 (float | None): The first value (denominator).
t2 (float | None): The second value (numerator).
"""
self._t1 = t1
self._t2 = t2
self.is_insignificant = False
self.val = None

if self._is_invalid(t1, t2):
self.val = math.inf

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the value of val is set only once and only at creation, maybe we could make this object immutable with a namedtuple or a dataclass? To prevent the possiblity of putting this object into invalid state. Since there is post-processing in the __init__ method, it would have to be a dataclass:

@dataclass(frozen=True)
class Ratio:
    ...

In that case this if / else logic would be moved to __post_init__

else:
try:
self.val = t2 / t1
except ZeroDivisionError:
self.val = math.inf

def __repr__(self):
"""
Returns a string representation of the ratio.
If val is math.inf, returns "n/a".
If is_insignificant is True, returns "~" followed by the formatted ratio.
Otherwise, returns the ratio formatted to 2 decimal places.
"""
if self.val == math.inf:
return "n/a"
elif self.is_insignificant:
return "~" + f"{self.val:6.2f}".strip()
else:
return f"{self.val:6.2f}"

def _is_invalid(self, t1, t2):
"""
Checks if the inputs are invalid for ratio calculation.
Args:
t1 (float | None): The first value.
t2 (float | None): The second value.
Returns:
bool: True if either t1 or t2 is None or NaN, False otherwise.
"""
return t1 is None or t2 is None or math.isnan(t1) or math.isnan(t2)
95 changes: 95 additions & 0 deletions src/asv_spyglass/changes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import enum
from dataclasses import dataclass


class ResultColor(enum.StrEnum):
DEFAULT = "white"
GREEN = enum.auto()
RED = enum.auto()
LIGHTGREY = "light_grey"


class ResultMark(enum.StrEnum):
BETTER = "-"
WORSE = "+"
FAILURE = "!"
FIXED = "*"
INCOMPARABLE = "x"
UNCHANGED = " "
INSIGNIFICANT = "~"


class AfterIs(enum.Enum):
LUKEWARM = 0
WORSE = -1
BETTER = 1


@dataclass
class ASVChangeInfo:
mark: ResultMark
color: ResultColor
description: str
state: AfterIs
before: str
after: str


@dataclass
class Incomparable(ASVChangeInfo):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in Python it makes more sense to not subclass ASVChangeInfo in this case, but rather to make instances of ASVChangeInfo. Because when you are subclassing in Python then you have no obligation to define all the attributes.

In other words, the language would allow even something like this, where we would define only a single attribute:

@dataclass
class Incomparable(ASVChangeInfo):
    mark: ResultMark = ResultMark.INCOMPARABLE

But to ensure that we are forced to set all the fields, we should make instances of ASVChangeInfo instead:

def incomparable():
    return ASVChangeInfo(
        mark=ResultMark.INCOMPARABLE,
        color=ResultColor.LIGHTGREY,
        description="Not comparable",
        state=AfterIs.LUKEWARM,
        before="",
        after="",
    )

Building on top of this, we can leverage Mypy even further to catch and prevent errors. Right now ResultMark has 7 states, but nothing is forcing us to define the ASVChangeInfo for all 7 states. We have to implicitly remember that.

Instead, we can make Mypy to raise error if we forget to define ASVChangeInfo for any of the states.

To do that, we can make a dictionary with dict[ResultMark, ASVChangeInfo] type:

CHANGE_INFO_TYPES: dict[ResultMark, ASVChangeInfo] = {
    ResultMark.INCOMPARABLE: ASVChangeInfo(
        mark=ResultMark.INCOMPARABLE,
        color=ResultColor.LIGHTGREY,
        description="Not comparable",
        state=AfterIs.LUKEWARM,
        before="",
        after="",
    ),
    ResultMark.FAILURE: ASVChangeInfo(
        mark=ResultMark.FAILURE,
        color=ResultColor.RED,
        description="Introduced a failure",
        state=AfterIs.WORSE,
        before=""Succeeded,
        after="Failed",
    ),
    ...
}

And then, when accessing the values in get_change_info(), instead of:

# not comparable
return Incomparable()

We do:

# not comparable
return CHANGE_INFO_TYPES[ResultMark.INCOMPARABLE]

It's a bit more verbose, but reduces the mental load - we don't have to remember that we need to update the states when we update the ResultMark enum the next time. The IDE will highlight this error automatically.


Next step goes a bit more abstract. But to get even more type safety, even the _get_change_info() function could be refactored so that Mypy forces us to handle all cases explicitly.

Again we would use a dictionary with enum keys for that. But in this case the values would be the checks. It could look like this:

CHANGE_INFO_CHECKS: dict[
    ResultMark,
    Callable[[ASVBench, ASVBench, float, bool], bool],
] = {
    ResultMark.INCOMPARABLE: lambda asv1, asv2, factor, use_stats: (
        asv1.version is not None
        and asv2.version is not None
        and asv1.version != asv2.version
    ),
    ...
    # Catch all
    ResultMark.UNCHANGED: lambda *a: True,
}

def _get_change_info(
    asv1: ASVBench, asv2: ASVBench, factor, use_stats
) -> ASVChangeInfo:
    for key, predicate in CHANGE_INFO_CHECKS.items():
        if predicate(asv1, asv2, factor, use_stats):
            return CHANGE_INFO_TYPES[key]

    # Sanity check - Should never be reached
    raise ValueError("No ResultMark matched")

And if we already had both CHANGE_INFO_CHECKS and CHANGE_INFO_TYPES, we could actually combine them:

class ChangeInfo(NamedTuple):
    predicate: Callable[[ASVBench, ASVBench, float, bool], bool]
    info: ASVChangeInfo

CHANGE_INFO: dict[ResultMark, ChangeInfo] = {
    ResultMark.INCOMPARABLE: ChangeInfo(
        predicate=lambda asv1, asv2, factor, use_stats: (
            asv1.version is not None
            and asv2.version is not None
            and asv1.version != asv2.version
        ),
        info=ASVChangeInfo(
            mark=ResultMark.INCOMPARABLE,
            color=ResultColor.LIGHTGREY,
            description="Not comparable",
            state=AfterIs.LUKEWARM,
            before="",
            after="",
        ),
    ),
    ResultMark.FAILURE: ChangeInfo(...),
    ...
    ResultMark.UNCHANGED: ChangeInfo(...),
}

def _get_change_info(
    asv1: ASVBench, asv2: ASVBench, factor, use_stats
) -> ASVChangeInfo:
    for info in CHANGE_INFO.values():
        if info.predicate(asv1, asv2, factor, use_stats):
            return info.info

    # Sanity check - Should never be reached
    raise ValueError("No ResultMark matched")

mark: ResultMark = ResultMark.INCOMPARABLE
color: ResultColor = ResultColor.LIGHTGREY
description: str = "Not comparable"
state: AfterIs = AfterIs.LUKEWARM
before: str = ""
after: str = ""


@dataclass
class Failure(ASVChangeInfo):
mark: ResultMark = ResultMark.FAILURE
color: ResultColor = ResultColor.RED
description: str = "Introduced a failure"
state: AfterIs = AfterIs.WORSE
before: str = "Succeeded"
after: str = "Failed"


@dataclass
class Fixed(ASVChangeInfo):
mark: ResultMark = ResultMark.FIXED
color: ResultColor = ResultColor.GREEN
description: str = "Fixed a failure"
state: AfterIs = AfterIs.BETTER
before: str = "Failed"
after: str = "Succeeded"


@dataclass
class NoChange(ASVChangeInfo):
mark: ResultMark = ResultMark.UNCHANGED
color: ResultColor = ResultColor.DEFAULT
description: str = "Both failed or either was skipped or no significant change"
state: AfterIs = AfterIs.LUKEWARM
before: str = ""
after: str = ""


@dataclass
class Better(ASVChangeInfo):
mark: ResultMark = ResultMark.BETTER
color: ResultColor = ResultColor.GREEN
description: str = "Relative improvement"
state: AfterIs = AfterIs.BETTER
before: str = "Worse"
after: str = "Better"


@dataclass
class Worse(ASVChangeInfo):
mark: ResultMark = ResultMark.WORSE
color: ResultColor = ResultColor.RED
description: str = "Relatively worse"
state: AfterIs = AfterIs.WORSE
before: str = "Better"
after: str = "Worse"
Loading
Loading