Skip to content
6 changes: 5 additions & 1 deletion .github/actions/setup-ubuntu/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ inputs:
required: false
default: 'llvm llvm-runtime'

python-version:
required: false
default: '3.11'

runs:
using: composite
steps:
Expand All @@ -17,7 +21,7 @@ runs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
python-version: ${{ inputs.python-version }}
- name: Install additional packages
if: ${{ inputs.packages != ''}}
shell: bash
Expand Down
29 changes: 29 additions & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI workflow: run the test script on every supported Python version.
name: Python test

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  python-test:
    runs-on: ubuntu-latest
    name: Test Python (${{ matrix.python-version }})
    strategy:
      # Keep testing the remaining versions when one of them fails.
      fail-fast: false
      matrix:
        # Versions stay quoted so that "3.10" is not parsed as the float 3.1.
        python-version:
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
    steps:
      - uses: actions/checkout@v3
      # Local composite action; it receives the matrix version as an input.
      - uses: ./.github/actions/setup-ubuntu
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run tests
        # -W error turns Python warnings into errors so that they fail the job.
        run: |
          python3 -W error test.py
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
<image src="./docs/slothy_logo.png" width=160>
</p>

![Python 3.9](https://img.shields.io/badge/Python-3.9-blue?logo=python)
![Python 3.10](https://img.shields.io/badge/Python-3.10-blue?logo=python)
![Python 3.11](https://img.shields.io/badge/Python-3.11-blue?logo=python)
![Python 3.12](https://img.shields.io/badge/Python-3.12-blue?logo=python)
![Python 3.13](https://img.shields.io/badge/Python-3.13-blue?logo=python)

## About SLOTHY

**SLOTHY** - **S**uper (**L**azy) **O**ptimization of **T**ricky **H**andwritten assembl**Y** - is an assembly-level superoptimizer
Expand Down Expand Up @@ -73,7 +79,7 @@ SLOTHY has been successfully used on
- Ubuntu-21.10 and up (64-bit),
- macOS Monterey 12.6 and up.

SLOTHY requires Python 3.11 (consider using pyenv to pin your Python version locally).
SLOTHY supports Python 3.9 through 3.13. For development (linting and building the documentation), Python >= 3.10 is required.
See [requirements.txt](requirements.txt) for package requirements, and install via `pip install -r requirements.txt`.

**Note:** `requirements.txt` pins versions for reproducibility. If you already have newer versions of some dependencies
Expand Down
29 changes: 16 additions & 13 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
numpy==1.26.4
ortools==9.7.2996
pandas==2.2.3
ortools==9.7.2996 ; python_version < "3.13"
ortools==9.12.4544 ; python_version >= "3.13"
# TODO: remove pandas once upgraded to ortools 9.12
pandas>=2.0.3
sympy==1.13.3
unicorn==2.1.3
sphinx==8.1.3
sphinx_rtd_theme==3.0.2
sphinx_autoapi
sphinx_mdinclude
myst_parser
sphinx-autobuild
sphinx-autodoc2
flake8==7.2.0
pydoclint==0.6.6
black
black

# Optional dependencies for linting, documentation (only for Python >= 3.10)
pydoclint==0.6.6 ; python_version >= "3.10"
flake8==7.2.0 ; python_version >= "3.10"
sphinx==8.1.3 ; python_version >= "3.10"
sphinx_rtd_theme==3.0.2 ; python_version >= "3.10"
sphinx_autoapi ; python_version >= "3.10"
sphinx_mdinclude ; python_version >= "3.10"
myst_parser ; python_version >= "3.10"
sphinx-autobuild ; python_version >= "3.10"
sphinx-autodoc2 ; python_version >= "3.10"
57 changes: 37 additions & 20 deletions slothy/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1776,9 +1776,8 @@ def _mark_loop_siblings(self):
if not self.config.sw_pipelining.enabled:
return

for tlow, thigh in zip(
self._model.tree.nodes_low, self._model.tree.nodes_high, strict=True
):
assert len(self._model.tree.nodes_low) == len(self._model.tree.nodes_high)
for tlow, thigh in zip(self._model.tree.nodes_low, self._model.tree.nodes_high):
tlow.sibling = thigh
thigh.sibling = tlow

Expand Down Expand Up @@ -2346,16 +2345,20 @@ def remember_spill(i, spilled, restore, arg, txt):
spills[p].append((arg, spill_id))
restores[restore].append((arg, spill_id))

assert len(t.out_spills) == len(t.out_lifetime_start)
assert len(t.out_spills) == len(t.inst.args_out)
for i, (spilled, restore, arg) in enumerate(
zip(t.out_spills, t.out_lifetime_start, t.inst.args_out, strict=True)
zip(t.out_spills, t.out_lifetime_start, t.inst.args_out)
):
remember_spill(i, spilled, restore, arg, "out")

assert len(t.in_out_spills) == len(t.inout_lifetime_start)
assert len(t.in_out_spills) == len(t.inst.args_in_out)
for i, (spilled, restore, arg) in enumerate(
zip(
t.in_out_spills,
t.inout_lifetime_start,
t.inst.args_in_out,
strict=True,
)
):
remember_spill(i, spilled, restore, arg, "inout")
Expand Down Expand Up @@ -2521,11 +2524,13 @@ def _add_path_constraint_from(self, consumer, producer, cb_lst):
or producer.is_virtual
or consumer.is_virtual
):
for cb, bvar in zip(cb_lst, bvars, strict=True):
assert len(cb_lst) == len(bvars)
for cb, bvar in zip(cb_lst, bvars):
cb().OnlyEnforceIf(bvar)
return

for cb, bvar in zip(cb_lst, bvars, strict=True):
# Length check standing in for zip(..., strict=True); compare against
# `bvars` (the list zipped below), not the builtin `vars`.
assert len(cb_lst) == len(bvars)
for cb, bvar in zip(cb_lst, bvars):
constraints = [bvar]
if self._is_low(producer):
constraints.append(producer.pre_var.Not())
Expand Down Expand Up @@ -2729,11 +2734,12 @@ def _allow_renaming(_):
self.logger.debug("Create register renaming variables for %s", t)

# Iterate through output registers of current instruction
assert len(t.inst.arg_types_out) == len(t.inst.args_out)
assert len(t.inst.arg_types_out) == len(t.inst.args_out_restrictions)
for arg_ty, arg_out, restrictions in zip(
t.inst.arg_types_out,
t.inst.args_out,
t.inst.args_out_restrictions,
strict=True,
):

self.logger.debug("- Output %s (%s)", arg_out, arg_ty)
Expand Down Expand Up @@ -2867,6 +2873,12 @@ def add_arg_combination_vars(combinations, vs, name, t=t):
self._NewBoolVar("") for _ in t.inst.arg_types_in_out
]
ivals = []

assert len(t.inst.arg_types_out) == len(t.alloc_out_var)
assert len(t.inst.arg_types_out) == len(t.out_lifetime_start)
assert len(t.inst.arg_types_out) == len(t.out_lifetime_duration)
assert len(t.inst.arg_types_out) == len(t.out_lifetime_end)
assert len(t.inst.arg_types_out) == len(t.out_spill_vars)
ivals += list(
zip(
t.inst.arg_types_out,
Expand All @@ -2875,9 +2887,13 @@ def add_arg_combination_vars(combinations, vs, name, t=t):
t.out_lifetime_duration,
t.out_lifetime_end,
t.out_spill_vars,
strict=True,
)
)
assert len(t.inst.arg_types_in_out) == len(t.alloc_in_out_var)
assert len(t.inst.arg_types_in_out) == len(t.inout_lifetime_start)
assert len(t.inst.arg_types_in_out) == len(t.inout_lifetime_duration)
assert len(t.inst.arg_types_in_out) == len(t.inout_lifetime_end)
assert len(t.inst.arg_types_in_out) == len(t.in_out_spill_vars)
ivals += list(
zip(
t.inst.arg_types_in_out,
Expand All @@ -2886,7 +2902,6 @@ def add_arg_combination_vars(combinations, vs, name, t=t):
t.inout_lifetime_duration,
t.inout_lifetime_end,
t.in_out_spill_vars,
strict=True,
)
)

Expand Down Expand Up @@ -3006,7 +3021,8 @@ def _has_cross_iteration_dependencies(self):
def _add_constraints_lifetime_bounds_single(self, t):

def _add_basic_constraints(start_list, end_list):
for start_var, end_var in zip(start_list, end_list, strict=True):
assert len(start_list) == len(end_list)
for start_var, end_var in zip(start_list, end_list):
# Make sure the output argument is considered 'used' for at least
# one instruction. Otherwise, instructions producing outputs that
# are never used would be able to overwrite life registers.
Expand Down Expand Up @@ -3053,12 +3069,13 @@ def _add_constraints_lifetime_bounds(self):
def _force_allocation_variant(self, alloc_dict, combinations, combination_vars):
if combinations is None:
return
for (idx_lst, valid_combinations), vs in zip(
combinations, combination_vars, strict=True
):
assert len(combinations) == len(combination_vars)
for (idx_lst, valid_combinations), vs in zip(combinations, combination_vars):
self._AddExactlyOne(vs)
for combination, var in zip(valid_combinations, vs, strict=True):
for idx, reg in zip(idx_lst, combination, strict=True):
assert len(valid_combinations) == len(vs)
for combination, var in zip(valid_combinations, vs):
assert len(idx_lst) == len(combination)
for idx, reg in zip(idx_lst, combination):
self._AddImplication(var, alloc_dict[idx].get(reg, False))

def _forbid_renaming_collision_single(self, var_dic_a, var_dic_b, condition=None):
Expand Down Expand Up @@ -3091,7 +3108,8 @@ def _force_allocation_restriction_single(self, valid_allocs, var_dict):
self._Add(v == False) # noqa: E712

def _force_allocation_restriction_many(self, restriction_lst, var_dict_lst):
for r, v in zip(restriction_lst, var_dict_lst, strict=True):
assert len(restriction_lst) == len(var_dict_lst)
for r, v in zip(restriction_lst, var_dict_lst):
if r is None:
continue
self._force_allocation_restriction_single(r, v)
Expand Down Expand Up @@ -3680,9 +3698,8 @@ def _add_constraints_loop_periodic(self):
# Additionally, they should use exactly the same registers, so we can roll the
# loop again

for t0, t1 in zip(
self._model.tree.nodes_low, self._model.tree.nodes_high, strict=True
):
assert len(self._model.tree.nodes_low) == len(self._model.tree.nodes_high)
for t0, t1 in zip(self._model.tree.nodes_low, self._model.tree.nodes_high):
self._Add(t0.pre_var == t1.pre_var)
self._Add(t0.post_var == t1.post_var)
self._Add(t0.core_var == t1.core_var)
Expand Down
4 changes: 3 additions & 1 deletion slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1534,7 +1534,9 @@ def failure_dump():
# which must hold valid memory addresses.
initial_register_contents = {}
for r in regs:
initial_register_contents[r] = int.from_bytes(os.urandom(16))
initial_register_contents[r] = int.from_bytes(
os.urandom(16), byteorder="little"
)
for reg, sz in address_registers.items():
# allocate 2*sz and place pointer in the middle
# this makes sure that memory can be accessed at negative offsets
Expand Down
33 changes: 22 additions & 11 deletions slothy/targets/aarch64/aarch64_neon.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,12 @@ def __repr__(self):
return self.name

@cache
@staticmethod
def spillable(reg_type):
def _spillable(reg_type):
return reg_type in [RegisterType.GPR] # For now, only GPRs

# TODO: remove workaround (needed for Python 3.9)
spillable = staticmethod(_spillable)

@staticmethod
def callee_saved_registers():
return [f"x{i}" for i in range(18, 31)] + [f"v{i}" for i in range(8, 16)]
Expand All @@ -169,8 +171,7 @@ def unicorn_program_counter():
return UC_ARM64_REG_PC

@cache
@staticmethod
def unicorn_reg_by_name(reg):
def _unicorn_reg_by_name(reg):
"""Converts string name of register into numerical identifiers used
within the unicorn engine"""

Expand Down Expand Up @@ -241,9 +242,11 @@ def unicorn_reg_by_name(reg):
}
return d.get(reg, None)

# TODO: remove workaround (needed for Python 3.9)
unicorn_reg_by_name = staticmethod(_unicorn_reg_by_name)

@cache
@staticmethod
def list_registers(
def _list_registers(
reg_type, only_extra=False, only_normal=False, with_variants=False
):
"""Return the list of all registers of a given type"""
Expand Down Expand Up @@ -288,6 +291,9 @@ def list_registers(
RegisterType.FLAGS: flags,
}[reg_type]

# TODO: remove workaround (needed for Python 3.9)
list_registers = staticmethod(_list_registers)

@staticmethod
def find_type(r):
"""Find type of architectural register"""
Expand Down Expand Up @@ -914,8 +920,7 @@ def get_parser(pattern):
return parser

@cache
@staticmethod
def _infer_register_type(ptrn):
def __infer_register_type(ptrn):
if ptrn[0].upper() in ["X", "W"]:
return RegisterType.GPR
if ptrn[0].upper() in ["V", "Q", "D", "B"]:
Expand All @@ -924,6 +929,9 @@ def _infer_register_type(ptrn):
return RegisterType.HINT
raise FatalParsingException(f"Unknown pattern: {ptrn}")

# TODO: remove workaround (needed for Python 3.9)
_infer_register_type = staticmethod(__infer_register_type)

def __init__(
self,
pattern,
Expand Down Expand Up @@ -967,9 +975,12 @@ def __init__(
self.in_outs = in_outs

self.pattern = pattern
self.pattern_inputs = list(zip(inputs, arg_types_in, strict=True))
self.pattern_outputs = list(zip(outputs, arg_types_out, strict=True))
self.pattern_in_outs = list(zip(in_outs, arg_types_in_out, strict=True))
assert len(inputs) == len(arg_types_in)
self.pattern_inputs = list(zip(inputs, arg_types_in))
assert len(outputs) == len(arg_types_out)
self.pattern_outputs = list(zip(outputs, arg_types_out))
assert len(in_outs) == len(arg_types_in_out)
self.pattern_in_outs = list(zip(in_outs, arg_types_in_out))

@staticmethod
def _to_reg(ty, s):
Expand Down
Loading