Skip to content

Commit

Permalink
Add conversion from TACO format
Browse files Browse the repository at this point in the history
  • Loading branch information
eriknw committed Sep 6, 2022
1 parent 3e926c0 commit 76cc4f7
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 23 deletions.
4 changes: 2 additions & 2 deletions spz_python/notebooks/Example_Rank4.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1090,7 +1090,7 @@
"```\n",
"structure = [sparse, sparse, doubly_compressed, sparse]\n",
"\n",
"taco_structure = [compressed-nonunique, singleton, singleton, compressed]\n",
"taco_structure = [compressed, singleton, singleton, compressed]\n",
"```"
],
"text/plain": [
Expand Down Expand Up @@ -19640,7 +19640,6 @@
"- [compressed, dense, singleton, singleton]\n",
"- [compressed, singleton, compressed, singleton]\n",
"- [compressed, singleton, dense, singleton]\n",
"- [compressed, singleton, singleton, compressed]\n",
"- [compressed, singleton, singleton, singleton]\n",
"- [compressed-nonunique, compressed, compressed, singleton]\n",
"- [compressed-nonunique, compressed, dense, singleton]\n",
Expand All @@ -19653,6 +19652,7 @@
"- [compressed-nonunique, dense, singleton, singleton]\n",
"- [compressed-nonunique, singleton, compressed, singleton]\n",
"- [compressed-nonunique, singleton, dense, singleton]\n",
"- [compressed-nonunique, singleton, singleton, compressed]\n",
"- [dense, compressed, compressed, singleton]\n",
"- [dense, compressed, dense, singleton]\n",
"- [dense, compressed, singleton, singleton]\n",
Expand Down
32 changes: 12 additions & 20 deletions spz_python/spz/_core.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from itertools import zip_longest

import numpy as np
import pandas as pd

from .sparsetype import DC, C, S, abbreviate, unabbreviate
from .sparsetype import DC, C, S, abbreviate
from .sparsetype import from_taco as _from_taco
from .sparsetype import to_taco as _to_taco
from .sparsetype import unabbreviate


def repeatrange(repeat, *args):
Expand All @@ -16,6 +17,12 @@ def issorted(array):


class SPZ:
@classmethod
def from_taco(cls, arrays, shape=None, structure=None):
    """Alternate constructor that takes ``structure`` as TACO level formats.

    A ``structure`` of None is forwarded unchanged; any other value is first
    passed through ``_from_taco``.  ``arrays`` and ``shape`` go straight to
    the regular constructor.
    """
    converted = None if structure is None else _from_taco(structure)
    return cls(arrays, shape=shape, structure=converted)

def __init__(self, arrays, shape=None, structure=None):
if not isinstance(arrays, (list, tuple)):
raise TypeError("arrays argument must be a list or tuple of numpy arrays")
Expand Down Expand Up @@ -220,7 +227,7 @@ def _validate(self):
assert len(ptr) == len(set(ptr))
else: # pragma: no cover
raise AssertionError()
self.taco_structure
assert _from_taco(self.taco_structure) == structure

def as_structure(self, structure):
return SPZ(self.arrays, self.shape, structure)
Expand Down Expand Up @@ -271,22 +278,7 @@ def arrays(self):

@property
def taco_structure(self):
# I'm not 100% certain of the use of "singleton" and "compressed-nonunique"
rv = []
L = [DC] + self._structure
for prev, cur, nxt in zip_longest(L[:-1], L[1:], L[2:]):
if cur == C:
rv.append("dense")
elif prev == S and cur in {S, DC}:
rv.append("singleton")
elif prev in {C, DC} and cur == S and nxt in {S, C}:
# I'm not certain about `nxt == C` case here
rv.append("compressed-nonunique")
elif prev in {C, DC}:
rv.append("compressed")
else:
raise NotImplementedError()
return rv
return _to_taco(self._structure)

def _repr_svg_(self):
try:
Expand Down
72 changes: 72 additions & 0 deletions spz_python/spz/sparsetype.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from itertools import zip_longest


class StructureType:
def __repr__(self):
return self.name
Expand Down Expand Up @@ -74,3 +77,72 @@ def unabbreviate(abbr):
rv.append(DC)
rv.pop() # One extra DC
return rv


def to_taco(structure):
    """Translate an SPZ structure into a list of TACO level-format names.

    ``structure`` may be an abbreviation string or an iterable of structure
    types; either way it is normalized through ``unabbreviate`` (non-strings
    go through ``abbreviate`` first).

    Returns a list with one of "dense", "singleton", "compressed-nonunique"
    or "compressed" per level.  Raises NotImplementedError if a level matches
    none of the rules.
    """
    if isinstance(structure, str):
        levels = unabbreviate(structure)
    else:
        levels = unabbreviate(abbreviate(*structure))

    # A leading DC stands in for the "level above" the first real level.
    padded = [DC] + levels
    count = len(padded) - 1
    names = [None] * count

    # Walk from the last level to the first so that `fill` always holds the
    # nearest non-S type at or after the current level (S if there is none).
    fill = S
    for pos in range(count - 1, -1, -1):
        above = padded[pos]
        here = padded[pos + 1]
        has_below = pos + 1 < count  # False only for the final level
        if here != S:
            fill = here

        if here == C:
            name = "dense"
        elif above == S and here in {S, DC}:
            name = "singleton"
        elif above in {C, DC} and here == S and fill in {S, C} and has_below:
            name = "compressed-nonunique"
        elif above in {C, DC}:
            name = "compressed"
        else:
            # We should be able to always go to TACO
            raise NotImplementedError(f"Unable to convert to TACO structure: {levels}")
        names[pos] = name
    return names


def from_taco(structure):
    """Convert a sequence of TACO level-format names into an SPZ structure.

    Parameters
    ----------
    structure : iterable of str
        One TACO level name per dimension.  The names recognized by the rules
        below are "compressed", "dense", "compressed-nonunique" and
        "singleton".

    Returns
    -------
    list
        One of ``C``, ``DC`` or ``S`` per level, in the same order.

    Raises
    ------
    ValueError
        If ``structure`` is empty, or if some level matches none of the
        conversion rules.
    """
    compressed = "compressed"
    dense = "dense"
    nonunique = "compressed-nonunique"
    singleton = "singleton"

    # Snapshot the input so indexing and slicing behave the same for lists,
    # tuples and other iterables.
    levels = list(structure)
    if not levels:
        # There is no first level to seed `prev_nonS` with; report this the
        # same way as any other unconvertible structure.
        raise ValueError(f"Unable to convert from TACO structure: {structure}")

    rv = []
    # Most recent level seen so far that is not "singleton" (seeded with the
    # first level, whatever it is).
    prev_nonS = levels[0]
    # fmt: off
    # Each step sees (previous level, current level, next level); `prev` is
    # None for the first level and `nxt` is None for the last one.
    for prev, cur, nxt in zip_longest([None] + levels[:-1], levels, levels[1:]):
        # These rules were developed via trial and error.
        # TODO: try to come up with a clearer way to convert from TACO.
        # Reminder: `and` binds tighter than `or` in the conditions below.
        if cur == dense and nxt in {dense, nonunique, compressed}:
            rv.append(C)
        elif (
            prev in {None, dense, singleton, compressed} and cur == compressed
            and nxt in {dense, nonunique, compressed}
            or cur == singleton and (
                prev == compressed and nxt in {dense, nonunique, compressed}
                or prev == singleton and nxt == compressed
                or prev == singleton and nxt in {dense, nonunique} and prev_nonS == compressed
            ) and not (prev == singleton and nxt == compressed and prev_nonS == nonunique)
        ):
            rv.append(DC)
        elif (
            cur == nonunique and nxt in {dense, singleton}
            or cur == compressed and nxt in {None, singleton}
            or cur == singleton and (
                prev == nonunique and nxt in {None, dense, singleton}
                or prev == compressed and nxt == singleton
                or prev == singleton and nxt in {None, singleton}
                or prev == singleton and nxt == dense and prev_nonS == nonunique
            ) and not (nxt is None and prev_nonS == compressed)
        ):
            rv.append(S)
        else:
            raise ValueError(f"Unable to convert from TACO structure: {structure}")
        if cur != singleton:
            prev_nonS = cur
    # fmt: on
    return rv
32 changes: 31 additions & 1 deletion spz_python/spz/tests/test_sparsetype.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from spz.sparsetype import DC, C, S, abbreviate, unabbreviate
import itertools

import pytest

from spz.sparsetype import DC, C, S, abbreviate, from_taco, to_taco, unabbreviate


def test_abbreviate():
Expand All @@ -14,3 +18,29 @@ def test_unabbreviate():
assert unabbreviate("S-C-S-DC-C-S") == expected
assert unabbreviate("SCSDCCS") == expected
assert unabbreviate("SC-S-D-C-S") == expected


@pytest.mark.parametrize("N", range(1, 9))
def test_from_taco(N):
    """Exhaustively check ``from_taco`` / ``to_taco`` for rank ``N``.

    Every combination of the four TACO level names is tried.  Combinations
    that ``from_taco`` rejects with ValueError are skipped; each accepted one
    must map to a distinct structure, round-trip through ``to_taco``, and the
    accepted set must have exactly ``3 ** (N - 1)`` members.
    """
    level_names = ["compressed", "dense", "compressed-nonunique", "singleton"]
    seen = {}
    for taco in itertools.product(level_names, repeat=N):
        try:
            converted = from_taco(taco)
        except ValueError:
            # Not a TACO combination that from_taco accepts.
            continue
        structure = tuple(converted)
        earlier = seen.get(structure)
        if earlier is not None:  # pragma: no cover
            # Print the clash before failing so it is easy to read in the log.
            print(structure)
            print(" ", earlier)
            print(" ", taco)
            raise AssertionError(
                "Multiple TACO structures give the same structure: "
                f"{taco} and {earlier} -> {structure}"
            )
        seen[structure] = taco
        assert tuple(to_taco(structure)) == taco
    assert len(seen) == 3 ** (N - 1)

0 comments on commit 76cc4f7

Please sign in to comment.