Normalize IndexSet.data DB storage (#122)
* Add new DB data types and imports
* Refactor indexset.add_data()
* Introduce new IndexSetData table to normalize data
* Rename .elements to .add for consistency
* Adapt tests to refactoring
* Cast types using numpy for efficiency
* Reduce number of lines needing tests
* Make data loading during tabulate() optional
* Use bulk insert for IndexSetData
* Use a normal property for `.data`
* Remove not-required parameter
* Add all missing and new db migrations
glatterf42 authored Nov 22, 2024
1 parent 70366d4 commit 60d7e29
Showing 30 changed files with 655 additions and 235 deletions.
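For orientation, a minimal usage sketch of the renamed interface that these changes introduce. It assumes an existing ixmp4 Run; the `run` object and the `create()` call are illustrative context, not part of this commit:

# Hypothetical usage of the renamed IndexSet interface
# (assumed setup: `run` is an existing ixmp4 Run with an optimization backend).
indexset = run.optimization.indexsets.create("technology")

# add() now takes `data` instead of `elements`; scalars and lists both work.
indexset.add(["wind", "solar"])
indexset.add("coal")

print(indexset.data)  # ["wind", "solar", "coal"]

# tabulate() now skips loading the data column unless explicitly requested.
df = run.optimization.indexsets.tabulate(include_data=True)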
28 changes: 15 additions & 13 deletions ixmp4/core/optimization/indexset.py
@@ -23,17 +23,17 @@ def name(self) -> str:
return self._model.name

@property
def elements(self) -> list[float | int | str]:
return self._model.elements
def data(self) -> list[float | int | str]:
return self._model.data

def add(self, elements: float | int | list[float | int | str] | str) -> None:
"""Adds elements to an existing IndexSet."""
self.backend.optimization.indexsets.add_elements(
indexset_id=self._model.id, elements=elements
def add(self, data: float | int | list[float | int | str] | str) -> None:
"""Adds data to an existing IndexSet."""
self.backend.optimization.indexsets.add_data(
indexset_id=self._model.id, data=data
)
self._model.elements = self.backend.optimization.indexsets.get(
self._model.data = self.backend.optimization.indexsets.get(
run_id=self._model.run__id, name=self._model.name
).elements
).data

@property
def run_id(self) -> int:
@@ -48,21 +48,21 @@ def created_by(self) -> str | None:
return self._model.created_by

@property
def docs(self):
def docs(self) -> str | None:
try:
return self.backend.optimization.indexsets.docs.get(self.id).description
except DocsModel.NotFound:
return None

@docs.setter
def docs(self, description):
def docs(self, description: str | None) -> None:
if description is None:
self.backend.optimization.indexsets.docs.delete(self.id)
else:
self.backend.optimization.indexsets.docs.set(self.id, description)

@docs.deleter
def docs(self):
def docs(self) -> None:
try:
self.backend.optimization.indexsets.docs.delete(self.id)
# TODO: silently failing
@@ -105,7 +105,9 @@ def list(self, name: str | None = None) -> list[IndexSet]:
for i in indexsets
]

def tabulate(self, name: str | None = None) -> pd.DataFrame:
def tabulate(
self, name: str | None = None, include_data: bool = False
) -> pd.DataFrame:
return self.backend.optimization.indexsets.tabulate(
run_id=self._run.id, name=name
run_id=self._run.id, name=name, include_data=include_data
)
2 changes: 1 addition & 1 deletion ixmp4/data/abstract/optimization/equation.py
@@ -177,7 +177,7 @@ def add_data(self, equation_id: int, data: dict[str, Any] | pd.DataFrame) -> Non
The data will be validated with the linked constrained
:class:`ixmp4.data.abstract.optimization.IndexSet`s. For that, `data.keys()`
must correspond to the names of the Equation's columns. Each column can only
contain values that are in the linked `IndexSet.elements`. Each row of entries
contain values that are in the linked `IndexSet.data`. Each row of entries
must be unique. No values can be missing, `None`, or `NaN`. If `data.keys()`
contains names already present in `Equation.data`, existing values will be
overwritten.
25 changes: 15 additions & 10 deletions ixmp4/data/abstract/optimization/indexset.py
@@ -17,8 +17,8 @@ class IndexSet(base.BaseModel, Protocol):
"""The id of the :class:`ixmp4.data.abstract.Run` for which this IndexSet is
defined. """

elements: types.JsonList
"""Unique list of str or int."""
data: types.OptimizationDataList
"""Unique list of str, int, or float."""

created_at: types.DateTime
"Creation date/time. TODO"
@@ -102,13 +102,18 @@ def list(self, *, name: str | None = None, **kwargs) -> list[IndexSet]:
"""
...

def tabulate(self, *, name: str | None = None, **kwargs) -> pd.DataFrame:
def tabulate(
self, *, name: str | None = None, include_data: bool = False, **kwargs
) -> pd.DataFrame:
r"""Tabulate IndexSets by specified criteria.
Parameters
----------
name : str
name : str, optional
The name of an IndexSet. If supplied only one result will be returned.
include_data : bool, optional
Whether to load all IndexSet data, which reduces loading speed. Defaults to
`False`.
# TODO: Update kwargs
\*\*kwargs: any
More filter parameters as specified in
@@ -120,24 +125,24 @@ def tabulate(self, *, name: str | None = None, **kwargs) -> pd.DataFrame:
A data frame with the columns:
- id
- name
- elements
- data
- run__id
- created_at
- created_by
"""
...

def add_elements(
self, indexset_id: int, elements: float | int | List[float | int | str] | str
def add_data(
self, indexset_id: int, data: float | int | List[float | int | str] | str
) -> None:
"""Adds elements to an existing IndexSet.
"""Adds data to an existing IndexSet.
Parameters
----------
indexset_id : int
The id of the target IndexSet.
elements : float | int | List[float | int | str] | str
The elements to be added to the IndexSet.
data : float | int | List[float | int | str] | str
The data to be added to the IndexSet.
Returns
-------
2 changes: 1 addition & 1 deletion ixmp4/data/abstract/optimization/parameter.py
@@ -176,7 +176,7 @@ def add_data(self, parameter_id: int, data: dict[str, Any] | pd.DataFrame) -> No
The data will be validated with the linked constrained
:class:`ixmp4.data.abstract.optimization.IndexSet`s. For that, `data.keys()`
must correspond to the names of the Parameter's columns. Each column can only
contain values that are in the linked `IndexSet.elements`. Each row of entries
contain values that are in the linked `IndexSet.data`. Each row of entries
must be unique. No values can be missing, `None`, or `NaN`. If `data.keys()`
contains names already present in `Parameter.data`, existing values will be
overwritten.
2 changes: 1 addition & 1 deletion ixmp4/data/abstract/optimization/table.py
@@ -176,7 +176,7 @@ def add_data(self, table_id: int, data: dict[str, Any] | pd.DataFrame) -> None:
The data will be validated with the linked constrained
:class:`ixmp4.data.abstract.optimization.IndexSet`s. For that, `data.keys()`
must correspond to the names of the Table's columns. Each column can only
contain values that are in the linked `IndexSet.elements`. Each row of entries
contain values that are in the linked `IndexSet.data`. Each row of entries
must be unique. No values can be missing, `None`, or `NaN`. If `data.keys()`
contains names already present in `Table.data`, existing values will be
overwritten.
2 changes: 1 addition & 1 deletion ixmp4/data/abstract/optimization/variable.py
@@ -179,7 +179,7 @@ def add_data(self, variable_id: int, data: dict[str, Any] | pd.DataFrame) -> Non
The data will be validated with the linked constrained
:class:`ixmp4.data.abstract.optimization.IndexSet`s. For that, `data.keys()`
must correspond to the names of the Variable's columns. Each column can only
contain values that are in the linked `IndexSet.elements`. Each row of entries
contain values that are in the linked `IndexSet.data`. Each row of entries
must be unique. No values can be missing, `None`, or `NaN`. If `data.keys()`
contains names already present in `Variable.data`, existing values will be
overwritten.
22 changes: 6 additions & 16 deletions ixmp4/data/api/optimization/indexset.py
@@ -2,7 +2,6 @@
from typing import ClassVar, List

import pandas as pd
from pydantic import StrictFloat, StrictInt, StrictStr

from ixmp4.data import abstract

@@ -17,13 +16,7 @@ class IndexSet(base.BaseModel):

id: int
name: str
elements: (
StrictFloat
| StrictInt
| StrictStr
| list[StrictFloat | StrictInt | StrictStr]
| None
)
data: float | int | str | list[int | float | str] | None
run__id: int

created_at: datetime | None
@@ -64,16 +57,13 @@ def enumerate(self, **kwargs) -> list[IndexSet] | pd.DataFrame:
def list(self, **kwargs) -> list[IndexSet]:
return super()._list(json=kwargs)

def tabulate(self, **kwargs) -> pd.DataFrame:
return super()._tabulate(json=kwargs)
def tabulate(self, include_data: bool = False, **kwargs) -> pd.DataFrame:
return super()._tabulate(json=kwargs, params={"include_data": include_data})

def add_elements(
def add_data(
self,
indexset_id: int,
elements: StrictFloat
| StrictInt
| List[StrictFloat | StrictInt | StrictStr]
| StrictStr,
data: float | int | str | List[float | int | str],
) -> None:
kwargs = {"indexset_id": indexset_id, "elements": elements}
kwargs = {"indexset_id": indexset_id, "data": data}
self._request("PATCH", self.prefix + str(indexset_id) + "/", json=kwargs)
2 changes: 1 addition & 1 deletion ixmp4/data/db/optimization/equation/repository.py
@@ -65,7 +65,7 @@ def _add_column(
self.columns.create(
name=column_name,
constrained_to_indexset=indexset.id,
dtype=pd.Series(indexset.elements).dtype.name,
dtype=pd.Series(indexset.data).dtype.name,
equation_id=equation_id,
unique=True,
**kwargs,
40 changes: 27 additions & 13 deletions ixmp4/data/db/optimization/indexset/model.py
@@ -1,6 +1,6 @@
from typing import ClassVar

from sqlalchemy.orm import validates
import numpy as np

from ixmp4 import db
from ixmp4.core.exceptions import OptimizationDataValidationError
@@ -16,20 +16,34 @@ class IndexSet(base.BaseModel):
DataInvalid: ClassVar = OptimizationDataValidationError
DeletionPrevented: ClassVar = abstract.IndexSet.DeletionPrevented

elements: types.JsonList = db.Column(db.JsonType, nullable=False, default=[])
_data_type: types.OptimizationDataType

@validates("elements")
def validate_elements(self, key, value: list[float | int | str]):
unique = set()
for element in value:
if element in unique:
raise self.DataInvalid(
f"{element} already defined for IndexSet {self.name}!"
)
else:
unique.add(element)
return value
_data: types.Mapped[list["IndexSetData"]] = db.relationship(
back_populates="indexset"
)

@property
def data(self) -> list[float | int | str]:
return (
[]
if self._data_type is None
else np.array([d.value for d in self._data], dtype=self._data_type).tolist()
)

@data.setter
def data(self, value: list[float | int | str]) -> None:
return None

run__id: types.RunId

__table_args__ = (db.UniqueConstraint("name", "run__id"),)


class IndexSetData(base.RootBaseModel):
table_prefix = "optimization_"

indexset: types.Mapped["IndexSet"] = db.relationship(back_populates="_data")
indexset__id: types.IndexSetId
value: types.String = db.Column(db.String, nullable=False)

__table_args__ = (db.UniqueConstraint("indexset__id", "value"),)
45 changes: 34 additions & 11 deletions ixmp4/data/db/optimization/indexset/repository.py
@@ -8,7 +8,7 @@

from .. import base
from .docs import IndexSetDocsRepository
from .model import IndexSet
from .model import IndexSet, IndexSetData


class IndexSetRepository(
@@ -60,22 +60,45 @@ def list(self, *args, **kwargs) -> list[IndexSet]:
return super().list(*args, **kwargs)

@guard("view")
def tabulate(self, *args, **kwargs) -> pd.DataFrame:
return super().tabulate(*args, **kwargs)
def tabulate(self, *args, include_data: bool = False, **kwargs) -> pd.DataFrame:
if not include_data:
return (
super()
.tabulate(*args, **kwargs)
.rename(columns={"_data_type": "data_type"})
)
else:
result = super().tabulate(*args, **kwargs).drop(labels="_data_type", axis=1)
result.insert(
loc=0,
column="data",
value=[indexset.data for indexset in self.list(**kwargs)],
)
return result

@guard("edit")
def add_elements(
def add_data(
self,
indexset_id: int,
elements: float | int | List[float | int | str] | str,
data: float | int | List[float | int | str] | str,
) -> None:
indexset = self.get_by_id(id=indexset_id)
if not isinstance(elements, list):
elements = [elements]
if indexset.elements is None:
indexset.elements = elements
else:
indexset.elements = indexset.elements + elements
if not isinstance(data, list):
data = [data]

bulk_insert_enabled_data: list[dict[str, str]] = [
{"value": str(d)} for d in data
]
try:
self.session.execute(
db.insert(IndexSetData).values(indexset__id=indexset_id),
bulk_insert_enabled_data,
)
except db.IntegrityError as e:
self.session.rollback()
raise indexset.DataInvalid from e

indexset._data_type = type(data[0]).__name__

self.session.add(indexset)
self.session.commit()
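The rewritten `add_data()` above persists values with an executemany-style bulk insert and turns a unique-constraint violation into `DataInvalid`. A self-contained sketch of that SQLAlchemy pattern; the engine, table definition, and ids are illustrative stand-ins rather than ixmp4's actual setup:

from sqlalchemy import Column, Integer, String, UniqueConstraint, create_engine, insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class IndexSetData(Base):
    __tablename__ = "optimization_indexsetdata"

    id = Column(Integer, primary_key=True)
    indexset__id = Column(Integer, nullable=False)
    value = Column(String, nullable=False)

    __table_args__ = (UniqueConstraint("indexset__id", "value"),)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    # A list of parameter dicts triggers an executemany-style bulk insert;
    # the constant indexset__id comes from the statement's values() clause.
    rows = [{"value": str(v)} for v in [1, 2, 3]]
    try:
        session.execute(insert(IndexSetData).values(indexset__id=1), rows)
        session.commit()
    except IntegrityError:
        # Duplicate values violate the (indexset__id, value) unique constraint.
        session.rollback()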
2 changes: 1 addition & 1 deletion ixmp4/data/db/optimization/parameter/repository.py
@@ -66,7 +66,7 @@ def _add_column(
self.columns.create(
name=column_name,
constrained_to_indexset=indexset.id,
dtype=pd.Series(indexset.elements).dtype.name,
dtype=pd.Series(indexset.data).dtype.name,
parameter_id=parameter_id,
unique=True,
**kwargs,
2 changes: 1 addition & 1 deletion ixmp4/data/db/optimization/table/repository.py
@@ -65,7 +65,7 @@ def _add_column(
self.columns.create(
name=column_name,
constrained_to_indexset=indexset.id,
dtype=pd.Series(indexset.elements).dtype.name,
dtype=pd.Series(indexset.data).dtype.name,
table_id=table_id,
unique=True,
**kwargs,
4 changes: 2 additions & 2 deletions ixmp4/data/db/optimization/utils.py
@@ -12,10 +12,10 @@ def collect_indexsets_to_check(
columns: list["Column"],
) -> dict[str, Any]:
"""Creates a {key:value} dict from linked Column.names and their
IndexSet.elements."""
IndexSet.data."""
collection: dict[str, Any] = {}
for column in columns:
collection[column.name] = column.indexset.elements
collection[column.name] = column.indexset.data
return collection


2 changes: 1 addition & 1 deletion ixmp4/data/db/optimization/variable/repository.py
@@ -65,7 +65,7 @@ def _add_column(
self.columns.create(
name=column_name,
constrained_to_indexset=indexset.id,
dtype=pd.Series(indexset.elements).dtype.name,
dtype=pd.Series(indexset.data).dtype.name,
variable_id=variable_id,
unique=True,
**kwargs,
6 changes: 4 additions & 2 deletions ixmp4/data/types.py
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import Any
from typing import Any, Literal

from sqlalchemy.orm import Mapped as Mapped

@@ -8,9 +8,11 @@
Boolean = Mapped[bool]
DateTime = Mapped[datetime]
Float = Mapped[float]
IndexSetId = Mapped[db.IndexSetIdType]
Integer = Mapped[int]
JsonList = Mapped[list[float | int | str]]
OptimizationDataList = Mapped[list[float | int | str]]
JsonDict = Mapped[dict[str, Any]]
OptimizationDataType = Mapped[Literal["float", "int", "str"] | None]
String = Mapped[str]
Name = Mapped[db.NameType]
UniqueName = Mapped[db.UniqueNameType]