Commit 0fd00cf: support for python 3.8

portellaa committed Jan 15, 2024
1 parent 0945d35
Showing 8 changed files with 59 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -77,7 +77,7 @@ wheel: ### Compiles the wheel
 	$(PYTHON) -m twine check wheels/*
 
 upload:
-	$(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py310-none-any.whl
+	$(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl
 
 publish-docs: ### Publishes the documentation
 	mike deploy --push --update-aliases $(version) latest
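With 3.8 in the support matrix, the wheel tag can no longer be hard-coded: the upload target now interpolates the interpreter version through `$(PYV)`, which is presumably defined elsewhere in the Makefile (e.g. as 38, 39, or 310), so the same rule uploads whichever wheel was just built.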
4 changes: 2 additions & 2 deletions docs/examples/synthesize_timeseries_data.md
@@ -2,9 +2,9 @@
 
 **Use YData's *TimeSeriesSynthesizer* to generate time-series synthetic data**
 
-Timeseries is the most common type of data we encounter in data problems.
+Tabular data is the most common type of data we encounter in data problems.
 
-When thinking about timeseries data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data.
+When thinking about tabular data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data.
 
 Thus, sequential or time-series data refers to any data containing elements ordered into sequences in a structured format.
 Dissecting any time-series dataset, we see differences in variables' behavior that need to be understood for an effective generation of synthetic data. Typically any time-series dataset is composed of the following:
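The doc fix above corrects which kind of data is "most common": tabular, not time-series. To make the tabular-versus-sequential distinction concrete, here is a minimal sketch of sequential data (illustrative names, not taken from the SDK docs), where rows belong to an entity and are ordered by a sort key, so records are related through time rather than independent:

```python
import pandas as pd

# Two sensors ("entities"), each with readings ordered by timestamp.
df = pd.DataFrame({
    "sensor_id": ["a", "a", "a", "b", "b", "b"],
    "timestamp": pd.date_range("2024-01-01", periods=3).tolist() * 2,
    "temperature": [19.5, 19.7, 20.1, 22.0, 21.8, 21.9],
})
# Within each sensor, consecutive rows are correlated -- the independence
# assumption that holds for tabular data does not hold here.
print(df.sort_values(["sensor_id", "timestamp"]))
```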
18 changes: 12 additions & 6 deletions src/ydata/sdk/common/client/client.py
@@ -60,8 +60,10 @@ def __init__(self, credentials: Optional[Union[str, Dict]] = None, project: Opti
         if set_as_global:
             self.__set_global()
 
-    def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
-             project: Project | None = None, files: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def post(
+        self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
+        project: Optional[Project] = None, files: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """POST request to the backend.
         Args:
@@ -83,8 +85,10 @@ def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict]
 
         return response
 
-    def get(self, endpoint: str, params: Optional[Dict] = None,
-            project: Project | None = None, cookies: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def get(
+        self, endpoint: str, params: Optional[Dict] = None, project: Optional[Project] = None,
+        cookies: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """GET request to the backend.
         Args:
@@ -104,7 +108,9 @@ def get(self, endpoint: str, params: Optional[Dict] = None,
 
         return response
 
-    def get_static_file(self, endpoint: str, project: Project | None = None, raise_for_status: bool = True) -> Response:
+    def get_static_file(
+        self, endpoint: str, project: Optional[Project] = None, raise_for_status: bool = True
+    ) -> Response:
         """Retrieve a static file from the backend.
         Args:
@@ -141,7 +147,7 @@ def _get_default_project(self, token: str):
         return data['myWorkspace']
 
     def __build_url(self, endpoint: str, params: Optional[Dict] = None, data: Optional[Dict] = None,
-                    json: Optional[Dict] = None, project: Project | None = None, files: Optional[Dict] = None,
+                    json: Optional[Dict] = None, project: Optional[Project] = None, files: Optional[Dict] = None,
                     cookies: Optional[Dict] = None) -> Dict:
         """Build a request for the backend.
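This file shows the substance of the commit: `Project | None` is PEP 604 syntax, evaluated when the `def` statement executes, and `type.__or__` only exists on Python 3.10+, so merely importing the module raises TypeError on 3.8. `Optional[Project]` spells the same type on every supported version. A minimal reproduction, using a stand-in class rather than the SDK's `Project`:

```python
import sys
from typing import Optional

class Project:  # stand-in for ydata.sdk's Project type
    ...

def get(project: Optional[Project] = None) -> None:  # fine on 3.8+
    ...

if sys.version_info < (3, 10):
    try:
        exec("def get2(project: Project | None = None) -> None: ...")
    except TypeError as exc:
        # e.g. "unsupported operand type(s) for |: 'type' and 'NoneType'"
        print("PEP 604 union rejected:", exc)
```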
14 changes: 7 additions & 7 deletions src/ydata/sdk/datasources/_models/datasource.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import Optional
 
 from ydata.sdk.common.types import UID
 from ydata.sdk.datasources._models.datatype import DataSourceType
@@ -8,13 +9,12 @@
 
 @dataclass
 class DataSource:
-
-    uid: UID | None = None
-    author: str | None = None
-    name: str | None = None
-    datatype: DataSourceType | None = None
-    metadata: Metadata | None = None
-    status: Status | None = None
+    uid: Optional[UID] = None
+    author: Optional[str] = None
+    name: Optional[str] = None
+    datatype: Optional[DataSourceType] = None
+    metadata: Optional[Metadata] = None
+    status: Optional[Status] = None
 
     def to_payload(self):
         return {}
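Dataclasses never evaluate their field annotations, so an alternative fix for this file would have been `from __future__ import annotations`, which stores every annotation as a string; the commit opts for `typing.Optional` instead, keeping one style across the SDK. A sketch of that alternative (illustrative only, not what the commit does):

```python
from __future__ import annotations  # postpones annotation evaluation (PEP 563)

from dataclasses import dataclass

@dataclass
class Example:
    uid: str | None = None  # accepted even on 3.8: stored as the string "str | None"

print(Example())  # Example(uid=None)
```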
12 changes: 6 additions & 6 deletions src/ydata/sdk/synthesizers/_models/status.py
@@ -1,4 +1,4 @@
-from typing import Generic, TypeVar
+from typing import Generic, Optional, TypeVar
 
 from pydantic import BaseModel, Field
 
@@ -8,7 +8,7 @@
 
 
 class GenericStateErrorStatus(BaseModel, Generic[T]):
-    state: T | None = Field(None)
+    state: Optional[T] = Field(None)
 
     class Config:
         use_enum_values = True
@@ -50,10 +50,10 @@ class State(StringEnum):
         REPORT = "report"
         READY = "ready"
 
-    state: State | None = Field(None)
-    prepare: PrepareStatus | None = Field(None)
-    training: TrainingStatus | None = Field(None)
-    report: ReportStatus | None = Field(None)
+    state: Optional[State] = Field(None)
+    prepare: Optional[PrepareStatus] = Field(None)
+    training: Optional[TrainingStatus] = Field(None)
+    report: Optional[ReportStatus] = Field(None)
 
     @staticmethod
    def not_initialized() -> "Status":
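For the pydantic models, the future-import escape hatch would not help: pydantic resolves field annotations at class-creation time to build validators, so the string "T | None" still gets evaluated, and on 3.8 even `TypeVar` lacks `__or__` (it was added in 3.10). Rewriting to `Optional[T]` is the spelling that works all the way back. A simplified sketch, with plain `str` standing in for the status enums:

```python
from typing import Optional

from pydantic import BaseModel, Field

class Status(BaseModel):
    state: Optional[str] = Field(None)    # portable to 3.8
    prepare: Optional[str] = Field(None)

print(Status())                      # both fields default to None
print(Status(state="ready").state)   # 'ready'
```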
10 changes: 6 additions & 4 deletions src/ydata/sdk/synthesizers/_models/synthesizer.py
@@ -1,10 +1,12 @@
+from typing import Optional
+
 from pydantic import BaseModel, Field
 
 from .status import Status
 
 
 class Synthesizer(BaseModel):
-    uid: str | None = None
-    author: str | None = None
-    name: str | None = None
-    status: Status | None = Field(None)
+    uid: Optional[str] = Field(None)
+    author: Optional[str] = Field(None)
+    name: Optional[str] = Field(None)
+    status: Optional[Status] = Field(None)
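Besides the `Optional` rewrite, this model's defaults are normalized: in pydantic, `= None` and `= Field(None)` declare the same default, and the commit settles on the `Field(None)` spelling for all four fields. For example:

```python
from typing import Optional

from pydantic import BaseModel, Field

class Synthesizer(BaseModel):
    uid: Optional[str] = Field(None)
    name: Optional[str] = None  # same default, different spelling

print(Synthesizer(name="demo"))  # uid defaults to None either way
```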
29 changes: 16 additions & 13 deletions src/ydata/sdk/synthesizers/multitable.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 from time import sleep
+from typing import Dict, List, Optional, Union
 
 from ydata.datascience.common import PrivacyLevel
 from ydata.sdk.common.client import Client
@@ -31,8 +34,8 @@ class MultiTableSynthesizer(BaseSynthesizer):
     """
 
     def __init__(
-            self, write_connector: Connector | UID, uid: UID | None = None, name: str | None = None,
-            project: Project | None = None, client: Client | None = None):
+            self, write_connector: Union[Connector, UID], uid: Optional[UID] = None, name: Optional[str] = None,
+            project: Optional[Project] = None, client: Optional[Client] = None):
 
         super().__init__(uid, name, project, client)
 
@@ -41,15 +44,15 @@ def __init__(
 
     def fit(self, X: DataSource,
             privacy_level: PrivacyLevel = PrivacyLevel.HIGH_FIDELITY,
-            datatype: DataSourceType | str | None = None,
-            sortbykey: str | list[str] | None = None,
-            entities: str | list[str] | None = None,
-            generate_cols: list[str] | None = None,
-            exclude_cols: list[str] | None = None,
-            dtypes: dict[str, str | DataType] | None = None,
-            target: str | None = None,
-            anonymize: dict | None = None,
-            condition_on: list[str] | None = None) -> None:
+            datatype: Optional[Union[DataSourceType, str]] = None,
+            sortbykey: Optional[Union[str, List[str]]] = None,
+            entities: Optional[Union[str, List[str]]] = None,
+            generate_cols: Optional[List[str]] = None,
+            exclude_cols: Optional[List[str]] = None,
+            dtypes: Optional[Dict[str, Union[str, DataType]]] = None,
+            target: Optional[str] = None,
+            anonymize: Optional[dict] = None,
+            condition_on: Optional[List[str]] = None) -> None:
         """Fit the synthesizer.
         The synthesizer accepts as training dataset a YData [`DataSource`][ydata.sdk.datasources.DataSource].
@@ -61,7 +64,7 @@ def fit(self, X: DataSource,
 
         self._fit_from_datasource(X)
 
-    def sample(self, frac: int | float = 1, write_connector: Connector | UID | None = None) -> None:
+    def sample(self, frac: Union[int, float] = 1, write_connector: Optional[Union[Connector, UID]] = None) -> None:
         """Sample from a [`MultiTableSynthesizer`][ydata.sdk.synthesizers.MultiTableSynthesizer]
         instance.
         The sample is saved in the connector that was provided in the synthesizer initialization
@@ -108,7 +111,7 @@ def _create_payload(self) -> dict:
 
         return payload
 
-    def _check_or_fetch_connector(self, write_connector: Connector | UID) -> Connector:
+    def _check_or_fetch_connector(self, write_connector: Union[Connector, UID]) -> Connector:
         self._logger.debug(f'Write connector is {write_connector}')
         if isinstance(write_connector, str):
             self._logger.debug(f'Write connector is of type `UID` {write_connector}')
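This file had two 3.8 hazards, not one: PEP 604 unions (`str | None`) and PEP 585 builtin generics (`list[str]`, `dict[str, str | DataType]`), the latter needing 3.9+. The added `from __future__ import annotations` defers evaluation and makes both spellings legal inside annotations, but anything that introspects the hints later (pydantic, `typing.get_type_hints`) would still fail to resolve them on 3.8, which is presumably why the signatures were rewritten to `Optional`/`Union`/`List`/`Dict` as well. A sketch of that failure mode:

```python
import sys
from typing import get_type_hints

# The string annotation mimics what PEP 563 stores for `str | list[str] | None`.
def fit(sortbykey: "str | list[str] | None" = None) -> None: ...

if sys.version_info < (3, 10):
    try:
        get_type_hints(fit)  # evaluates the string; TypeError on 3.8/3.9
    except TypeError as exc:
        print("hints unresolvable on", sys.version_info[:2], "->", exc)
else:
    print(get_type_hints(fit))  # resolves fine on 3.10+
```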
16 changes: 9 additions & 7 deletions src/ydata/sdk/synthesizers/synthesizer.py
@@ -49,7 +49,9 @@ class BaseSynthesizer(ABC, ModelFactoryMixin):
         client (Client): (optional) Client to connect to the backend
     """
 
-    def __init__(self, uid: UID | None = None, name: str | None = None, project: Project | None = None, client: Client | None = None):
+    def __init__(
+            self, uid: Optional[UID] = None, name: Optional[str] = None,
+            project: Optional[Project] = None, client: Optional[Client] = None):
         self._init_common(client=client)
         self._model = mSynthesizer(uid=uid, name=name or str(uuid4()))
         self._project = project
@@ -179,7 +181,7 @@ def _validate_datasource_attributes(X: Union[DataSource, pdDataFrame], dataset_a
     @staticmethod
     def _metadata_to_payload(
         datatype: DataSourceType, ds_metadata: Metadata,
-        dataset_attrs: Optional[DataSourceAttrs] = None, target: str | None = None
+        dataset_attrs: Optional[DataSourceAttrs] = None, target: Optional[str] = None
     ) -> dict:
         """Transform a the metadata and dataset attributes into a valid
         payload.
@@ -218,11 +220,11 @@ def _metadata_to_payload(
     def _fit_from_datasource(
         self,
         X: DataSource,
-        privacy_level: PrivacyLevel | None = None,
-        dataset_attrs: DataSourceAttrs | None = None,
-        target: str | None = None,
-        anonymize: dict | None = None,
-        condition_on: list[str] | None = None
+        privacy_level: Optional[PrivacyLevel] = None,
+        dataset_attrs: Optional[DataSourceAttrs] = None,
+        target: Optional[str] = None,
+        anonymize: Optional[dict] = None,
+        condition_on: Optional[List[str]] = None
     ) -> None:
         payload = self._create_payload()
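Since every failure fixed here happens at import time (annotations are evaluated when the `def` or class statement runs), simply importing the touched modules under a 3.8 interpreter is a reasonable smoke test for this commit. A sketch, with import paths derived from the changed file paths and assuming the src/ layout installs as `ydata.sdk`:

```python
# Run under python3.8 after installing the SDK.
import importlib

for mod in (
    "ydata.sdk.common.client.client",
    "ydata.sdk.datasources._models.datasource",
    "ydata.sdk.synthesizers._models.status",
    "ydata.sdk.synthesizers.multitable",
    "ydata.sdk.synthesizers.synthesizer",
):
    importlib.import_module(mod)  # would raise TypeError before this commit
    print("imports cleanly:", mod)
```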
