diff --git a/Makefile b/Makefile index 07ce061f..e847e663 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ wheel: ### Compiles the wheel $(PYTHON) -m twine check wheels/* upload: - $(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py310-none-any.whl + $(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl publish-docs: ### Publishes the documentation mike deploy --push --update-aliases $(version) latest diff --git a/docs/examples/synthesize_timeseries_data.md b/docs/examples/synthesize_timeseries_data.md index 5bfd3234..a224a530 100644 --- a/docs/examples/synthesize_timeseries_data.md +++ b/docs/examples/synthesize_timeseries_data.md @@ -2,9 +2,9 @@ **Use YData's *TimeSeriesSynthesizer* to generate time-series synthetic data** -Timeseries is the most common type of data we encounter in data problems. +Tabular data is the most common type of data we encounter in data problems. -When thinking about timeseries data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data. +When thinking about tabular data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data. Thus, sequential or time-series data refers to any data containing elements ordered into sequences in a structured format. Dissecting any time-series dataset, we see differences in variables' behavior that need to be understood for an effective generation of synthetic data. Typically any time-series dataset is composed of the following: diff --git a/src/ydata/sdk/common/client/client.py b/src/ydata/sdk/common/client/client.py index a3a3a258..3040c0c3 100644 --- a/src/ydata/sdk/common/client/client.py +++ b/src/ydata/sdk/common/client/client.py @@ -60,8 +60,10 @@ def __init__(self, credentials: Optional[Union[str, Dict]] = None, project: Opti if set_as_global: self.__set_global() - def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None, - project: Project | None = None, files: Optional[Dict] = None, raise_for_status: bool = True) -> Response: + def post( + self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None, + project: Optional[Project] = None, files: Optional[Dict] = None, raise_for_status: bool = True + ) -> Response: """POST request to the backend. Args: @@ -83,8 +85,10 @@ def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] return response - def get(self, endpoint: str, params: Optional[Dict] = None, - project: Project | None = None, cookies: Optional[Dict] = None, raise_for_status: bool = True) -> Response: + def get( + self, endpoint: str, params: Optional[Dict] = None, project: Optional[Project] = None, + cookies: Optional[Dict] = None, raise_for_status: bool = True + ) -> Response: """GET request to the backend. Args: @@ -104,7 +108,9 @@ def get(self, endpoint: str, params: Optional[Dict] = None, return response - def get_static_file(self, endpoint: str, project: Project | None = None, raise_for_status: bool = True) -> Response: + def get_static_file( + self, endpoint: str, project: Optional[Project] = None, raise_for_status: bool = True + ) -> Response: """Retrieve a static file from the backend. Args: @@ -141,7 +147,7 @@ def _get_default_project(self, token: str): return data['myWorkspace'] def __build_url(self, endpoint: str, params: Optional[Dict] = None, data: Optional[Dict] = None, - json: Optional[Dict] = None, project: Project | None = None, files: Optional[Dict] = None, + json: Optional[Dict] = None, project: Optional[Project] = None, files: Optional[Dict] = None, cookies: Optional[Dict] = None) -> Dict: """Build a request for the backend. diff --git a/src/ydata/sdk/datasources/_models/datasource.py b/src/ydata/sdk/datasources/_models/datasource.py index cd33c162..0b3fffca 100644 --- a/src/ydata/sdk/datasources/_models/datasource.py +++ b/src/ydata/sdk/datasources/_models/datasource.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Optional from ydata.sdk.common.types import UID from ydata.sdk.datasources._models.datatype import DataSourceType @@ -8,13 +9,12 @@ @dataclass class DataSource: - - uid: UID | None = None - author: str | None = None - name: str | None = None - datatype: DataSourceType | None = None - metadata: Metadata | None = None - status: Status | None = None + uid: Optional[UID] = None + author: Optional[str] = None + name: Optional[str] = None + datatype: Optional[DataSourceType] = None + metadata: Optional[Metadata] = None + status: Optional[Status] = None def to_payload(self): return {} diff --git a/src/ydata/sdk/synthesizers/_models/status.py b/src/ydata/sdk/synthesizers/_models/status.py index 3655fcc6..dd892bd5 100644 --- a/src/ydata/sdk/synthesizers/_models/status.py +++ b/src/ydata/sdk/synthesizers/_models/status.py @@ -1,4 +1,4 @@ -from typing import Generic, TypeVar +from typing import Generic, Optional, TypeVar from pydantic import BaseModel, Field @@ -8,7 +8,7 @@ class GenericStateErrorStatus(BaseModel, Generic[T]): - state: T | None = Field(None) + state: Optional[T] = Field(None) class Config: use_enum_values = True @@ -50,10 +50,10 @@ class State(StringEnum): REPORT = "report" READY = "ready" - state: State | None = Field(None) - prepare: PrepareStatus | None = Field(None) - training: TrainingStatus | None = Field(None) - report: ReportStatus | None = Field(None) + state: Optional[State] = Field(None) + prepare: Optional[PrepareStatus] = Field(None) + training: Optional[TrainingStatus] = Field(None) + report: Optional[ReportStatus] = Field(None) @staticmethod def not_initialized() -> "Status": diff --git a/src/ydata/sdk/synthesizers/_models/synthesizer.py b/src/ydata/sdk/synthesizers/_models/synthesizer.py index 79242ece..7928c9a2 100644 --- a/src/ydata/sdk/synthesizers/_models/synthesizer.py +++ b/src/ydata/sdk/synthesizers/_models/synthesizer.py @@ -1,10 +1,12 @@ +from typing import Optional + from pydantic import BaseModel, Field from .status import Status class Synthesizer(BaseModel): - uid: str | None = None - author: str | None = None - name: str | None = None - status: Status | None = Field(None) + uid: Optional[str] = Field(None) + author: Optional[str] = Field(None) + name: Optional[str] = Field(None) + status: Optional[Status] = Field(None) diff --git a/src/ydata/sdk/synthesizers/multitable.py b/src/ydata/sdk/synthesizers/multitable.py index 8dd2f851..faff0a75 100644 --- a/src/ydata/sdk/synthesizers/multitable.py +++ b/src/ydata/sdk/synthesizers/multitable.py @@ -1,4 +1,7 @@ +from __future__ import annotations + from time import sleep +from typing import Dict, List, Optional, Union from ydata.datascience.common import PrivacyLevel from ydata.sdk.common.client import Client @@ -31,8 +34,8 @@ class MultiTableSynthesizer(BaseSynthesizer): """ def __init__( - self, write_connector: Connector | UID, uid: UID | None = None, name: str | None = None, - project: Project | None = None, client: Client | None = None): + self, write_connector: Union[Connector, UID], uid: Optional[UID] = None, name: Optional[str] = None, + project: Optional[Project] = None, client: Optional[Client] = None): super().__init__(uid, name, project, client) @@ -41,15 +44,15 @@ def __init__( def fit(self, X: DataSource, privacy_level: PrivacyLevel = PrivacyLevel.HIGH_FIDELITY, - datatype: DataSourceType | str | None = None, - sortbykey: str | list[str] | None = None, - entities: str | list[str] | None = None, - generate_cols: list[str] | None = None, - exclude_cols: list[str] | None = None, - dtypes: dict[str, str | DataType] | None = None, - target: str | None = None, - anonymize: dict | None = None, - condition_on: list[str] | None = None) -> None: + datatype: Optional[Union[DataSourceType, str]] = None, + sortbykey: Optional[Union[str, List[str]]] = None, + entities: Optional[Union[str, List[str]]] = None, + generate_cols: Optional[List[str]] = None, + exclude_cols: Optional[List[str]] = None, + dtypes: Optional[Dict[str, Union[str, DataType]]] = None, + target: Optional[str] = None, + anonymize: Optional[dict] = None, + condition_on: Optional[List[str]] = None) -> None: """Fit the synthesizer. The synthesizer accepts as training dataset a YData [`DataSource`][ydata.sdk.datasources.DataSource]. @@ -61,7 +64,7 @@ def fit(self, X: DataSource, self._fit_from_datasource(X) - def sample(self, frac: int | float = 1, write_connector: Connector | UID | None = None) -> None: + def sample(self, frac: Union[int, float] = 1, write_connector: Optional[Union[Connector, UID]] = None) -> None: """Sample from a [`MultiTableSynthesizer`][ydata.sdk.synthesizers.MultiTableSynthesizer] instance. The sample is saved in the connector that was provided in the synthesizer initialization @@ -108,7 +111,7 @@ def _create_payload(self) -> dict: return payload - def _check_or_fetch_connector(self, write_connector: Connector | UID) -> Connector: + def _check_or_fetch_connector(self, write_connector: Union[Connector, UID]) -> Connector: self._logger.debug(f'Write connector is {write_connector}') if isinstance(write_connector, str): self._logger.debug(f'Write connector is of type `UID` {write_connector}') diff --git a/src/ydata/sdk/synthesizers/synthesizer.py b/src/ydata/sdk/synthesizers/synthesizer.py index 6a3e91f3..604c3211 100644 --- a/src/ydata/sdk/synthesizers/synthesizer.py +++ b/src/ydata/sdk/synthesizers/synthesizer.py @@ -49,7 +49,9 @@ class BaseSynthesizer(ABC, ModelFactoryMixin): client (Client): (optional) Client to connect to the backend """ - def __init__(self, uid: UID | None = None, name: str | None = None, project: Project | None = None, client: Client | None = None): + def __init__( + self, uid: Optional[UID] = None, name: Optional[str] = None, + project: Optional[Project] = None, client: Optional[Client] = None): self._init_common(client=client) self._model = mSynthesizer(uid=uid, name=name or str(uuid4())) self._project = project @@ -179,7 +181,7 @@ def _validate_datasource_attributes(X: Union[DataSource, pdDataFrame], dataset_a @staticmethod def _metadata_to_payload( datatype: DataSourceType, ds_metadata: Metadata, - dataset_attrs: Optional[DataSourceAttrs] = None, target: str | None = None + dataset_attrs: Optional[DataSourceAttrs] = None, target: Optional[str] = None ) -> dict: """Transform a the metadata and dataset attributes into a valid payload. @@ -218,11 +220,11 @@ def _metadata_to_payload( def _fit_from_datasource( self, X: DataSource, - privacy_level: PrivacyLevel | None = None, - dataset_attrs: DataSourceAttrs | None = None, - target: str | None = None, - anonymize: dict | None = None, - condition_on: list[str] | None = None + privacy_level: Optional[PrivacyLevel] = None, + dataset_attrs: Optional[DataSourceAttrs] = None, + target: Optional[str] = None, + anonymize: Optional[dict] = None, + condition_on: Optional[List[str]] = None ) -> None: payload = self._create_payload()