Commit 0fd00cf: support for python 3.8

portellaa committed Jan 15, 2024
1 parent 0945d35
Showing 8 changed files with 59 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -77,7 +77,7 @@ wheel: ### Compiles the wheel
 	$(PYTHON) -m twine check wheels/*
 
 upload:
-	$(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py310-none-any.whl
+	$(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl
 
 publish-docs: ### Publishes the documentation
 	mike deploy --push --update-aliases $(version) latest
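With 3.8 in the support matrix, the wheel tag can no longer be hard-coded: the upload target now interpolates the interpreter version through `$(PYV)`, which is presumably defined elsewhere in the Makefile (e.g. as 38, 39, or 310), so the same rule uploads whichever wheel was just built.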
4 changes: 2 additions & 2 deletions docs/examples/synthesize_timeseries_data.md
@@ -2,9 +2,9 @@
 
 **Use YData's *TimeSeriesSynthesizer* to generate time-series synthetic data**
 
-Timeseries is the most common type of data we encounter in data problems.
+Tabular data is the most common type of data we encounter in data problems.
 
-When thinking about timeseries data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data.
+When thinking about tabular data, we assume independence between different records, but this does not happen in reality. Suppose we check events from our day-to-day life, such as room temperature changes, bank account transactions, stock price fluctuations, and air quality measurements in our neighborhood. In that case, we might end up with datasets where measures and records evolve and are related through time. This type of data is known to be sequential or time-series data.
 
 Thus, sequential or time-series data refers to any data containing elements ordered into sequences in a structured format.
 Dissecting any time-series dataset, we see differences in variables' behavior that need to be understood for an effective generation of synthetic data. Typically any time-series dataset is composed of the following:
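The doc fix above corrects which kind of data is "most common": tabular, not time-series. To make the tabular-versus-sequential distinction concrete, here is a minimal sketch of sequential data (illustrative names, not taken from the SDK docs), where rows belong to an entity and are ordered by a sort key, so records are related through time rather than independent:

```python
import pandas as pd

# Two sensors ("entities"), each with readings ordered by timestamp.
df = pd.DataFrame({
    "sensor_id": ["a", "a", "a", "b", "b", "b"],
    "timestamp": pd.date_range("2024-01-01", periods=3).tolist() * 2,
    "temperature": [19.5, 19.7, 20.1, 22.0, 21.8, 21.9],
})
# Within each sensor, consecutive rows are correlated -- the independence
# assumption that holds for tabular data does not hold here.
print(df.sort_values(["sensor_id", "timestamp"]))
```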
18 changes: 12 additions & 6 deletions src/ydata/sdk/common/client/client.py
@@ -60,8 +60,10 @@ def __init__(self, credentials: Optional[Union[str, Dict]] = None, project: Opti
         if set_as_global:
             self.__set_global()
 
-    def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
-             project: Project | None = None, files: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def post(
+        self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
+        project: Optional[Project] = None, files: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """POST request to the backend.
         Args:
@@ -83,8 +85,10 @@ def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict]
 
         return response
 
-    def get(self, endpoint: str, params: Optional[Dict] = None,
-            project: Project | None = None, cookies: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def get(
+        self, endpoint: str, params: Optional[Dict] = None, project: Optional[Project] = None,
+        cookies: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """GET request to the backend.
         Args:
@@ -104,7 +108,9 @@ def get(self, endpoint: str, params: Optional[Dict] = None,
 
         return response
 
-    def get_static_file(self, endpoint: str, project: Project | None = None, raise_for_status: bool = True) -> Response:
+    def get_static_file(
+        self, endpoint: str, project: Optional[Project] = None, raise_for_status: bool = True
+    ) -> Response:
         """Retrieve a static file from the backend.
         Args:
@@ -141,7 +147,7 @@ def _get_default_project(self, token: str):
         return data['myWorkspace']
 
     def __build_url(self, endpoint: str, params: Optional[Dict] = None, data: Optional[Dict] = None,
-                    json: Optional[Dict] = None, project: Project | None = None, files: Optional[Dict] = None,
+                    json: Optional[Dict] = None, project: Optional[Project] = None, files: Optional[Dict] = None,
                     cookies: Optional[Dict] = None) -> Dict:
         """Build a request for the backend.
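This file shows the substance of the commit: `Project | None` is PEP 604 syntax, evaluated when the `def` statement executes, and `type.__or__` only exists on Python 3.10+, so merely importing the module raises TypeError on 3.8. `Optional[Project]` spells the same type on every supported version. A minimal reproduction, using a stand-in class rather than the SDK's `Project`:

```python
import sys
from typing import Optional

class Project:  # stand-in for ydata.sdk's Project type
    ...

def get(project: Optional[Project] = None) -> None:  # fine on 3.8+
    ...

if sys.version_info < (3, 10):
    try:
        exec("def get2(project: Project | None = None) -> None: ...")
    except TypeError as exc:
        # e.g. "unsupported operand type(s) for |: 'type' and 'NoneType'"
        print("PEP 604 union rejected:", exc)
```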
14 changes: 7 additions & 7 deletions src/ydata/sdk/datasources/_models/datasource.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import Optional
 
 from ydata.sdk.common.types import UID
 from ydata.sdk.datasources._models.datatype import DataSourceType
@@ -8,13 +9,12 @@
 
 @dataclass
 class DataSource:
-
-    uid: UID | None = None
-    author: str | None = None
-    name: str | None = None
-    datatype: DataSourceType | None = None
-    metadata: Metadata | None = None
-    status: Status | None = None
+    uid: Optional[UID] = None
+    author: Optional[str] = None
+    name: Optional[str] = None
+    datatype: Optional[DataSourceType] = None
+    metadata: Optional[Metadata] = None
+    status: Optional[Status] = None
 
     def to_payload(self):
         return {}
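Dataclasses never evaluate their field annotations, so an alternative fix for this file would have been `from __future__ import annotations`, which stores every annotation as a string; the commit opts for `typing.Optional` instead, keeping one style across the SDK. A sketch of that alternative (illustrative only, not what the commit does):

```python
from __future__ import annotations  # postpones annotation evaluation (PEP 563)

from dataclasses import dataclass

@dataclass
class Example:
    uid: str | None = None  # accepted even on 3.8: stored as the string "str | None"

print(Example())  # Example(uid=None)
```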
12 changes: 6 additions & 6 deletions src/ydata/sdk/synthesizers/_models/status.py
@@ -1,4 +1,4 @@
-from typing import Generic, TypeVar
+from typing import Generic, Optional, TypeVar
 
 from pydantic import BaseModel, Field
 
@@ -8,7 +8,7 @@
 
 
 class GenericStateErrorStatus(BaseModel, Generic[T]):
-    state: T | None = Field(None)
+    state: Optional[T] = Field(None)
 
     class Config:
         use_enum_values = True
@@ -50,10 +50,10 @@ class State(StringEnum):
         REPORT = "report"
         READY = "ready"
 
-    state: State | None = Field(None)
-    prepare: PrepareStatus | None = Field(None)
-    training: TrainingStatus | None = Field(None)
-    report: ReportStatus | None = Field(None)
+    state: Optional[State] = Field(None)
+    prepare: Optional[PrepareStatus] = Field(None)
+    training: Optional[TrainingStatus] = Field(None)
+    report: Optional[ReportStatus] = Field(None)
 
     @staticmethod
    def not_initialized() -> "Status":
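For the pydantic models, the future-import escape hatch would not help: pydantic resolves field annotations at class-creation time to build validators, so the string "T | None" still gets evaluated, and on 3.8 even `TypeVar` lacks `__or__` (it was added in 3.10). Rewriting to `Optional[T]` is the spelling that works all the way back. A simplified sketch, with plain `str` standing in for the status enums:

```python
from typing import Optional

from pydantic import BaseModel, Field

class Status(BaseModel):
    state: Optional[str] = Field(None)    # portable to 3.8
    prepare: Optional[str] = Field(None)

print(Status())                      # both fields default to None
print(Status(state="ready").state)   # 'ready'
```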
10 changes: 6 additions & 4 deletions src/ydata/sdk/synthesizers/_models/synthesizer.py
@@ -1,10 +1,12 @@
+from typing import Optional
+
 from pydantic import BaseModel, Field
 
 from .status import Status
 
 
 class Synthesizer(BaseModel):
-    uid: str | None = None
-    author: str | None = None
-    name: str | None = None
-    status: Status | None = Field(None)
+    uid: Optional[str] = Field(None)
+    author: Optional[str] = Field(None)
+    name: Optional[str] = Field(None)
+    status: Optional[Status] = Field(None)
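Besides the `Optional` rewrite, this model's defaults are normalized: in pydantic, `= None` and `= Field(None)` declare the same default, and the commit settles on the `Field(None)` spelling for all four fields. For example:

```python
from typing import Optional

from pydantic import BaseModel, Field

class Synthesizer(BaseModel):
    uid: Optional[str] = Field(None)
    name: Optional[str] = None  # same default, different spelling

print(Synthesizer(name="demo"))  # uid defaults to None either way
```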
29 changes: 16 additions & 13 deletions src/ydata/sdk/synthesizers/multitable.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 from time import sleep
+from typing import Dict, List, Optional, Union
 
 from ydata.datascience.common import PrivacyLevel
 from ydata.sdk.common.client import Client
@@ -31,8 +34,8 @@ class MultiTableSynthesizer(BaseSynthesizer):
     """
 
     def __init__(
-            self, write_connector: Connector | UID, uid: UID | None = None, name: str | None = None,
-            project: Project | None = None, client: Client | None = None):
+            self, write_connector: Union[Connector, UID], uid: Optional[UID] = None, name: Optional[str] = None,
+            project: Optional[Project] = None, client: Optional[Client] = None):
 
         super().__init__(uid, name, project, client)
 
@@ -41,15 +44,15 @@ def __init__(
 
     def fit(self, X: DataSource,
             privacy_level: PrivacyLevel = PrivacyLevel.HIGH_FIDELITY,
-            datatype: DataSourceType | str | None = None,
-            sortbykey: str | list[str] | None = None,
-            entities: str | list[str] | None = None,
-            generate_cols: list[str] | None = None,
-            exclude_cols: list[str] | None = None,
-            dtypes: dict[str, str | DataType] | None = None,
-            target: str | None = None,
-            anonymize: dict | None = None,
-            condition_on: list[str] | None = None) -> None:
+            datatype: Optional[Union[DataSourceType, str]] = None,
+            sortbykey: Optional[Union[str, List[str]]] = None,
+            entities: Optional[Union[str, List[str]]] = None,
+            generate_cols: Optional[List[str]] = None,
+            exclude_cols: Optional[List[str]] = None,
+            dtypes: Optional[Dict[str, Union[str, DataType]]] = None,
+            target: Optional[str] = None,
+            anonymize: Optional[dict] = None,
+            condition_on: Optional[List[str]] = None) -> None:
         """Fit the synthesizer.
         The synthesizer accepts as training dataset a YData [`DataSource`][ydata.sdk.datasources.DataSource].
@@ -61,7 +64,7 @@ def fit(self, X: DataSource,
 
         self._fit_from_datasource(X)
 
-    def sample(self, frac: int | float = 1, write_connector: Connector | UID | None = None) -> None:
+    def sample(self, frac: Union[int, float] = 1, write_connector: Optional[Union[Connector, UID]] = None) -> None:
         """Sample from a [`MultiTableSynthesizer`][ydata.sdk.synthesizers.MultiTableSynthesizer]
         instance.
         The sample is saved in the connector that was provided in the synthesizer initialization
@@ -108,7 +111,7 @@ def _create_payload(self) -> dict:
 
         return payload
 
-    def _check_or_fetch_connector(self, write_connector: Connector | UID) -> Connector:
+    def _check_or_fetch_connector(self, write_connector: Union[Connector, UID]) -> Connector:
         self._logger.debug(f'Write connector is {write_connector}')
         if isinstance(write_connector, str):
             self._logger.debug(f'Write connector is of type `UID` {write_connector}')
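This file had two 3.8 hazards, not one: PEP 604 unions (`str | None`) and PEP 585 builtin generics (`list[str]`, `dict[str, str | DataType]`), the latter needing 3.9+. The added `from __future__ import annotations` defers evaluation and makes both spellings legal inside annotations, but anything that introspects the hints later (pydantic, `typing.get_type_hints`) would still fail to resolve them on 3.8, which is presumably why the signatures were rewritten to `Optional`/`Union`/`List`/`Dict` as well. A sketch of that failure mode:

```python
import sys
from typing import get_type_hints

# The string annotation mimics what PEP 563 stores for `str | list[str] | None`.
def fit(sortbykey: "str | list[str] | None" = None) -> None: ...

if sys.version_info < (3, 10):
    try:
        get_type_hints(fit)  # evaluates the string; TypeError on 3.8/3.9
    except TypeError as exc:
        print("hints unresolvable on", sys.version_info[:2], "->", exc)
else:
    print(get_type_hints(fit))  # resolves fine on 3.10+
```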
16 changes: 9 additions & 7 deletions src/ydata/sdk/synthesizers/synthesizer.py
@@ -49,7 +49,9 @@ class BaseSynthesizer(ABC, ModelFactoryMixin):
         client (Client): (optional) Client to connect to the backend
     """
 
-    def __init__(self, uid: UID | None = None, name: str | None = None, project: Project | None = None, client: Client | None = None):
+    def __init__(
+            self, uid: Optional[UID] = None, name: Optional[str] = None,
+            project: Optional[Project] = None, client: Optional[Client] = None):
         self._init_common(client=client)
         self._model = mSynthesizer(uid=uid, name=name or str(uuid4()))
         self._project = project
@@ -179,7 +181,7 @@ def _validate_datasource_attributes(X: Union[DataSource, pdDataFrame], dataset_a
     @staticmethod
     def _metadata_to_payload(
         datatype: DataSourceType, ds_metadata: Metadata,
-        dataset_attrs: Optional[DataSourceAttrs] = None, target: str | None = None
+        dataset_attrs: Optional[DataSourceAttrs] = None, target: Optional[str] = None
     ) -> dict:
         """Transform a the metadata and dataset attributes into a valid
         payload.
@@ -218,11 +220,11 @@ def _metadata_to_payload(
     def _fit_from_datasource(
         self,
         X: DataSource,
-        privacy_level: PrivacyLevel | None = None,
-        dataset_attrs: DataSourceAttrs | None = None,
-        target: str | None = None,
-        anonymize: dict | None = None,
-        condition_on: list[str] | None = None
+        privacy_level: Optional[PrivacyLevel] = None,
+        dataset_attrs: Optional[DataSourceAttrs] = None,
+        target: Optional[str] = None,
+        anonymize: Optional[dict] = None,
+        condition_on: Optional[List[str]] = None
     ) -> None:
         payload = self._create_payload()
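Since every failure fixed here happens at import time (annotations are evaluated when the `def` or class statement runs), simply importing the touched modules under a 3.8 interpreter is a reasonable smoke test for this commit. A sketch, with import paths derived from the changed file paths and assuming the src/ layout installs as `ydata.sdk`:

```python
# Run under python3.8 after installing the SDK.
import importlib

for mod in (
    "ydata.sdk.common.client.client",
    "ydata.sdk.datasources._models.datasource",
    "ydata.sdk.synthesizers._models.status",
    "ydata.sdk.synthesizers.multitable",
    "ydata.sdk.synthesizers.synthesizer",
):
    importlib.import_module(mod)  # would raise TypeError before this commit
    print("imports cleanly:", mod)
```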
