ydataai · portellaa · Jan 16, 2024 · Jan 11, 2024 · Jan 11, 2024 · Jan 12, 2024
diff --git a/Makefile b/Makefile
@@ -68,13 +68,16 @@ package:  ### Builds the package in wheel format
 	echo "$(version)" > src/ydata/sdk/VERSION
 	stubgen src/ydata/sdk -o src --export-less
 	$(PYTHON) -m build --wheel
-	twine check dist/*
+	$(PYTHON) -m twine check dist/*
 
 wheel:  ### Compiles the wheel
 	test -d wheels || mkdir -p wheels
 	cp dist/ydata_sdk-$(version)-py3-none-any.whl wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl
 	$(PYTHON) -m pyc_wheel wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl
-	twine check wheels/*
+	$(PYTHON) -m twine check wheels/*
+
+upload:  ### Uploads the compiled wheel to the ydata repository
+	$(PYTHON) -m twine upload -r ydata wheels/ydata_sdk-$(version)-py$(PYV)-none-any.whl
 
 publish-docs: ### Publishes the documentation
 	mike deploy --push --update-aliases $(version) latest
diff --git a/docs/examples/synthesizer_multitable.md b/docs/examples/synthesizer_multitable.md
@@ -0,0 +1,17 @@
+# Synthesize Multi Table
+
+**Use YData's *MultiTableSynthesizer* to generate multi table synthetic data from multiple RDBMS tables**
+
+Multi table is the way to synthesize data from multiple tables of a database, with their relational structure in mind.
+
+Quickstart example:
+
+```python
+--8<-- "examples/synthesizers/multi_table_quickstart.py"
+```
+
+Sample write connector overriding example:
+
+```python
+--8<-- "examples/synthesizers/multi_table_sample_write_override.py"
+```
diff --git a/docs/sdk/reference/api/synthesizers/multitable.md b/docs/sdk/reference/api/synthesizers/multitable.md
@@ -0,0 +1 @@
+::: ydata.sdk.synthesizers.multitable.MultiTableSynthesizer
diff --git a/examples/synthesizers/multi_table_quickstart.py b/examples/synthesizers/multi_table_quickstart.py
@@ -0,0 +1,25 @@
+import os
+
+from ydata.sdk.datasources import DataSource
+from ydata.sdk.synthesizers import MultiTableSynthesizer
+
+# Do not forget to add your token as an environment variable
+os.environ["YDATA_TOKEN"] = '<TOKEN>'  # Remove if already defined
+
+# In this example, we demonstrate how to train a synthesizer from an existing multi table RDBMS datasource.
+# After training a Multi Table Synthesizer, we request a sample.
+# In this case, the sample is not returned as a Dataset; it is saved in the database
+# that the connector refers to.
+
+X = DataSource.get('<DATASOURCE_UID>')
+
+# Initialize a multi table synthesizer with the connector to write to
+# As long as the synthesizer does not call `fit`, it exists only locally
+# write_connector can be a UID or a Connector instance
+synth = MultiTableSynthesizer(write_connector='<CONNECTOR_UID>')
+
+# The synthesizer training is requested
+synth.fit(X)
+
+# We request a synthetic dataset with a fraction of 1.5
+synth.sample(frac=1.5)
diff --git a/examples/synthesizers/multi_table_sample_write_override.py b/examples/synthesizers/multi_table_sample_write_override.py
@@ -0,0 +1,32 @@
+import os
+
+from ydata.sdk.connectors import Connector
+from ydata.sdk.datasources import DataSource
+from ydata.sdk.synthesizers import MultiTableSynthesizer
+
+# Do not forget to add your token as env variables
+os.environ["YDATA_TOKEN"] = '<TOKEN>'  # Remove if already defined
+
+# In this example, we demonstrate how to train a synthesizer from an existing multi table RDBMS datasource.
+# After training a Multi Table Synthesizer, we request a sample.
+# In this case, we don't return the Dataset for the sample, it will be saved in the database
+# that the connector refers to.
+
+X = DataSource.get('<DATASOURCE_UID>')
+
+# For demonstration purposes, we will use a connector instance, but you can just send the UID
+
+write_connector = Connector.get('<CONNECTOR_UID>')
+
+# Initialize a multi table synthesizer with the connector to write to
+# As long as the synthesizer does not call `fit`, it exists only locally
+# write_connector can be an UID or a Connector instance
+synth = MultiTableSynthesizer(write_connector=write_connector)
+
+# The synthesizer training is requested
+synth.fit(X)
+
+# We request a synthetic dataset with a fraction of 1.5
+# In this case we use a Connector instance.
+# You can also pass the <CONNECTOR_UID> directly; you don't need to get the connector upfront.
+synth.sample(frac=1.5, write_connector=write_connector)
diff --git a/src/ydata/sdk/common/client/client.py b/src/ydata/sdk/common/client/client.py
@@ -60,8 +60,10 @@ def __init__(self, credentials: Optional[Union[str, Dict]] = None, project: Opti
         if set_as_global:
             self.__set_global()
 
-    def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
-             project: Project | None = None, files: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def post(
+        self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict] = None,
+        project: Optional[Project] = None, files: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """POST request to the backend.
 
         Args:
@@ -83,8 +85,10 @@ def post(self, endpoint: str, data: Optional[Dict] = None, json: Optional[Dict]
 
         return response
 
-    def get(self, endpoint: str, params: Optional[Dict] = None,
-            project: Project | None = None, cookies: Optional[Dict] = None, raise_for_status: bool = True) -> Response:
+    def get(
+        self, endpoint: str, params: Optional[Dict] = None, project: Optional[Project] = None,
+        cookies: Optional[Dict] = None, raise_for_status: bool = True
+    ) -> Response:
         """GET request to the backend.
 
         Args:
@@ -104,7 +108,9 @@ def get(self, endpoint: str, params: Optional[Dict] = None,
 
         return response
 
-    def get_static_file(self, endpoint: str, project: Project | None = None, raise_for_status: bool = True) -> Response:
+    def get_static_file(
+        self, endpoint: str, project: Optional[Project] = None, raise_for_status: bool = True
+    ) -> Response:
         """Retrieve a static file from the backend.
 
         Args:
@@ -141,7 +147,7 @@ def _get_default_project(self, token: str):
         return data['myWorkspace']
 
     def __build_url(self, endpoint: str, params: Optional[Dict] = None, data: Optional[Dict] = None,
-                    json: Optional[Dict] = None, project: Project | None = None, files: Optional[Dict] = None,
+                    json: Optional[Dict] = None, project: Optional[Project] = None, files: Optional[Dict] = None,
                     cookies: Optional[Dict] = None) -> Dict:
         """Build a request for the backend.
 

diff --git a/src/ydata/sdk/connectors/connector.py b/src/ydata/sdk/connectors/connector.py
@@ -47,7 +47,7 @@ def uid(self) -> UID:
         return self._model.uid
 
     @property
-    def type(self) -> str:
+    def type(self) -> ConnectorType:
         return self._model.type
 
     @staticmethod

diff --git a/src/ydata/sdk/datasources/_models/datasource.py b/src/ydata/sdk/datasources/_models/datasource.py
@@ -4,34 +4,17 @@
 from ydata.sdk.common.types import UID
 from ydata.sdk.datasources._models.datatype import DataSourceType
 from ydata.sdk.datasources._models.metadata.metadata import Metadata
-from ydata.sdk.datasources._models.status import State, Status
+from ydata.sdk.datasources._models.status import Status
 
 
 @dataclass
 class DataSource:
-
     uid: Optional[UID] = None
     author: Optional[str] = None
     name: Optional[str] = None
     datatype: Optional[DataSourceType] = None
     metadata: Optional[Metadata] = None
     status: Optional[Status] = None
-    state: Optional[State] = None
-
-    def __post_init__(self):
-        if self.metadata is not None:
-            self.metadata = Metadata(**self.metadata)
-
-        if self.state is not None:
-            data = {
-                'validation': self.state.get('validation', {}).get('state', 'unknown'),
-                'metadata': self.state.get('metadata', {}).get('state', 'unknown'),
-                'profiling': self.state.get('profiling', {}).get('state', 'unknown')
-            }
-            self.state = State.parse_obj(data)
-
-        if self.status is not None:
-            self.status = Status(self.status)
 
     def to_payload(self):
         return {}
diff --git a/src/ydata/sdk/datasources/_models/status.py b/src/ydata/sdk/datasources/_models/status.py
@@ -27,7 +27,7 @@ class ProfilingState(StringEnum):
     AVAILABLE = 'available'
 
 
-class Status(StringEnum):
+class State(StringEnum):
     """Represent the status of a [`DataSource`][ydata.sdk.datasources.datasource.DataSource]."""
 
     AVAILABLE = 'available'
@@ -59,7 +59,8 @@ class Status(StringEnum):
     """
 
 
-class State(BaseModel):
+class Status(BaseModel):
+    state: State
     validation: ValidationState
     metadata: MetadataState
     profiling: ProfilingState
diff --git a/src/ydata/sdk/datasources/datasource.py b/src/ydata/sdk/datasources/datasource.py
@@ -13,7 +13,7 @@
 from ydata.sdk.datasources._models.datasource_list import DataSourceList
 from ydata.sdk.datasources._models.datatype import DataSourceType
 from ydata.sdk.datasources._models.metadata.metadata import Metadata
-from ydata.sdk.datasources._models.status import Status, ValidationState
+from ydata.sdk.datasources._models.status import Status
 from ydata.sdk.utils.model_mixin import ModelFactoryMixin
 from ydata.sdk.utils.model_utils import filter_dict
 
@@ -174,20 +174,9 @@ def _wait_for_metadata(datasource):
             sleep(BACKOFF)
         return datasource
 
-    @staticmethod
-    def _resolve_api_status(api_status: Dict) -> Status:
-        status = Status(api_status.get('state', Status.UNKNOWN.name))
-        validation = ValidationState(api_status.get('validation', {}).get(
-            'state', ValidationState.UNKNOWN.name))
-        if validation == ValidationState.FAILED:
-            status = Status.FAILED
-        return status
-
     @staticmethod
     def _model_from_api(data: Dict, datasource_type: Type[mDataSource]) -> mDataSource:
-        data['datatype'] = data.pop('dataType')
-        data['state'] = data['status']
-        data['status'] = DataSource._resolve_api_status(data['status'])
+        data['datatype'] = data.pop('dataType', None)
         data = filter_dict(datasource_type, data)
         model = datasource_type(**data)
         return model

diff --git a/src/ydata/sdk/synthesizers/__init__.py b/src/ydata/sdk/synthesizers/__init__.py
@@ -1,8 +1,9 @@
 from ydata.datascience.common import PrivacyLevel
 from ydata.sdk.synthesizers._models.synthesizers_list import SynthesizersList
+from ydata.sdk.synthesizers.multitable import MultiTableSynthesizer
 from ydata.sdk.synthesizers.regular import RegularSynthesizer
 from ydata.sdk.synthesizers.synthesizer import BaseSynthesizer as Synthesizer
 from ydata.sdk.synthesizers.timeseries import TimeSeriesSynthesizer
 
 __all__ = ["RegularSynthesizer", "TimeSeriesSynthesizer",
-           "Synthesizer", "SynthesizersList", "PrivacyLevel"]
+           "Synthesizer", "SynthesizersList", "PrivacyLevel", "MultiTableSynthesizer"]
diff --git a/src/ydata/sdk/synthesizers/_models/status.py b/src/ydata/sdk/synthesizers/_models/status.py
@@ -1,49 +1,64 @@
-from typing import Generic, TypeVar
+from typing import Generic, Optional, TypeVar
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from ydata.core.enum import StringEnum
 
 T = TypeVar("T")
 
 
 class GenericStateErrorStatus(BaseModel, Generic[T]):
-    state: T
+    state: Optional[T] = Field(None)
+
+    class Config:
+        use_enum_values = True
 
 
 class PrepareState(StringEnum):
-    PREPARING = 'preparing'
-    DISCOVERING = 'discovering'
-    FINISHED = 'finished'
-    FAILED = 'failed'
-    UNKNOWN = 'unknown'
+    PREPARING = "preparing"
+    DISCOVERING = "discovering"
+    FINISHED = "finished"
+    FAILED = "failed"
 
 
 class TrainingState(StringEnum):
-    PREPARING = 'preparing'
-    RUNNING = 'running'
-    FINISHED = 'finished'
-    FAILED = 'failed'
-    UNKNOWN = 'unknown'
+    PREPARING = "preparing"
+    RUNNING = "running"
+    FINISHED = "finished"
+    FAILED = "failed"
 
 
 class ReportState(StringEnum):
-    UNKNOWN = 'unknown'
-    DISCOVERING = 'discovering'
-    FINISHED = 'finished'
-    FAILED = 'failed'
+    PREPARING = "preparing"
+    GENERATING = "generating"
+    AVAILABLE = "available"
+    FAILED = "failed"
 
 
 PrepareStatus = GenericStateErrorStatus[PrepareState]
 TrainingStatus = GenericStateErrorStatus[TrainingState]
 ReportStatus = GenericStateErrorStatus[ReportState]
 
 
-class Status(StringEnum):
-    NOT_INITIALIZED = 'not initialized'
-    FAILED = 'failed'
-    PREPARE = 'prepare'
-    TRAIN = 'train'
-    REPORT = 'report'  # Should not be here for SDK
-    READY = 'ready'
-    UNKNOWN = 'unknown'
+class Status(BaseModel):
+    class State(StringEnum):
+        NOT_INITIALIZED = 'not initialized'
+        UNKNOWN = 'unknown'
+
+        PREPARE = "prepare"
+        TRAIN = "train"
+        REPORT = "report"
+        READY = "ready"
+
+    state: Optional[State] = Field(None)
+    prepare: Optional[PrepareStatus] = Field(None)
+    training: Optional[TrainingStatus] = Field(None)
+    report: Optional[ReportStatus] = Field(None)
+
+    @staticmethod
+    def not_initialized() -> "Status":
+        return Status(state=Status.State.NOT_INITIALIZED)
+
+    @staticmethod
+    def unknown() -> "Status":
+        return Status(state=Status.State.UNKNOWN)
diff --git a/src/ydata/sdk/synthesizers/_models/synthesizer.py b/src/ydata/sdk/synthesizers/_models/synthesizer.py
@@ -1,11 +1,12 @@
-from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Optional
 
+from pydantic import BaseModel, Field
 
-@dataclass
-class Synthesizer:
+from .status import Status
 
-    uid: Optional[str] = None
-    author: Optional[str] = None
-    name: Optional[str] = None
-    status: Optional[Dict] = field(default_factory=dict)
+
+class Synthesizer(BaseModel):
+    uid: Optional[str] = Field(None)
+    author: Optional[str] = Field(None)
+    name: Optional[str] = Field(None)
+    status: Optional[Status] = Field(None)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		::: ydata.sdk.synthesizers.multitable.MultiTableSynthesizer