feature/#397 scenario duplication #2373
base: develop
Changes from commits: beee00d, f79d591, 85dac00, 190f6a6, 9639503, 190274a
@@ -37,6 +37,17 @@ class _DataManager(_Manager[DataNode], _VersionMixin):
     _EVENT_ENTITY_TYPE = EventEntityType.DATA_NODE
     _repository: _DataFSRepository

+    @classmethod
+    def _get_owner_id(
+        cls, scope, cycle_id, scenario_id
+    ) -> Union[Optional[SequenceId], Optional[ScenarioId], Optional[CycleId]]:
+        if scope == Scope.SCENARIO:
+            return scenario_id
+        elif scope == Scope.CYCLE:
+            return cycle_id
+        else:
+            return None
+
     @classmethod
     def _bulk_get_or_create(
         cls,
@@ -48,13 +59,7 @@ def _bulk_get_or_create(
         dn_configs_and_owner_id = []
         for dn_config in data_node_configs:
             scope = dn_config.scope
-            owner_id: Union[Optional[SequenceId], Optional[ScenarioId], Optional[CycleId]]
-            if scope == Scope.SCENARIO:
-                owner_id = scenario_id
-            elif scope == Scope.CYCLE:
-                owner_id = cycle_id
-            else:
-                owner_id = None
+            owner_id = cls._get_owner_id(scope, cycle_id, scenario_id)
             dn_configs_and_owner_id.append((dn_config, owner_id))

         data_nodes = cls._repository._get_by_configs_and_owner_ids(
@@ -174,3 +179,19 @@ def _get_by_config_id(cls, config_id: str, version_number: Optional[str] = None)
         for fil in filters:
             fil.update({"config_id": config_id})
         return cls._repository._load_all(filters)
+
+    @classmethod
+    def _clone(
Review comment: I would rename `_clone` to `_duplicate`.
+        cls, dn: DataNode, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None
+    ) -> DataNode:
+        cloned_dn = cls._get(dn)
+
+        cloned_dn.id = cloned_dn._new_id(cloned_dn._config_id)
+        cloned_dn._owner_id = cls._get_owner_id(cloned_dn._scope, cycle_id, scenario_id)
+        cloned_dn._parent_ids = set()
+
+        cls._set(cloned_dn)
+
+        cloned_dn._clone_data()
Review comment: If the data node has a cycle scope, and the new scenario is in the same cycle, then we want to share the same data node between the scenarios.
+
+        return cloned_dn
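Following the review comment above, here is a hedged sketch of what cycle-scope sharing could look like; the early-return guard is an assumption layered on top of the PR's `_clone`, not part of the change:

```python
@classmethod
def _clone(
    cls, dn: DataNode, cycle_id: Optional[CycleId] = None, scenario_id: Optional[ScenarioId] = None
) -> DataNode:
    # Hypothetical guard (not in the PR): a cycle-scoped node owned by the
    # same cycle is shared rather than copied, so every scenario of that
    # cycle keeps pointing at the same data node.
    if dn._scope == Scope.CYCLE and cycle_id is not None and dn._owner_id == cycle_id:
        return dn

    cloned_dn = cls._get(dn)
    cloned_dn.id = cloned_dn._new_id(cloned_dn._config_id)
    cloned_dn._owner_id = cls._get_owner_id(cloned_dn._scope, cycle_id, scenario_id)
    cloned_dn._parent_ids = set()
    cls._set(cloned_dn)
    cloned_dn._clone_data()
    return cloned_dn
```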
@@ -42,6 +42,7 @@ class _FileDataNodeMixin:
     _PATH_KEY = "path"
     _DEFAULT_PATH_KEY = "default_path"
     _IS_GENERATED_KEY = "is_generated"
+    __TAIPY_CLONED_PREFIX = "TAIPY_CLONED"

     __logger = _TaipyLogger._get_logger()
@@ -109,12 +110,14 @@ def _get_downloadable_path(self) -> str:

         return ""

-    def _upload(self,
-                path: str,
-                upload_checker: Optional[Callable[[str, Any], bool]] = None,
-                editor_id: Optional[str] = None,
-                comment: Optional[str] = None,
-                **kwargs: Any) -> ReasonCollection:
+    def _upload(
+        self,
+        path: str,
+        upload_checker: Optional[Callable[[str, Any], bool]] = None,
+        editor_id: Optional[str] = None,
+        comment: Optional[str] = None,
+        **kwargs: Any,
+    ) -> ReasonCollection:
         """Upload a file data to the data node.

         Arguments:
@@ -136,20 +139,23 @@ def _upload(self,
         from ._data_manager_factory import _DataManagerFactory

         reasons = ReasonCollection()
-        if (editor_id
-            and self.edit_in_progress  # type: ignore[attr-defined]
-            and self.editor_id != editor_id  # type: ignore[attr-defined]
-            and (not self.editor_expiration_date  # type: ignore[attr-defined]
-                 or self.editor_expiration_date > datetime.now())):  # type: ignore[attr-defined]
+        if (
+            editor_id
+            and self.edit_in_progress  # type: ignore[attr-defined]
+            and self.editor_id != editor_id  # type: ignore[attr-defined]
+            and (
+                not self.editor_expiration_date  # type: ignore[attr-defined]
+                or self.editor_expiration_date > datetime.now()
+            )
+        ):  # type: ignore[attr-defined]
             reasons._add_reason(self.id, DataNodeEditInProgress(self.id))  # type: ignore[attr-defined]
             return reasons

         up_path = pathlib.Path(path)
         try:
             upload_data = self._read_from_path(str(up_path))
         except Exception as err:
-            self.__logger.error(f"Error uploading `{up_path.name}` to data "
-                                f"node `{self.id}`:")  # type: ignore[attr-defined]
+            self.__logger.error(f"Error uploading `{up_path.name}` to data " f"node `{self.id}`:")  # type: ignore[attr-defined]
             self.__logger.error(f"Error: {err}")
             reasons._add_reason(self.id, UploadFileCanNotBeRead(up_path.name, self.id))  # type: ignore[attr-defined]
             return reasons
@@ -161,7 +167,8 @@ def _upload(self,
                 self.__logger.error(
                     f"Error with the upload checker `{upload_checker.__name__}` "
                     f"while checking `{up_path.name}` file for upload to the data "
-                    f"node `{self.id}`:")  # type: ignore[attr-defined]
+                    f"node `{self.id}`:"
+                )  # type: ignore[attr-defined]
                 self.__logger.error(f"Error: {err}")
                 can_upload = False
@@ -171,9 +178,12 @@ def _upload(self,

         shutil.copy(up_path, self.path)

-        self.track_edit(timestamp=datetime.now(),  # type: ignore[attr-defined]
-                        editor_id=editor_id,
-                        comment=comment, **kwargs)
+        self.track_edit(
+            timestamp=datetime.now(),  # type: ignore[attr-defined]
+            editor_id=editor_id,
+            comment=comment,
+            **kwargs,
+        )
         self.unlock_edit()  # type: ignore[attr-defined]

         _DataManagerFactory._build_manager()._set(self)  # type: ignore[arg-type]
@@ -212,3 +222,14 @@ def _migrate_path(self, storage_type, old_path) -> str:
         if os.path.exists(old_path):
             shutil.move(old_path, new_path)
         return new_path
+
+    def _clone_data_file(self, id: str) -> Optional[str]:
+        if os.path.exists(self.path):
+            folder_path, base_name = os.path.split(self.path)
+            new_base_path = os.path.join(folder_path, f"TAIPY_CLONE_{id}_{base_name}")
+            if os.path.isdir(self.path):
+                shutil.copytree(self.path, new_base_path)
+            else:
+                shutil.copy(self.path, new_base_path)
+            return new_base_path
+        return ""
Review comment (on `_clone_data_file`, lines +227 to +235): Do we want to differentiate the cases where the initial path is generated by Taipy from those where it is provided by the user?
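One possible answer, sketched under the assumption that the property stored under `_IS_GENERATED_KEY` distinguishes the two cases; the naming scheme for the generated branch is illustrative only:

```python
def _clone_data_file(self, id: str) -> Optional[str]:
    if not os.path.exists(self.path):
        return ""
    folder_path, base_name = os.path.split(self.path)
    if self._properties.get(self._IS_GENERATED_KEY, False):
        # Taipy generated the original path: name the copy after the new
        # data node id, keeping the original extension.
        new_base_path = os.path.join(folder_path, f"{id}{os.path.splitext(base_name)[1]}")
    else:
        # The user provided the path: prefix the copy so the original file
        # is left untouched and the copy is clearly recognizable.
        new_base_path = os.path.join(folder_path, f"TAIPY_CLONE_{id}_{base_name}")
    if os.path.isdir(self.path):
        shutil.copytree(self.path, new_base_path)
    else:
        shutil.copy(self.path, new_base_path)
    return new_base_path
```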
@@ -192,3 +192,8 @@ def _write(self, data: Any, columns: Optional[List[str]] = None):
             encoding=properties[self.__ENCODING_KEY],
             header=properties[self._HAS_HEADER_PROPERTY],
         )
+
+    def _clone_data(self):
+        new_data_path = self._clone_data_file(self.id)
+        self._properties[self._PATH_KEY] = new_data_path
+        return new_data_path
Review comment: I would move that outside the data node and put it in the data_manager.

Reply: Well, this is specific to file data nodes only. If we put it in the data manager, we are grouping it with other types of data nodes, like SQL or Mongo; I don't think that's a good idea.
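A minimal sketch of the design the reply defends, with toy stand-ins for the real classes: give `_clone_data` a no-op default on the data node base class and override it only for file-backed nodes, so the manager can call it unconditionally without knowing the storage type:

```python
from typing import Optional

class DataNode:
    def _clone_data(self) -> Optional[str]:
        # Default: nothing to copy. SQL, Mongo, and other non-file nodes
        # share their underlying storage, so duplication is a no-op here.
        return None

class FileDataNode(DataNode):
    # Toy stand-in for the _FileDataNodeMixin subclasses in the PR.
    def __init__(self, id: str, path: str) -> None:
        self.id = id
        self.path = path

    def _clone_data(self) -> Optional[str]:
        # File-backed nodes copy their file and re-point their path; the
        # real code performs the copy via _clone_data_file.
        self.path = f"TAIPY_CLONE_{self.id}_{self.path}"
        return self.path
```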
Review comment: We should implement a can_duplicate method, returning reasons, just like we have a can_create method.
@@ -521,3 +521,47 @@ def _get_by_config_id(cls, config_id: str, version_number: Optional[str] = None)
         for fil in filters:
             fil.update({"config_id": config_id})
         return cls._repository._load_all(filters)
+
+    @classmethod
+    def _clone(cls, scenario: Scenario) -> Scenario:
Review comment: I would rename the _clone method to _duplicate, as it does not return another instance of the same object; it returns a similar object with a few differences (ids, sub-entities' ids, paths, etc.).

Review comment: We should accept an optional name and an optional date in the method signature. It does not change much for the name, but it does for the creation date: this affects the potential cycle, as the new scenario might be in a different cycle.

Reply: Hm? I can understand the name, but the creation date?

Reply: Let's say I have a January scenario I am happy with, and I want to start from a duplicate of it to compute my February scenario. So at the beginning of February, I duplicate my January scenario, passing the current date, so that the new scenario is in the February cycle. Does that make sense to you? And @FlorianJacta, any opinion on that?

Reply: I agree; duplication for me was not about the creation date. Your use case is a real use case from CFM.

Review comment: According to the issue, we should also pass an optional list of data nodes or data node IDs. Without any list, we should copy all the data. If the list is provided, only the files of the data nodes in the list should be copied.

Reply: I'll leave that as the next step for now.
+        """
+        Clone a scenario.
+
+        Arguments:
+            scenario (Scenario): The scenario to clone.
+
+        Returns:
+            Scenario: The cloned scenario.
+        """
+        cloned_scenario = cls._get(scenario)
+        cloned_scenario_id = cloned_scenario._new_id(cloned_scenario.config_id)
+        cloned_scenario.id = cloned_scenario_id
Review comment: proposed a suggested change to this line.
+
+        # TODO: update sequences
+
+        # Clone tasks and data nodes
+        _task_manager = _TaskManagerFactory._build_manager()
+        _data_manager = _DataManagerFactory._build_manager()
+
+        cloned_tasks = set()
+        for task in cloned_scenario.tasks.values():
+            cloned_tasks.add(_task_manager._clone(task, None, cloned_scenario_id))
+        cloned_scenario._tasks = cloned_tasks
Review comment: Duplicating tasks should depend on the minimal scope of the input and output data nodes.
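As a sketch of that rule (the helper name is made up; `task.input` and `task.output` are the data node dicts a `Task` exposes):

```python
def _should_duplicate_task(task: Task) -> bool:
    # A task is duplicated only when the minimal scope among its inputs and
    # outputs is SCENARIO; if every data node is CYCLE- or GLOBAL-scoped,
    # the existing task can be shared with the duplicated scenario.
    data_nodes = [*task.input.values(), *task.output.values()]
    return any(dn.scope == Scope.SCENARIO for dn in data_nodes)
```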
+
+        cloned_additional_data_nodes = set()
+        for data_node in cloned_scenario.additional_data_nodes.values():
+            cloned_additional_data_nodes.add(_data_manager._clone(data_node, None, cloned_scenario_id))
Review comment: Duplicating data nodes should depend on their scope.
+        cloned_scenario._additional_data_nodes = cloned_additional_data_nodes
+
+        for task in cloned_tasks:
+            if cloned_scenario_id not in task._parent_ids:
+                task._parent_ids.update([cloned_scenario_id])
+            _task_manager._set(task)
+
+        for dn in cloned_additional_data_nodes:
+            if cloned_scenario_id not in dn._parent_ids:
+                dn._parent_ids.update([cloned_scenario_id])
+            _data_manager._set(dn)
+
+        cls._set(cloned_scenario)
+
+        return cloned_scenario
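Pulling the discussion together, a hedged sketch of the signature the reviewers converge on; the `_duplicate` name and the `name`, `creation_date`, and `data_node_ids` parameters come from the comments above, not from the PR:

```python
@classmethod
def _duplicate(
    cls,
    scenario: Scenario,
    name: Optional[str] = None,
    creation_date: Optional[datetime] = None,
    data_node_ids: Optional[Set[DataNodeId]] = None,
) -> Scenario:
    # creation_date drives cycle assignment: duplicating a January scenario
    # with a February date places the duplicate in the February cycle.
    # data_node_ids, when provided, restricts which data nodes get their
    # files copied; None means copy the data of every data node.
    ...
```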
Review comment: We should implement a can_duplicate method, returning reasons, just like we have a can_create method.
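A minimal sketch of that idea, mirroring the existing `can_create` pattern; `EntityDoesNotExist` and `_repository._exists` are assumptions about the available reason and repository APIs:

```python
@classmethod
def _can_duplicate(cls, scenario: Scenario) -> ReasonCollection:
    # Collect blocking reasons instead of raising, so callers (e.g. the GUI)
    # can show the user why duplication is not possible.
    reasons = ReasonCollection()
    if not cls._repository._exists(scenario.id):
        reasons._add_reason(scenario.id, EntityDoesNotExist(scenario.id))
    return reasons
```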