From 2ca5f082f92f948c5cc9f37f629d8191168cddc4 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 13:13:42 +0200 Subject: [PATCH 01/14] logic in place Signed-off-by: Jerry Guo --- src/power_grid_model/_utils.py | 16 ++++++++++++++++ src/power_grid_model/core/serialization.py | 22 ++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index 61f4e4cfe..6c2bb31c5 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -447,3 +447,19 @@ def is_columnar(component_data: ComponentData) -> bool: if is_sparse(component_data): return not isinstance(component_data["data"], np.ndarray) return not isinstance(component_data, np.ndarray) + + +def is_nan_or_equivalent(array): + """ + Check if the array contains only nan values or equivalent nan values for specific data types. + + Args: + array: The array to check. + + Returns: + bool: True if the array contains only nan or equivalent nan values, False otherwise. + """ + return isinstance(array, np.ndarray) and ( + (array.dtype == np.float64 and np.isnan(array).all()) + or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min)) + ) diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index 87a36c451..912cbdaca 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -10,6 +10,9 @@ from ctypes import byref from enum import IntEnum +import numpy as np + +from power_grid_model._utils import is_nan_or_equivalent from power_grid_model.core.dataset_definitions import DatasetType, _map_to_component_types, _str_to_datatype from power_grid_model.core.error_handling import assert_no_error from power_grid_model.core.index_integer import IdxC @@ -23,7 +26,7 @@ from power_grid_model.core.power_grid_dataset import CConstDataset, CWritableDataset from power_grid_model.data_types import Dataset from power_grid_model.errors import PowerGridSerializationError -from power_grid_model.typing import ComponentAttributeMapping +from power_grid_model.typing import ComponentAttributeFilterOptions, ComponentAttributeMapping class SerializationType(IntEnum): @@ -41,6 +44,7 @@ class Deserializer: _deserializer: DeserializerPtr _dataset_ptr: WritableDatasetPtr _dataset: CWritableDataset + _data_filter: ComponentAttributeMapping def __new__( cls, @@ -59,6 +63,7 @@ def __new__( instance._dataset_ptr = pgc.deserializer_get_dataset(instance._deserializer) assert_no_error() + instance._data_filter = data_filter instance._dataset = CWritableDataset(instance._dataset_ptr, data_filter=data_filter) assert_no_error() @@ -80,7 +85,20 @@ def load(self) -> Dataset: A tuple containing the deserialized dataset in Power grid model input format and the type of the dataset. """ pgc.deserializer_parse_to_buffer(self._deserializer) - return self._dataset.get_data() + + filtered_data = self._dataset.get_data() + if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: + for _, attributes in filtered_data.items(): + keys_to_remove = [] + for attr, array in attributes.items(): + if not isinstance(array, np.ndarray): + continue + if is_nan_or_equivalent(array): + keys_to_remove.append(attr) + for key in keys_to_remove: + del attributes[key] + + return filtered_data class Serializer(ABC): From 7b612058ecbf11401be5045dd66cd258f4665179 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 13:31:29 +0200 Subject: [PATCH 02/14] unit test Signed-off-by: Jerry Guo --- tests/unit/test_serialization.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index 1ae89fadd..3f41621ca 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -515,6 +515,21 @@ def assert_serialization_correct(deserialized_dataset: Dataset, serialized_datas ) +def assert_deserialization_filtering_correct(deserialized_dataset: Dataset, data_filter) -> bool: + if data_filter is ComponentAttributeFilterOptions.ALL: + return True + if data_filter is ComponentAttributeFilterOptions.RELEVANT: + for _, component_values in deserialized_dataset.items(): + for _, array in component_values.items(): + if not isinstance(array, np.ndarray): + continue + if (array.dtype == np.float64 and np.isnan(array).all()) or ( + array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min) + ): + return False + return True + + @pytest.mark.parametrize("raw_buffer", (True, False)) def test_json_deserialize_data(serialized_data, data_filters, raw_buffer: bool): data = to_json(serialized_data, raw_buffer=raw_buffer) @@ -647,3 +662,20 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali np.testing.assert_array_equal(nan_a, nan_b) np.testing.assert_allclose(field_result_a[~nan_a], field_result_b[~nan_b], rtol=1e-15) + + +@pytest.mark.parametrize( + ("deserialize", "pack", "data_filter"), + ( + pytest.param(json_deserialize, to_json, ComponentAttributeFilterOptions.ALL, id="json.all"), + pytest.param(json_deserialize, to_json, ComponentAttributeFilterOptions.RELEVANT, id="json.relevant"), + pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.ALL, id="msgpack.all"), + pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.RELEVANT, id="msgpack.relevant"), + ), +) +def test_deserialize_data_filter(deserialize, serialized_data, data_filter, pack): + test_data = pack(serialized_data) + + deserialized_result_a = deserialize(test_data, data_filter) + + assert assert_deserialization_filtering_correct(deserialized_result_a, data_filter) From b3e00e570dcb53b2fae616d93a51e0a9c907b2cb Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 15:02:29 +0200 Subject: [PATCH 03/14] added in api reference Signed-off-by: Jerry Guo --- docs/api_reference/python-api-reference.md | 5 +++++ docs/user_manual/serialization.md | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/api_reference/python-api-reference.md b/docs/api_reference/python-api-reference.md index 2e7b9c6ef..1b611caa9 100644 --- a/docs/api_reference/python-api-reference.md +++ b/docs/api_reference/python-api-reference.md @@ -19,6 +19,11 @@ SPDX-License-Identifier: MPL-2.0 .. automodule:: power_grid_model.enum :undoc-members: :show-inheritance: + +.. automodule:: power_grid_model.typing + :members: ComponentAttributeFilterOptions + :undoc-members: + :show-inheritance: ``` ## data types diff --git a/docs/user_manual/serialization.md b/docs/user_manual/serialization.md index 53b96b6d3..d3f1aa417 100644 --- a/docs/user_manual/serialization.md +++ b/docs/user_manual/serialization.md @@ -28,7 +28,7 @@ The format consists of a [`PowerGridModelRoot`](#json-schema-root-object) JSON o - [`PowerGridModelRoot`](#json-schema-root-object): `Object` - `version`: `string` containing the schema version (required, current version is `"1.0"`) - - `type`: `string` containing the dataset type, e.g. `"input"`, `"update"`, ... + - `type`: `string` containing the dataset type, e.g. `"input"`, `"update"`, etc. - `is_batch`: `boolean` flag that describes whether the dataset is a batch or not. - `attributes`: [`Attributes`](#json-schema-attributes-object) containing specified attributes per component type (e.g.: `"node"`). - `data`: [`Dataset`](#json-schema-dataset-object) containing the actual dataset. From 8ce9418a34b496edf6179b4a315d5488c4907685 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 15:15:29 +0200 Subject: [PATCH 04/14] formatted due to resolving merge conflict Signed-off-by: Jerry Guo --- src/power_grid_model/_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index 3c4f8a266..5dbf0d1bc 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -494,7 +494,7 @@ def is_columnar(component_data: ComponentData) -> bool: return not isinstance(component_data["data"], np.ndarray) return not isinstance(component_data, np.ndarray) - + def is_nan_or_equivalent(array): """ Check if the array contains only nan values or equivalent nan values for specific data types. @@ -510,7 +510,7 @@ def is_nan_or_equivalent(array): or (array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min)) ) - + def component_data_checks(component_data: ComponentData, component=None) -> None: """Checks if component_data is of ComponentData and raises ValueError if its not""" component_name = f"'{component}'" if component is not None else "" From 1bdb88a38185d2e37e251c5f3eac66f7d0c837b6 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 15:18:44 +0200 Subject: [PATCH 05/14] added `ComponentAttributeMapping` to api reference Signed-off-by: Jerry Guo --- docs/api_reference/python-api-reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_reference/python-api-reference.md b/docs/api_reference/python-api-reference.md index 1b611caa9..aeee66bdc 100644 --- a/docs/api_reference/python-api-reference.md +++ b/docs/api_reference/python-api-reference.md @@ -21,7 +21,7 @@ SPDX-License-Identifier: MPL-2.0 :show-inheritance: .. automodule:: power_grid_model.typing - :members: ComponentAttributeFilterOptions + :members: ComponentAttributeFilterOptions, ComponentAttributeMapping :undoc-members: :show-inheritance: ``` From bb3a10e1defd3c2b1f7cc26358ba8bf7e9646cac Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 15:58:58 +0200 Subject: [PATCH 06/14] add support for multi level filtering Signed-off-by: Jerry Guo --- src/power_grid_model/core/serialization.py | 31 +++++++++++++++------- tests/unit/test_serialization.py | 22 +++++++++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index 912cbdaca..5206be962 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -87,16 +87,29 @@ def load(self) -> Dataset: pgc.deserializer_parse_to_buffer(self._deserializer) filtered_data = self._dataset.get_data() - if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: - for _, attributes in filtered_data.items(): - keys_to_remove = [] - for attr, array in attributes.items(): - if not isinstance(array, np.ndarray): + + def _filter_attributes(attributes): + keys_to_remove = [] + for attr, array in attributes.items(): + if not isinstance(array, np.ndarray): + continue + if is_nan_or_equivalent(array): + keys_to_remove.append(attr) + for key in keys_to_remove: + del attributes[key] + + if isinstance(self._data_filter, ComponentAttributeFilterOptions): + if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: + for _, attributes in filtered_data.items(): + _filter_attributes(attributes) + elif isinstance(self._data_filter, dict): + for component_type, attributes in filtered_data.items(): + if component_type in self._data_filter: + filter_option = self._data_filter[component_type] + if filter_option is None: continue - if is_nan_or_equivalent(array): - keys_to_remove.append(attr) - for key in keys_to_remove: - del attributes[key] + if filter_option is ComponentAttributeFilterOptions.RELEVANT: + _filter_attributes(attributes) return filtered_data diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index bba139071..b0e44701c 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -779,6 +779,28 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali pytest.param(json_deserialize, to_json, ComponentAttributeFilterOptions.RELEVANT, id="json.relevant"), pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.ALL, id="msgpack.all"), pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.RELEVANT, id="msgpack.relevant"), + pytest.param( + json_deserialize, + to_json, + { + "node": ["id"], + "line": ComponentAttributeFilterOptions.ALL, + "sym_load": ["id"], + "asym_load": ComponentAttributeFilterOptions.RELEVANT, + }, + id="json.mixed_filter", + ), + pytest.param( + msgpack_deserialize, + to_msgpack, + { + "node": ["id"], + "line": ComponentAttributeFilterOptions.ALL, + "sym_load": ["id"], + "asym_load": ComponentAttributeFilterOptions.RELEVANT, + }, + id="msgpack.mixed_filter", + ), ), ) def test_deserialize_data_filter(deserialize, serialized_data, data_filter, pack): From af69cf5bc1cf66b643d88be6bab1c10e31294484 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Tue, 1 Oct 2024 16:25:53 +0200 Subject: [PATCH 07/14] minor Signed-off-by: Jerry Guo --- src/power_grid_model/core/serialization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index 5206be962..422759987 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -106,8 +106,6 @@ def _filter_attributes(attributes): for component_type, attributes in filtered_data.items(): if component_type in self._data_filter: filter_option = self._data_filter[component_type] - if filter_option is None: - continue if filter_option is ComponentAttributeFilterOptions.RELEVANT: _filter_attributes(attributes) From ca5e9c72a1b9c9d2c6a52d7487a8734d64903d65 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 12:14:01 +0200 Subject: [PATCH 08/14] added doc-string and updated the api reference; updated a test; displace the ...Options to enum Signed-off-by: Jerry Guo --- docs/api_reference/python-api-reference.md | 6 +---- src/power_grid_model/__init__.py | 3 ++- src/power_grid_model/_utils.py | 7 ++---- .../core/power_grid_dataset.py | 7 ++---- src/power_grid_model/core/serialization.py | 3 ++- src/power_grid_model/enum.py | 9 ++++++++ src/power_grid_model/typing.py | 23 +++++++++---------- tests/unit/test_internal_utils.py | 2 +- tests/unit/test_serialization.py | 6 ++--- .../unit/validation/test_batch_validation.py | 2 +- .../unit/validation/test_input_validation.py | 3 +-- 11 files changed, 35 insertions(+), 36 deletions(-) diff --git a/docs/api_reference/python-api-reference.md b/docs/api_reference/python-api-reference.md index aeee66bdc..f92b558f1 100644 --- a/docs/api_reference/python-api-reference.md +++ b/docs/api_reference/python-api-reference.md @@ -19,11 +19,6 @@ SPDX-License-Identifier: MPL-2.0 .. automodule:: power_grid_model.enum :undoc-members: :show-inheritance: - -.. automodule:: power_grid_model.typing - :members: ComponentAttributeFilterOptions, ComponentAttributeMapping - :undoc-members: - :show-inheritance: ``` ## data types @@ -39,6 +34,7 @@ SPDX-License-Identifier: MPL-2.0 .. autoclass:: power_grid_model.data_types.SparseBatchArray .. autoclass:: power_grid_model.dataset_definitions.DatasetType .. autoclass:: power_grid_model.dataset_definitions.ComponentType +.. autodata:: power_grid_model.typing.ComponentAttributeMapping ``` ## error types diff --git a/src/power_grid_model/__init__.py b/src/power_grid_model/__init__.py index ce7c86a76..a388541bb 100644 --- a/src/power_grid_model/__init__.py +++ b/src/power_grid_model/__init__.py @@ -12,6 +12,7 @@ BranchSide, CalculationMethod, CalculationType, + ComponentAttributeFilterOptions, FaultPhase, FaultType, LoadGenType, @@ -20,4 +21,4 @@ TapChangingStrategy, WindingType, ) -from power_grid_model.typing import ComponentAttributeFilterOptions, ComponentAttributeMapping +from power_grid_model.typing import ComponentAttributeMapping diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index 5dbf0d1bc..bacffd046 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -39,11 +39,8 @@ SinglePythonDataset, SparseBatchData, ) -from power_grid_model.typing import ( - ComponentAttributeFilterOptions, - ComponentAttributeMapping, - _ComponentAttributeMappingDict, -) +from power_grid_model.enum import ComponentAttributeFilterOptions +from power_grid_model.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict def is_nan(data) -> bool: diff --git a/src/power_grid_model/core/power_grid_dataset.py b/src/power_grid_model/core/power_grid_dataset.py index ed180a736..6a7af3703 100644 --- a/src/power_grid_model/core/power_grid_dataset.py +++ b/src/power_grid_model/core/power_grid_dataset.py @@ -28,12 +28,9 @@ ) from power_grid_model.core.power_grid_meta import ComponentMetaData, DatasetMetaData, power_grid_meta_data from power_grid_model.data_types import AttributeType, ComponentData, Dataset +from power_grid_model.enum import ComponentAttributeFilterOptions from power_grid_model.errors import PowerGridError -from power_grid_model.typing import ( - ComponentAttributeFilterOptions, - ComponentAttributeMapping, - _ComponentAttributeMappingDict, -) +from power_grid_model.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict class CDatasetInfo: # pylint: disable=too-few-public-methods diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index 422759987..d04cbd6fe 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -25,8 +25,9 @@ ) from power_grid_model.core.power_grid_dataset import CConstDataset, CWritableDataset from power_grid_model.data_types import Dataset +from power_grid_model.enum import ComponentAttributeFilterOptions from power_grid_model.errors import PowerGridSerializationError -from power_grid_model.typing import ComponentAttributeFilterOptions, ComponentAttributeMapping +from power_grid_model.typing import ComponentAttributeMapping class SerializationType(IntEnum): diff --git a/src/power_grid_model/enum.py b/src/power_grid_model/enum.py index 1d30b8b11..54d053ab4 100644 --- a/src/power_grid_model/enum.py +++ b/src/power_grid_model/enum.py @@ -203,3 +203,12 @@ class _ExperimentalFeatures(IntEnum): disabled = 0 enabled = 1 + + +class ComponentAttributeFilterOptions(IntEnum): + """Filter option component or attribute""" + + ALL = 0 + """Filter all components/attributes""" + RELEVANT = 1 + """Filter only non-empty components/attributes that contain non-NaN values""" diff --git a/src/power_grid_model/typing.py b/src/power_grid_model/typing.py index 115c2e5e6..cc93b65ee 100644 --- a/src/power_grid_model/typing.py +++ b/src/power_grid_model/typing.py @@ -6,19 +6,8 @@ Type hints for PGM. This includes all miscellaneous type hints not under dataset or dataset_definitions categories """ -from enum import IntEnum - from power_grid_model.core.dataset_definitions import ComponentType, ComponentTypeVar - - -class ComponentAttributeFilterOptions(IntEnum): - """Filter option component or attribute""" - - ALL = 0 - """Filter all components/attributes""" - RELEVANT = 1 - """Filter only non-empty components/attributes that contain non-NaN values""" - +from power_grid_model.enum import ComponentAttributeFilterOptions _ComponentAttributeMappingDict = dict[ComponentType, set[str] | list[str] | None | ComponentAttributeFilterOptions] @@ -29,3 +18,13 @@ class ComponentAttributeFilterOptions(IntEnum): | None | _ComponentAttributeMappingDict ) +""" +Type hint for mapping component attributes. + +`ComponentAttributeMapping` can be one of the following: +- A set of `ComponentTypeVar` +- A list of `ComponentTypeVar` +- A `ComponentAttributeFilterOptions` value +- `None` +- A dictionary mapping `ComponentType` to a set, list, `None`, or `ComponentAttributeFilterOptions` +""" diff --git a/tests/unit/test_internal_utils.py b/tests/unit/test_internal_utils.py index 960b2967c..460c0cc21 100644 --- a/tests/unit/test_internal_utils.py +++ b/tests/unit/test_internal_utils.py @@ -21,7 +21,7 @@ ) from power_grid_model.core.dataset_definitions import ComponentType as CT, DatasetType as DT from power_grid_model.data_types import BatchDataset, BatchList -from power_grid_model.typing import ComponentAttributeFilterOptions +from power_grid_model.enum import ComponentAttributeFilterOptions from .utils import convert_python_to_numpy diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index b0e44701c..a0eaac8c6 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -14,7 +14,7 @@ from power_grid_model.core.dataset_definitions import ComponentType from power_grid_model.core.power_grid_dataset import get_dataset_type from power_grid_model.data_types import BatchDataset, Dataset, SingleDataset -from power_grid_model.typing import ComponentAttributeFilterOptions +from power_grid_model.enum import ComponentAttributeFilterOptions from power_grid_model.utils import json_deserialize, json_serialize, msgpack_deserialize, msgpack_serialize @@ -783,7 +783,7 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali json_deserialize, to_json, { - "node": ["id"], + "node": None, "line": ComponentAttributeFilterOptions.ALL, "sym_load": ["id"], "asym_load": ComponentAttributeFilterOptions.RELEVANT, @@ -796,7 +796,7 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali { "node": ["id"], "line": ComponentAttributeFilterOptions.ALL, - "sym_load": ["id"], + "sym_load": None, "asym_load": ComponentAttributeFilterOptions.RELEVANT, }, id="msgpack.mixed_filter", diff --git a/tests/unit/validation/test_batch_validation.py b/tests/unit/validation/test_batch_validation.py index 19820b0d4..43d697fb2 100644 --- a/tests/unit/validation/test_batch_validation.py +++ b/tests/unit/validation/test_batch_validation.py @@ -8,7 +8,7 @@ from power_grid_model import DatasetType, LoadGenType, initialize_array from power_grid_model._utils import compatibility_convert_row_columnar_dataset -from power_grid_model.typing import ComponentAttributeFilterOptions +from power_grid_model.enum import ComponentAttributeFilterOptions from power_grid_model.validation import validate_batch_data from power_grid_model.validation.errors import MultiComponentNotUniqueError, NotBooleanError diff --git a/tests/unit/validation/test_input_validation.py b/tests/unit/validation/test_input_validation.py index acda77381..4594c6a9f 100644 --- a/tests/unit/validation/test_input_validation.py +++ b/tests/unit/validation/test_input_validation.py @@ -17,8 +17,7 @@ initialize_array, ) from power_grid_model._utils import compatibility_convert_row_columnar_dataset -from power_grid_model.enum import CalculationType, FaultPhase, FaultType -from power_grid_model.typing import ComponentAttributeFilterOptions +from power_grid_model.enum import CalculationType, ComponentAttributeFilterOptions, FaultPhase, FaultType from power_grid_model.validation import validate_input_data from power_grid_model.validation.errors import ( FaultPhaseError, From 30d75ee8ada6d90206325187626d4cf0bcfaa48b Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 13:48:52 +0200 Subject: [PATCH 09/14] reformatted documentation format Signed-off-by: Jerry Guo --- docs/api_reference/python-api-reference.md | 1 + src/power_grid_model/typing.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/docs/api_reference/python-api-reference.md b/docs/api_reference/python-api-reference.md index f92b558f1..6257888d7 100644 --- a/docs/api_reference/python-api-reference.md +++ b/docs/api_reference/python-api-reference.md @@ -35,6 +35,7 @@ SPDX-License-Identifier: MPL-2.0 .. autoclass:: power_grid_model.dataset_definitions.DatasetType .. autoclass:: power_grid_model.dataset_definitions.ComponentType .. autodata:: power_grid_model.typing.ComponentAttributeMapping + :annotation: ComponentAttributeMapping ``` ## error types diff --git a/src/power_grid_model/typing.py b/src/power_grid_model/typing.py index cc93b65ee..298e0b143 100644 --- a/src/power_grid_model/typing.py +++ b/src/power_grid_model/typing.py @@ -22,9 +22,14 @@ Type hint for mapping component attributes. `ComponentAttributeMapping` can be one of the following: + - A set of `ComponentTypeVar` + - A list of `ComponentTypeVar` + - A `ComponentAttributeFilterOptions` value + - `None` + - A dictionary mapping `ComponentType` to a set, list, `None`, or `ComponentAttributeFilterOptions` """ From 0b8cf94c708479f08f827782b41f16a532f72356 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 14:48:32 +0200 Subject: [PATCH 10/14] refactored the code Signed-off-by: Jerry Guo --- src/power_grid_model/_utils.py | 26 ++++++--- .../core/power_grid_dataset.py | 33 ++++++++++- src/power_grid_model/core/serialization.py | 55 +++++++++---------- 3 files changed, 74 insertions(+), 40 deletions(-) diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index bacffd046..25b42d626 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -508,12 +508,7 @@ def is_nan_or_equivalent(array): ) -def component_data_checks(component_data: ComponentData, component=None) -> None: - """Checks if component_data is of ComponentData and raises ValueError if its not""" - component_name = f"'{component}'" if component is not None else "" - err_msg = f"Invalid data for {component_name} component. " "{0}" - err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such." - +def _check_sparse_dense(component_data: ComponentData, err_msg_suffixed: str) -> ComponentData: if is_sparse(component_data): indptr = component_data["indptr"] if not isinstance(indptr, np.ndarray): @@ -521,24 +516,37 @@ def component_data_checks(component_data: ComponentData, component=None) -> None sub_data = component_data["data"] elif isinstance(component_data, dict) and ("indptr" in component_data or "data" in component_data): missing_element = "indptr" if "indptr" not in component_data else "data" - raise KeyError(err_msg.format(f"Missing '{missing_element}' in sparse batch data. ")) + raise KeyError(err_msg_suffixed.format(f"Missing '{missing_element}' in sparse batch data. ")) else: sub_data = component_data + return sub_data + - if is_columnar(component_data): +def _check_columnar_row(sub_data: ComponentData, err_msg_suffixed: str) -> None: + if is_columnar(sub_data): if not isinstance(sub_data, dict): raise TypeError(err_msg_suffixed.format("")) for attribute, attribute_array in sub_data.items(): if not isinstance(attribute_array, np.ndarray): raise TypeError(err_msg_suffixed.format(f"'{attribute}' attribute. ")) if attribute_array.ndim not in [1, 2, 3]: - raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim }")) + raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {attribute_array.ndim}")) elif not isinstance(sub_data, np.ndarray): raise TypeError(err_msg_suffixed.format(f"Invalid data type {type(sub_data).__name__} ")) elif isinstance(sub_data, np.ndarray) and sub_data.ndim not in [1, 2]: raise TypeError(err_msg_suffixed.format(f"Invalid dimension: {sub_data.ndim}. ")) +def component_data_checks(component_data: ComponentData, component=None) -> None: + """Checks if component_data is of ComponentData and raises ValueError if its not""" + component_name = f"'{component}'" if component is not None else "" + err_msg = f"Invalid data for {component_name} component. " "{0}" + err_msg_suffixed = err_msg + "Expecting a 1D/2D Numpy structured array or a dictionary of such." + + sub_data = _check_sparse_dense(component_data, err_msg_suffixed) + _check_columnar_row(sub_data, err_msg_suffixed) + + def _extract_indptr(data: ComponentData) -> IndexPointer: # pragma: no cover """returns indptr and checks if its valid diff --git a/src/power_grid_model/core/power_grid_dataset.py b/src/power_grid_model/core/power_grid_dataset.py index 6a7af3703..f4d627c68 100644 --- a/src/power_grid_model/core/power_grid_dataset.py +++ b/src/power_grid_model/core/power_grid_dataset.py @@ -8,7 +8,7 @@ from typing import Any, Mapping, Optional -from power_grid_model._utils import is_columnar, is_sparse, process_data_filter +from power_grid_model._utils import is_columnar, is_nan_or_equivalent, is_sparse, process_data_filter from power_grid_model.core.buffer_handling import ( BufferProperties, CAttributeBuffer, @@ -419,8 +419,9 @@ def get_data(self) -> Dataset: The Power Grid Model may write to these buffers at a later point in time. Returns: - The full dataset. + The full dataset with filters applied. """ + self._post_filtering() return self._data def get_component_data(self, component: ComponentType) -> ComponentData: @@ -501,6 +502,34 @@ def _get_buffer_properties(self, info: CDatasetInfo) -> Mapping[ComponentType, B if component in self._data_filter } + def _filter_attributes(self, attributes): + keys_to_remove = [] + for attr, array in attributes.items(): + if is_columnar(array): + continue + if is_nan_or_equivalent(array): + keys_to_remove.append(attr) + for key in keys_to_remove: + del attributes[key] + + def _filter_with_option(self): + if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: + for _, attributes in self._data.items(): + self._filter_attributes(attributes) + + def _filter_with_mapping(self): + for component_type, attributes in self._data.items(): + if component_type in self._data_filter: + filter_option = self._data_filter[component_type] + if filter_option is ComponentAttributeFilterOptions.RELEVANT: + self._filter_attributes(attributes) + + def _post_filtering(self): + if isinstance(self._data_filter, ComponentAttributeFilterOptions): + self._filter_with_option() + elif isinstance(self._data_filter, dict): + self._filter_with_mapping() + def _get_filtered_attributes( schema: ComponentMetaData, diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index d04cbd6fe..96d4c5803 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -10,9 +10,6 @@ from ctypes import byref from enum import IntEnum -import numpy as np - -from power_grid_model._utils import is_nan_or_equivalent from power_grid_model.core.dataset_definitions import DatasetType, _map_to_component_types, _str_to_datatype from power_grid_model.core.error_handling import assert_no_error from power_grid_model.core.index_integer import IdxC @@ -25,7 +22,6 @@ ) from power_grid_model.core.power_grid_dataset import CConstDataset, CWritableDataset from power_grid_model.data_types import Dataset -from power_grid_model.enum import ComponentAttributeFilterOptions from power_grid_model.errors import PowerGridSerializationError from power_grid_model.typing import ComponentAttributeMapping @@ -86,31 +82,32 @@ def load(self) -> Dataset: A tuple containing the deserialized dataset in Power grid model input format and the type of the dataset. """ pgc.deserializer_parse_to_buffer(self._deserializer) - - filtered_data = self._dataset.get_data() - - def _filter_attributes(attributes): - keys_to_remove = [] - for attr, array in attributes.items(): - if not isinstance(array, np.ndarray): - continue - if is_nan_or_equivalent(array): - keys_to_remove.append(attr) - for key in keys_to_remove: - del attributes[key] - - if isinstance(self._data_filter, ComponentAttributeFilterOptions): - if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: - for _, attributes in filtered_data.items(): - _filter_attributes(attributes) - elif isinstance(self._data_filter, dict): - for component_type, attributes in filtered_data.items(): - if component_type in self._data_filter: - filter_option = self._data_filter[component_type] - if filter_option is ComponentAttributeFilterOptions.RELEVANT: - _filter_attributes(attributes) - - return filtered_data + return self._dataset.get_data() + + # filtered_data = self._dataset.get_data() + + # def _filter_attributes(attributes): + # keys_to_remove = [] + # for attr, array in attributes.items(): + # if not isinstance(array, np.ndarray): + # continue + # if is_nan_or_equivalent(array): + # keys_to_remove.append(attr) + # for key in keys_to_remove: + # del attributes[key] + + # if isinstance(self._data_filter, ComponentAttributeFilterOptions): + # if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: + # for _, attributes in filtered_data.items(): + # _filter_attributes(attributes) + # elif isinstance(self._data_filter, dict): + # for component_type, attributes in filtered_data.items(): + # if component_type in self._data_filter: + # filter_option = self._data_filter[component_type] + # if filter_option is ComponentAttributeFilterOptions.RELEVANT: + # _filter_attributes(attributes) + + # return filtered_data class Serializer(ABC): From 259bea2741a338d37478ada5b73f6a307ab4bca5 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 14:50:01 +0200 Subject: [PATCH 11/14] comment addressed Signed-off-by: Jerry Guo --- src/power_grid_model/core/power_grid_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model/core/power_grid_dataset.py b/src/power_grid_model/core/power_grid_dataset.py index f4d627c68..f5e7a87f3 100644 --- a/src/power_grid_model/core/power_grid_dataset.py +++ b/src/power_grid_model/core/power_grid_dataset.py @@ -514,7 +514,7 @@ def _filter_attributes(self, attributes): def _filter_with_option(self): if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: - for _, attributes in self._data.items(): + for attributes in self._data.values(): self._filter_attributes(attributes) def _filter_with_mapping(self): From c1eca81551eedaa84bb39135aab9305bb9355754 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 14:51:20 +0200 Subject: [PATCH 12/14] remove commented code Signed-off-by: Jerry Guo --- src/power_grid_model/core/serialization.py | 25 ---------------------- 1 file changed, 25 deletions(-) diff --git a/src/power_grid_model/core/serialization.py b/src/power_grid_model/core/serialization.py index 96d4c5803..914c238df 100644 --- a/src/power_grid_model/core/serialization.py +++ b/src/power_grid_model/core/serialization.py @@ -84,31 +84,6 @@ def load(self) -> Dataset: pgc.deserializer_parse_to_buffer(self._deserializer) return self._dataset.get_data() - # filtered_data = self._dataset.get_data() - - # def _filter_attributes(attributes): - # keys_to_remove = [] - # for attr, array in attributes.items(): - # if not isinstance(array, np.ndarray): - # continue - # if is_nan_or_equivalent(array): - # keys_to_remove.append(attr) - # for key in keys_to_remove: - # del attributes[key] - - # if isinstance(self._data_filter, ComponentAttributeFilterOptions): - # if self._data_filter is ComponentAttributeFilterOptions.RELEVANT: - # for _, attributes in filtered_data.items(): - # _filter_attributes(attributes) - # elif isinstance(self._data_filter, dict): - # for component_type, attributes in filtered_data.items(): - # if component_type in self._data_filter: - # filter_option = self._data_filter[component_type] - # if filter_option is ComponentAttributeFilterOptions.RELEVANT: - # _filter_attributes(attributes) - - # return filtered_data - class Serializer(ABC): """ From d20be6f1fc2a0b46ec865e75e01f8a4f1cf123f0 Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 14:54:44 +0200 Subject: [PATCH 13/14] refactor the test Signed-off-by: Jerry Guo --- tests/unit/test_serialization.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index a0eaac8c6..6e7fc680d 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -610,18 +610,24 @@ def assert_serialization_correct(deserialized_dataset: Dataset, serialized_datas ) +def _check_only_relevant_attributes_present(component_values) -> bool: + for array in component_values.values(): + if not isinstance(array, np.ndarray): + continue + if (array.dtype == np.float64 and np.isnan(array).all()) or ( + array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min) + ): + return False + return True + + def assert_deserialization_filtering_correct(deserialized_dataset: Dataset, data_filter) -> bool: if data_filter is ComponentAttributeFilterOptions.ALL: return True if data_filter is ComponentAttributeFilterOptions.RELEVANT: - for _, component_values in deserialized_dataset.items(): - for _, array in component_values.items(): - if not isinstance(array, np.ndarray): - continue - if (array.dtype == np.float64 and np.isnan(array).all()) or ( - array.dtype in (np.int32, np.int8) and np.all(array == np.iinfo(array.dtype).min) - ): - return False + for component_values in deserialized_dataset.values(): + if not _check_only_relevant_attributes_present(component_values): + return False return True From da1a0a260f84c3685a06bc697642294a4a7daaca Mon Sep 17 00:00:00 2001 From: Jerry Guo Date: Wed, 2 Oct 2024 15:43:24 +0200 Subject: [PATCH 14/14] test combined Signed-off-by: Jerry Guo --- tests/unit/test_serialization.py | 50 +++++++------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index 6e7fc680d..9cea64295 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -384,6 +384,15 @@ def serialized_data(request): pytest.param({"node": ["id"], "sym_load": ["id"]}, id="columnar filter"), pytest.param({"node": ["id"], "sym_load": None}, id="mixed columnar/row filter"), pytest.param({"node": ["id"], "shunt": None}, id="unused component filter"), + pytest.param( + { + "node": ["id"], + "line": ComponentAttributeFilterOptions.ALL, + "sym_load": None, + "asym_load": ComponentAttributeFilterOptions.RELEVANT, + }, + id="mixed filter", + ), ] ) def data_filters(request): @@ -744,6 +753,8 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali assert serialized_result_a == serialized_result_b assert list(deserialized_result_b) == list(deserialized_result_a) + assert assert_deserialization_filtering_correct(deserialized_result_a, data_filters) + assert assert_deserialization_filtering_correct(deserialized_result_b, data_filters) for (component_a, component_result_a), component_result_b in zip( deserialized_result_a.items(), deserialized_result_b.values() @@ -776,42 +787,3 @@ def test_serialize_deserialize_double_round_trip(deserialize, serialize, seriali np.testing.assert_array_equal(nan_a, nan_b) np.testing.assert_allclose(field_result_a[~nan_a], field_result_b[~nan_b], rtol=1e-15) - - -@pytest.mark.parametrize( - ("deserialize", "pack", "data_filter"), - ( - pytest.param(json_deserialize, to_json, ComponentAttributeFilterOptions.ALL, id="json.all"), - pytest.param(json_deserialize, to_json, ComponentAttributeFilterOptions.RELEVANT, id="json.relevant"), - pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.ALL, id="msgpack.all"), - pytest.param(msgpack_deserialize, to_msgpack, ComponentAttributeFilterOptions.RELEVANT, id="msgpack.relevant"), - pytest.param( - json_deserialize, - to_json, - { - "node": None, - "line": ComponentAttributeFilterOptions.ALL, - "sym_load": ["id"], - "asym_load": ComponentAttributeFilterOptions.RELEVANT, - }, - id="json.mixed_filter", - ), - pytest.param( - msgpack_deserialize, - to_msgpack, - { - "node": ["id"], - "line": ComponentAttributeFilterOptions.ALL, - "sym_load": None, - "asym_load": ComponentAttributeFilterOptions.RELEVANT, - }, - id="msgpack.mixed_filter", - ), - ), -) -def test_deserialize_data_filter(deserialize, serialized_data, data_filter, pack): - test_data = pack(serialized_data) - - deserialized_result_a = deserialize(test_data, data_filter) - - assert assert_deserialization_filtering_correct(deserialized_result_a, data_filter)