Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<a><img alt="Coverage" src="https://raw.githubusercontent.com/STRONGAYA/v6-tools-general/main/tests/coverage-badge.svg"></a>
<a href="https://www.python.org/downloads/"><img alt="Python 3.10+" src="https://img.shields.io/badge/python-3.10+-blue.svg"></a>
<a href="https://opensource.org/licenses/Apache-2.0"><img alt="Licence: Apache 2.0" src="https://img.shields.io/badge/Licence-Apache%202.0-blue.svg"></a>
<a href="https://strongaya.eu/wp-content/uploads/2025/07/algorithm_review_guidelines.pdf"><img alt="STRONG AYA Algorithm Guideline Conformity: v1.0.2 Pending" src="https://img.shields.io/badge/STRONG%20AYA%20Algorithm%20Guideline%20Conformity-v1.0.2%20pending-yellow"></a>
<a href="https://strongaya.eu/wp-content/uploads/2025/07/algorithm_review_guidelines.pdf"><img alt="STRONG AYA Algorithm Guideline Conformity: v1.0.3 Pending" src="https://img.shields.io/badge/STRONG%20AYA%20Algorithm%20Guideline%20Conformity-v1.0.3%20pending-yellow"></a>
<br>
<a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
<a href="https://flake8.pycqa.org/"><img alt="Linting: flake8" src="https://img.shields.io/badge/linting-flake8-informational"></a>
Expand All @@ -16,7 +16,7 @@

<!--
To show the approved badge instead, use:
<a href="https://strongaya.eu/wp-content/uploads/2025/07/algorithm_review_guidelines.pdf"><img alt="STRONG AYA Algorithm Guideline Conformity: v1.0.2 Approved" src="https://img.shields.io/badge/STRONG%20AYA%20Algorithm%20Guideline%20Conformity-v1.0.2%20approved-brightgreen">
<a href="https://strongaya.eu/wp-content/uploads/2025/07/algorithm_review_guidelines.pdf"><img alt="STRONG AYA Algorithm Guideline Conformity: v1.0.3 Approved" src="https://img.shields.io/badge/STRONG%20AYA%20Algorithm%20Guideline%20Conformity-v1.0.3%20approved-brightgreen">
-->

# Purpose of this repository
Expand Down Expand Up @@ -51,13 +51,13 @@ algorithm.
For the `requirements.txt` file, you can add the following line to the file:

```
git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.2
git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.3
```

For the `setup.py` file, you can add the following line to the `install_requires` list:

```python
"vantage6-strongaya-general @ git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.2",
"vantage6-strongaya-general @ git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.3",
```

The algorithm's `setup.py` file, particularly the `install_requires` section, should then look something like this:
Expand All @@ -84,7 +84,7 @@ setup(
'vantage6-algorithm-tools',
'numpy',
'pandas',
"vantage6-strongaya-general @ git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.2"
"vantage6-strongaya-general @ git+https://github.com/STRONGAYA/v6-tools-general.git@v1.0.3"
# other dependencies
]
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "vantage6-strongaya-general"
version = "1.0.2"
version = "1.0.3"
description = "A small library with various general tools for Vantage6 algorithms developed for the STRONG AYA Project."
authors = [
{name = "Joshi Hogenboom (@Jhogenboom)", email = "joshi.hogenboom@maastrichtuniversity.nl"},
Expand Down
4 changes: 3 additions & 1 deletion src/vantage6_strongaya_general/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
check_partial_result_presence,
apply_data_stratification,
set_datatypes,
convert_to_json_serialisable,
)

from .privacy_measures import (
Expand All @@ -33,6 +34,7 @@
"check_partial_result_presence",
"apply_data_stratification",
"set_datatypes",
"convert_to_json_serialisable",
"mask_unnecessary_variables",
"apply_sample_size_threshold",
"apply_differential_privacy",
Expand All @@ -41,4 +43,4 @@
"compute_local_general_statistics",
"compute_local_adjusted_deviation",
]
__version__ = "1.0.2"
__version__ = "1.0.3"
43 changes: 38 additions & 5 deletions src/vantage6_strongaya_general/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json

import pandas as pd
import numpy as np

from typing import (
Any,
Expand Down Expand Up @@ -77,7 +78,7 @@ def apply_data_stratification(

Caution: The flexibility provided by this function may facilitate differencing attacks if not implemented carefully.
Consider including extra privacy-enhancing mechanisms by applying differential privacy post-stratification
or restricting the variables on which one can stratify -
or restricting the variables on which one can stratify
e.g. through environment variables or a dedicated database and/or list.

Args:
Expand Down Expand Up @@ -431,6 +432,35 @@ def check_partial_result_presence(
)


def convert_to_json_serialisable(obj: Any) -> Any:
    """
    Convert numpy/pandas types to native Python types for JSON serialisation.

    Containers (dict, list, tuple, numpy arrays, pandas Series and Index) are
    converted recursively, so nested numpy scalars and missing values are
    handled the same way at every depth. Tuples and array-likes become lists.
    Missing scalar values (None, NaN, pd.NA, pd.NaT) become None.

    Args:
        obj (Any): The object to convert

    Returns:
        Any: A JSON-serialisable version of the object. Objects of a type that
            is not handled here are returned unchanged, so that the caller's
            own ``json.dumps`` check can report them.
    """
    if isinstance(obj, dict):
        # json.dumps rejects numpy scalars as keys, so unwrap those to their
        # native Python equivalent; other keys are kept untouched.
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_to_json_serialisable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [convert_to_json_serialisable(item) for item in obj]
    if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
        # tolist() leaves object-dtype elements and NaN as they are, so pass
        # the result through the converter again (a 0-d array yields a scalar).
        return convert_to_json_serialisable(obj.tolist())

    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        # Treat a numpy NaN like a Python NaN: both map to None.
        return None if np.isnan(obj) else float(obj)

    # pd.isna returns an array/frame for array-like input, whose truth value
    # is ambiguous and raises ValueError; only test genuine scalars.
    if pd.api.types.is_scalar(obj) and pd.isna(obj):
        return None
    return obj


@pd.api.extensions.register_dataframe_accessor("predetermined_info")
class PredeterminedInfoAccessor:
"""
Expand Down Expand Up @@ -458,7 +488,7 @@ def __init__(self, pandas_obj: pd.DataFrame):

def _check_initialized(self) -> None:
"""
Initialize the storage if not already done.
Initialise the storage if not already done.

Ensures the DataFrame has the necessary attribute storage structure.
"""
Expand Down Expand Up @@ -542,6 +572,9 @@ def calculate_for_df(**kwargs: Any) -> Any:

value = safe_calculate(calculate_for_df, default_value=None, **kwargs)

# Convert numpy/pandas types to native Python types
value = convert_to_json_serialisable(value)

try:
json.dumps(value)
except TypeError:
Expand All @@ -566,7 +599,7 @@ def get_stat(self, stat_name: str, column: Optional[str] = None) -> Any:
Any: The stored statistic value

Raises:
KeyError: If statistic not found
KeyError: If the statistic is not found
"""
if stat_name not in self._obj.attrs["stats"]:
safe_log("error", f"Statistic '{stat_name}' not found")
Expand Down Expand Up @@ -599,7 +632,7 @@ def get_column_stats(self, column: str) -> Dict[str, Any]:
Dict[str, Any]: Dictionary containing all statistics for the column

Raises:
KeyError: If column not found in DataFrame
KeyError: If the column is not found in DataFrame
"""
if column not in self._obj.columns:
safe_log("error", f"Column '{column}' not found in DataFrame")
Expand Down Expand Up @@ -646,7 +679,7 @@ def update_stat(self, stat_name: str, **kwargs: Any) -> None:
**kwargs: Arguments to pass to calculation function

Raises:
KeyError: If statistic not found
KeyError: If the statistic is not found
"""
if stat_name not in self._obj.attrs["stats"]:
safe_log(
Expand Down
Loading