From b5dd69eeeb461469a1b70c8e6bec871f538edf06 Mon Sep 17 00:00:00 2001 From: kingjr Date: Wed, 11 Dec 2024 11:17:42 +0100 Subject: [PATCH 1/7] example sklearn --- docs/infra/example_sklearn.py | 86 +++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/infra/example_sklearn.py diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py new file mode 100644 index 00000000..b94099f8 --- /dev/null +++ b/docs/infra/example_sklearn.py @@ -0,0 +1,86 @@ +""" +A minimalist example with sklearn to show how to develop and explore a model with exca. +""" +import typing as tp +import numpy as np +import pydantic +import sys +import exca +from sklearn.datasets import make_regression +from sklearn.model_selection import train_test_split +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error + + +class Dataset(pydantic.BaseModel): + n_samples: int = 100 + noise: float = 0.1 + random_state: int = 42 + test_size: float = 0.2 + model_config = pydantic.ConfigDict(extra="forbid") + + def get(self) -> tp.Tuple[np.ndarray]: + # Generate synthetic data + X, y = make_regression( + n_samples=self.n_samples, + noise=self.noise, + random_state=self.random_state + ) + # Split into training and testing datasets + X_train, X_test, y_train, y_test = train_test_split( + X, y, + test_size=self.test_size, + random_state=self.random_state + ) + return X_train, X_test, y_train, y_test + + +class Model(pydantic.BaseModel): + data: Dataset = Dataset() + alpha: float = 1.0 + max_iter: int = 1000 + infra: exca.TaskInfra = exca.TaskInfra(folder='.cache/') + + @infra.apply + def score(self): + # Get data + X_train, X_test, y_train, y_test = self.data.get() + + # Train a Ridge regression model + print('Fit...') + model = Ridge(alpha=self.alpha, max_iter=self.max_iter) + model.fit(X_train, y_train) + + # Evaluate + print('Score...') + y_pred = model.predict(X_test) + mse = mean_squared_error(y_test, y_pred) + return mse + + +def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]: + """ + Parses a list of Bash-style arguments (e.g., --key=value) into a nested dict. + """ + nested_dict = {} + for arg in args: + # Split argument into key and value + key, value = arg.lstrip("--").split("=", 1) + # Convert flat key into a nested dictionary + keys = key.split(".") + current_level = nested_dict + for k in keys[:-1]: + current_level = current_level.setdefault(k, {}) + current_level[keys[-1]] = value + return nested_dict + + +if __name__ == "__main__": + # Validate config + config = args_to_nested_dict(sys.argv[1:]) + model = Model(**config) + print(model.infra.config) + + # Score + mse = model.score() + print(mse) From 53aa337176e92c8791ba4fb827eab0e30f04c997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Tue, 24 Dec 2024 10:45:38 +0100 Subject: [PATCH 2/7] Update example_sklearn.py --- docs/infra/example_sklearn.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py index b94099f8..ac535593 100644 --- a/docs/infra/example_sklearn.py +++ b/docs/infra/example_sklearn.py @@ -1,3 +1,8 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. """ A minimalist example with sklearn to show how to develop and explore a model with exca. """ From 7912ba6ff61e214a26ce09c10e4e433a93cb0702 Mon Sep 17 00:00:00 2001 From: Jeremy Rapin Date: Tue, 24 Dec 2024 11:28:18 +0100 Subject: [PATCH 3/7] Add packages for examples in docs --- .github/workflows/test-type-lint.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-type-lint.yaml b/.github/workflows/test-type-lint.yaml index 51e4241f..629d13c0 100644 --- a/.github/workflows/test-type-lint.yaml +++ b/.github/workflows/test-type-lint.yaml @@ -48,6 +48,7 @@ jobs: run: | source activate ./ci_env pip install -e .[dev] + pip install sklearn lightning # for docs - name: Print installed packages run: | @@ -76,7 +77,7 @@ jobs: sed -i 's/\"auto\"/None/g' README.md # on Mac: sed -i '' 's/cluster: slurm/cluster: null/g' infra/*.md # check readmes - pytest --markdown-docs -m markdown-docs `**/*.md` + pytest --markdown-docs -m markdown-docs . - name: Run basic pylint run: | From 642aeb05e0ff8576c1d4cc5b15b907d1bc565069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Tue, 24 Dec 2024 11:48:44 +0100 Subject: [PATCH 4/7] Update .github/workflows/test-type-lint.yaml --- .github/workflows/test-type-lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-type-lint.yaml b/.github/workflows/test-type-lint.yaml index 629d13c0..cc1ee818 100644 --- a/.github/workflows/test-type-lint.yaml +++ b/.github/workflows/test-type-lint.yaml @@ -48,7 +48,7 @@ jobs: run: | source activate ./ci_env pip install -e .[dev] - pip install sklearn lightning # for docs + pip install scikit-learn lightning # for docs - name: Print installed packages run: | From cd60efe8c9a67977ca03217feac5e82af678a42c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Fri, 21 Mar 2025 14:41:34 +0100 Subject: [PATCH 5/7] Update docs/infra/example_sklearn.py --- docs/infra/example_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py index ac535593..1b364b03 100644 --- a/docs/infra/example_sklearn.py +++ b/docs/infra/example_sklearn.py @@ -82,7 +82,7 @@ def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]: if __name__ == "__main__": # Validate config - config = args_to_nested_dict(sys.argv[1:]) + config = exca.ConfDict.from_args(sys.argv[1:]) model = Model(**config) print(model.infra.config) From c31934694118f230e30f343f946cc2fdacd96560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Fri, 21 Mar 2025 14:41:44 +0100 Subject: [PATCH 6/7] Update docs/infra/example_sklearn.py --- docs/infra/example_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py index 1b364b03..848aa0a8 100644 --- a/docs/infra/example_sklearn.py +++ b/docs/infra/example_sklearn.py @@ -24,7 +24,7 @@ class Dataset(pydantic.BaseModel): test_size: float = 0.2 model_config = pydantic.ConfigDict(extra="forbid") - def get(self) -> tp.Tuple[np.ndarray]: + def get(self) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: # Generate synthetic data X, y = make_regression( n_samples=self.n_samples, From 1777399926ee0c646d1d5a08773162a3a47e1824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Fri, 21 Mar 2025 14:41:56 +0100 Subject: [PATCH 7/7] Update docs/infra/example_sklearn.py --- docs/infra/example_sklearn.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py index 848aa0a8..2b78d9f5 100644 --- a/docs/infra/example_sklearn.py +++ b/docs/infra/example_sklearn.py @@ -63,23 +63,6 @@ def score(self): return mse -def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]: - """ - Parses a list of Bash-style arguments (e.g., --key=value) into a nested dict. - """ - nested_dict = {} - for arg in args: - # Split argument into key and value - key, value = arg.lstrip("--").split("=", 1) - # Convert flat key into a nested dictionary - keys = key.split(".") - current_level = nested_dict - for k in keys[:-1]: - current_level = current_level.setdefault(k, {}) - current_level[keys[-1]] = value - return nested_dict - - if __name__ == "__main__": # Validate config config = exca.ConfDict.from_args(sys.argv[1:])