From b5dd69eeeb461469a1b70c8e6bec871f538edf06 Mon Sep 17 00:00:00 2001
From: kingjr <jeanremi.king@gmail.com>
Date: Wed, 11 Dec 2024 11:17:42 +0100
Subject: [PATCH 1/7] example sklearn

---
 docs/infra/example_sklearn.py | 86 +++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 docs/infra/example_sklearn.py

diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py
new file mode 100644
index 00000000..b94099f8
--- /dev/null
+++ b/docs/infra/example_sklearn.py
@@ -0,0 +1,86 @@
+"""
+A minimalist example with sklearn to show how to develop and explore a model with exca.
+"""
+import typing as tp
+import numpy as np
+import pydantic
+import sys
+import exca
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import Ridge
+from sklearn.metrics import mean_squared_error
+
+
+class Dataset(pydantic.BaseModel):
+    n_samples: int = 100
+    noise: float = 0.1
+    random_state: int = 42
+    test_size: float = 0.2
+    model_config = pydantic.ConfigDict(extra="forbid")
+
+    def get(self) -> tp.Tuple[np.ndarray]:
+        # Generate synthetic data
+        X, y = make_regression(
+            n_samples=self.n_samples,
+            noise=self.noise,
+            random_state=self.random_state
+        )
+        # Split into train/test sets, reusing the same random_state
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y,
+            test_size=self.test_size,
+            random_state=self.random_state
+        )
+        return X_train, X_test, y_train, y_test
+
+
+class Model(pydantic.BaseModel):
+    data: Dataset = Dataset()
+    alpha: float = 1.0
+    max_iter: int = 1000
+    infra: exca.TaskInfra = exca.TaskInfra(folder='.cache/')
+
+    @infra.apply
+    def score(self):
+        # Build the train/test arrays from the Dataset sub-config
+        X_train, X_test, y_train, y_test = self.data.get()
+
+        # Train a Ridge regression model
+        print('Fit...')
+        model = Ridge(alpha=self.alpha, max_iter=self.max_iter)
+        model.fit(X_train, y_train)
+
+        # Evaluate on the held-out split
+        print('Score...')
+        y_pred = model.predict(X_test)
+        mse = mean_squared_error(y_test, y_pred)
+        return mse
+
+
+def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]:
+    """
+    Parses a list of Bash-style arguments (e.g., --key=value) into a nested dict.
+    """
+    nested_dict = {}
+    for arg in args:
+        # Split argument into key and value
+        key, value = arg.lstrip("--").split("=", 1)
+        # Convert flat key into a nested dictionary
+        keys = key.split(".")
+        current_level = nested_dict
+        for k in keys[:-1]:
+            current_level = current_level.setdefault(k, {})
+        current_level[keys[-1]] = value
+    return nested_dict
+
+
+if __name__ == "__main__":
+    # Validate config
+    config = args_to_nested_dict(sys.argv[1:])
+    model = Model(**config)
+    print(model.infra.config)
+
+    # Score
+    mse = model.score()
+    print(mse)

From 53aa337176e92c8791ba4fb827eab0e30f04c997 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= <jrapin@meta.com>
Date: Tue, 24 Dec 2024 10:45:38 +0100
Subject: [PATCH 2/7] Add license header to example_sklearn.py

---
 docs/infra/example_sklearn.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py
index b94099f8..ac535593 100644
--- a/docs/infra/example_sklearn.py
+++ b/docs/infra/example_sklearn.py
@@ -1,3 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
 """
 A minimalist example with sklearn to show how to develop and explore a model with exca.
 """

From 7912ba6ff61e214a26ce09c10e4e433a93cb0702 Mon Sep 17 00:00:00 2001
From: Jeremy Rapin <jrapin@meta.com>
Date: Tue, 24 Dec 2024 11:28:18 +0100
Subject: [PATCH 3/7] Add packages for examples in docs

---
 .github/workflows/test-type-lint.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test-type-lint.yaml b/.github/workflows/test-type-lint.yaml
index 51e4241f..629d13c0 100644
--- a/.github/workflows/test-type-lint.yaml
+++ b/.github/workflows/test-type-lint.yaml
@@ -48,6 +48,7 @@ jobs:
       run: |
         source activate ./ci_env
         pip install -e .[dev]
+        pip install sklearn lightning  # for docs
 
     - name: Print installed packages
       run: |
@@ -76,7 +77,7 @@ jobs:
         sed -i 's/\"auto\"/None/g' README.md
         # on Mac: sed -i '' 's/cluster: slurm/cluster: null/g' infra/*.md
         # check readmes
-        pytest --markdown-docs -m markdown-docs `**/*.md`
+        pytest --markdown-docs -m markdown-docs .
 
     - name: Run basic pylint
       run: |

From 642aeb05e0ff8576c1d4cc5b15b907d1bc565069 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= <jrapin@meta.com>
Date: Tue, 24 Dec 2024 11:48:44 +0100
Subject: [PATCH 4/7] Install scikit-learn instead of sklearn in CI

---
 .github/workflows/test-type-lint.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-type-lint.yaml b/.github/workflows/test-type-lint.yaml
index 629d13c0..cc1ee818 100644
--- a/.github/workflows/test-type-lint.yaml
+++ b/.github/workflows/test-type-lint.yaml
@@ -48,7 +48,7 @@ jobs:
       run: |
         source activate ./ci_env
         pip install -e .[dev]
-        pip install sklearn lightning  # for docs
+        pip install scikit-learn lightning  # for docs
 
     - name: Print installed packages
       run: |

From cd60efe8c9a67977ca03217feac5e82af678a42c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= <jrapin@meta.com>
Date: Fri, 21 Mar 2025 14:41:34 +0100
Subject: [PATCH 5/7] Use exca.ConfDict.from_args in example_sklearn.py

---
 docs/infra/example_sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py
index ac535593..1b364b03 100644
--- a/docs/infra/example_sklearn.py
+++ b/docs/infra/example_sklearn.py
@@ -82,7 +82,7 @@ def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]:
 
 if __name__ == "__main__":
     # Validate config
-    config = args_to_nested_dict(sys.argv[1:])
+    config = exca.ConfDict.from_args(sys.argv[1:])
     model = Model(**config)
     print(model.infra.config)
 

From c31934694118f230e30f343f946cc2fdacd96560 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= <jrapin@meta.com>
Date: Fri, 21 Mar 2025 14:41:44 +0100
Subject: [PATCH 6/7] Fix Dataset.get return annotation in example_sklearn.py

---
 docs/infra/example_sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py
index 1b364b03..848aa0a8 100644
--- a/docs/infra/example_sklearn.py
+++ b/docs/infra/example_sklearn.py
@@ -24,7 +24,7 @@ class Dataset(pydantic.BaseModel):
     test_size: float = 0.2
     model_config = pydantic.ConfigDict(extra="forbid")
 
-    def get(self) -> tp.Tuple[np.ndarray]:
+    def get(self) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         # Generate synthetic data
         X, y = make_regression(
             n_samples=self.n_samples,

From 1777399926ee0c646d1d5a08773162a3a47e1824 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= <jrapin@meta.com>
Date: Fri, 21 Mar 2025 14:41:56 +0100
Subject: [PATCH 7/7] Remove unused args_to_nested_dict from example_sklearn.py

---
 docs/infra/example_sklearn.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/docs/infra/example_sklearn.py b/docs/infra/example_sklearn.py
index 848aa0a8..2b78d9f5 100644
--- a/docs/infra/example_sklearn.py
+++ b/docs/infra/example_sklearn.py
@@ -63,23 +63,6 @@ def score(self):
         return mse
 
 
-def args_to_nested_dict(args: list[str]) -> tp.Dict[str, tp.Any]:
-    """
-    Parses a list of Bash-style arguments (e.g., --key=value) into a nested dict.
-    """
-    nested_dict = {}
-    for arg in args:
-        # Split argument into key and value
-        key, value = arg.lstrip("--").split("=", 1)
-        # Convert flat key into a nested dictionary
-        keys = key.split(".")
-        current_level = nested_dict
-        for k in keys[:-1]:
-            current_level = current_level.setdefault(k, {})
-        current_level[keys[-1]] = value
-    return nested_dict
-
-
 if __name__ == "__main__":
     # Validate config
     config = exca.ConfDict.from_args(sys.argv[1:])