From eeda699c55dfc2f8ea6dbc467e73ea9312bb9d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:33:27 +0200 Subject: [PATCH 1/9] Update build --- .local.jenkins.lin.yml | 1 - appveyor.yml | 1 - mlinsights/mlmodel/quantile_mlpregressor.py | 38 ++++++--------------- requirements-win.txt | 17 --------- requirements.txt | 3 +- 5 files changed, 12 insertions(+), 48 deletions(-) delete mode 100644 requirements-win.txt diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml index 5a88a30a..403b9d91 100644 --- a/.local.jenkins.lin.yml +++ b/.local.jenkins.lin.yml @@ -11,7 +11,6 @@ install: - $PYINT -m pip install --upgrade pip - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper cpyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/ - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ scikit-learn>=0.22 --extra-index-url=https://pypi.python.org/simple/ - - $PYINT -m pip install -r requirements-win.txt - $PYINT -m pip install -r requirements.txt - $PYINT --version - $PYINT -m pip freeze diff --git a/appveyor.yml b/appveyor.yml index 143ac0b5..7e5afb0c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,7 +13,6 @@ install: - "%PYTHON%\\python -m pip install --upgrade pip" # for many packages - pip install llvmlite numba - - "%PYTHON%\\Scripts\\pip install -r requirements-win.txt" # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies diff --git a/mlinsights/mlmodel/quantile_mlpregressor.py b/mlinsights/mlmodel/quantile_mlpregressor.py index ddc96de9..c0d4f790 100644 --- a/mlinsights/mlmodel/quantile_mlpregressor.py +++ b/mlinsights/mlmodel/quantile_mlpregressor.py @@ -146,36 +146,20 @@ def _backprop(self, X, y, activations, deltas, coef_grads, # due to the modification of the loss function. 
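#     (Illustrative sketch, not a line of this patch.)  The body of
#     ``_modify_loss_derivatives`` is not shown anywhere in this diff; for the
#     absolute (L1) loss that QuantileMLPRegressor optimizes, it presumably turns
#     the squared-loss residual ``activations[-1] - y`` into its sign, roughly:
#
#         def _modify_loss_derivatives(self, last_deltas):
#             # assumed behaviour: derivative of |y_pred - y| w.r.t. y_pred;
#             # scikit-learn's _compute_loss_grad divides by n_samples afterwards
#             return numpy.sign(last_deltas)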
deltas[last] = self._modify_loss_derivatives(deltas[last]) + # recent version of scikit-learn # Compute gradient for the last layer - temp = self._compute_loss_grad( # pylint: disable=E1111 + self._compute_loss_grad( last, n_samples, activations, deltas, coef_grads, intercept_grads) - if temp is None: - # recent version of scikit-learn - # Compute gradient for the last layer + + inplace_derivative = DERIVATIVES[self.activation] + # Iterate over the hidden layers + for i in range(self.n_layers_ - 2, 0, -1): + deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) + inplace_derivative(activations[i], deltas[i - 1]) + self._compute_loss_grad( - last, n_samples, activations, deltas, coef_grads, intercept_grads) - - inplace_derivative = DERIVATIVES[self.activation] - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative(activations[i], deltas[i - 1]) - - self._compute_loss_grad( - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) - else: # pragma: no cover - coef_grads, intercept_grads = temp # pylint: disable=E0633 - - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative = DERIVATIVES[self.activation] - inplace_derivative(activations[i], deltas[i - 1]) - - coef_grads, intercept_grads = self._compute_loss_grad( # pylint: disable=E1111,E0633 - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) + i - 1, n_samples, activations, deltas, coef_grads, + intercept_grads) return loss, coef_grads, intercept_grads diff --git a/requirements-win.txt b/requirements-win.txt deleted file mode 100644 index bbed5e0d..00000000 --- a/requirements-win.txt +++ /dev/null @@ -1,17 +0,0 @@ -astroid -ijson -importlib_metadata -ipython -isort -joblib -jupyter -matplotlib -nbformat -numpy -pandas -psutil -pylint>=2.14.0 -pymyinstall -pyshp -scikit-learn>=1.0 -threadpoolctl diff --git a/requirements.txt b/requirements.txt index 9580f4b0..edf1da06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,8 +25,7 @@ scikit-learn>=1.0 scipy seaborn skl2onnx -sphinx>=3.0 -sphinxcontrib.imagesvg +sphinx>=5.0 sphinx_gallery tqdm wheel From 315346f17ee79494f969ea31eef48e12f62bc9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:41:44 +0200 Subject: [PATCH 2/9] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index edf1da06..58e344a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ chardet coverage cpyquickhelper>=0.3 cython +ipython joblib jupyter_sphinx>=0.2 jyquickhelper From 7fbbd3fd0c6256e4290aef34643628b03019fdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:53:01 +0200 Subject: [PATCH 3/9] first sketch of linkedmlpregressor --- .../ut_mlmodel/test_linked_mlpregression.py | 70 +++++ mlinsights/mlmodel/__init__.py | 1 + mlinsights/mlmodel/linked_mlpregressor.py | 254 ++++++++++++++++++ requirements.txt | 1 + 4 files changed, 326 insertions(+) create mode 100644 _unittests/ut_mlmodel/test_linked_mlpregression.py create mode 100644 mlinsights/mlmodel/linked_mlpregressor.py diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py new file mode 100644 index 00000000..616a6e03 --- /dev/null +++ 
b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +""" +@brief test log(time=2s) +""" +import unittest +import numpy +from numpy.random import random +import pandas +from sklearn.neural_network import MLPRegressor +from sklearn.metrics import mean_absolute_error +from sklearn.exceptions import ConvergenceWarning +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlinsights.mlmodel import LinkedMLPRegressor +from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv + + +class TestLinkedMLPRegression(ExtTestCase): + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_diff(self): + X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)) + clq.fit(X, Y) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_pickle(self): + X = random(100) + eps1 = (random(90) - 0.5) * 0.1 + eps2 = random(10) * 2 + eps = numpy.hstack([eps1, eps2]) + X = X.reshape((100, 1)) # pylint: disable=E1101 + Y = X.ravel() * 3.4 + 5.6 + eps + test_sklearn_pickle(lambda: MLPRegressor( + hidden_layer_sizes=(3,)), X, Y) + test_sklearn_pickle(lambda: LinkedMLPRegressor( + hidden_layer_sizes=(3,)), X, Y) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_clone(self): + test_sklearn_clone(lambda: LinkedMLPRegressor()) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_grid_search(self): + X = random(100) + eps1 = (random(90) - 0.5) * 0.1 + eps2 = random(10) * 2 + eps = numpy.hstack([eps1, eps2]) + X = X.reshape((100, 1)) # pylint: disable=E1101 + Y = X.ravel() * 3.4 + 5.6 + eps + self.assertRaise(lambda: test_sklearn_grid_search_cv( + lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y), ValueError) + res = test_sklearn_grid_search_cv(lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), + X, Y, learning_rate_init=[0.001, 0.0001]) + self.assertIn('model', res) + self.assertIn('score', res) + self.assertGreater(res['score'], 0) + self.assertLesser(res['score'], 11) + + +if __name__ == "__main__": + unittest.main() diff --git a/mlinsights/mlmodel/__init__.py b/mlinsights/mlmodel/__init__.py index df30feb0..5108611f 100644 --- a/mlinsights/mlmodel/__init__.py +++ b/mlinsights/mlmodel/__init__.py @@ -8,6 +8,7 @@ from .decision_tree_logreg import DecisionTreeLogisticRegression from .extended_features import ExtendedFeatures from .interval_regressor import IntervalRegressor +from .linked_mlpregressor import LinkedMLPRegressor from .kmeans_constraint import ConstraintKMeans from .kmeans_l1 import KMeansL1L2 from .ml_featurizer import model_featurizer diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py new file mode 100644 index 00000000..578d8ad9 --- /dev/null +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- +""" +@file +@brief Implements a quantile non-linear regression. 
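# Minimal usage sketch, mirroring the unit test above (not part of the new module):
# LinkedMLPRegressor is intended as a drop-in replacement for scikit-learn's MLPRegressor.
import numpy
from sklearn.neural_network import MLPRegressor
from mlinsights.mlmodel import LinkedMLPRegressor

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
clr = MLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)        # baseline
clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)  # same API
print(clr.predict(X))
print(clq.predict(X))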
+""" +import inspect +import numpy as np +from sklearn.base import RegressorMixin +from sklearn.utils import check_X_y, column_or_1d +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.extmath import safe_sparse_dot +from sklearn.neural_network._base import DERIVATIVES, LOSS_FUNCTIONS +from sklearn.neural_network import MLPRegressor + + +class LinkedMLPBase: + + def _backprop(self, X, y, activations, deltas, coef_grads, + intercept_grads): + """ + Computes the MLP loss function and its corresponding derivatives + with respect to each parameter: weights and bias vectors. + + :param X: {array-like, sparse matrix}, shape (n_samples, n_features) + The input data. + :param y: array-like, shape (n_samples,) + The target values. + :param activations: list, length = n_layers - 1 + The ith element of the list holds the values of the ith layer. + :param deltas: list, length = n_layers - 1 + The ith element of the list holds the difference between the + activations of the i + 1 layer and the backpropagated error. + More specifically, deltas are gradients of loss with respect to z + in each layer, where z = wx + b is the value of a particular layer + before passing through the activation function + :param coef_grads: list, length = n_layers - 1 + The ith element contains the amount of change used to update the + coefficient parameters of the ith layer in an iteration. + :param intercept_grads: list, length = n_layers - 1 + The ith element contains the amount of change used to update the + intercept parameters of the ith layer in an iteration. + :return: loss, float + :return: coef_grads, list, length = n_layers - 1 + :return: intercept_grads, list, length = n_layers - 1 + """ + stop + n_samples = X.shape[0] + + # Forward propagate + activations = self._forward_pass(activations) + + # Get loss + loss_func_name = self.loss + if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic': + loss_func_name = 'binary_log_loss' + loss_function = self._get_loss_function(loss_func_name) + loss = loss_function(y, activations[-1]) + # Add L2 regularization term to loss + values = np.sum( + np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_])) + loss += (0.5 * self.alpha) * values / n_samples + + # Backward propagate + last = self.n_layers_ - 2 + + # The calculation of delta[last] here works with following + # combinations of output activation and loss function: + # sigmoid and binary cross entropy, softmax and categorical cross + # entropy, and identity with squared loss + deltas[last] = activations[-1] - y + + # We insert the following modification to modify the gradient + # due to the modification of the loss function. + deltas[last] = self._modify_loss_derivatives(deltas[last]) + + # recent version of scikit-learn + # Compute gradient for the last layer + self._compute_loss_grad( + last, n_samples, activations, deltas, coef_grads, intercept_grads) + + inplace_derivative = DERIVATIVES[self.activation] + # Iterate over the hidden layers + for i in range(self.n_layers_ - 2, 0, -1): + deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) + inplace_derivative(activations[i], deltas[i - 1]) + + self._compute_loss_grad( + i - 1, n_samples, activations, deltas, coef_grads, + intercept_grads) + + return loss, coef_grads, intercept_grads + + +class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): + """ + Quantile MLP Regression or neural networks regression + trained with norm :epkg:`L1`. This class inherits from + :epkg:`sklearn:neural_networks:MLPRegressor`. 
+ This model optimizes the absolute-loss using LBFGS or stochastic gradient + descent. See @see cl CustomizedMultilayerPerceptron and + @see fn absolute_loss. + + :param hidden_layer_sizes: tuple, length = n_layers - 2, default (100,) + The ith element represents the number of neurons in the ith + hidden layer. + :param activation: {'identity', 'logistic', 'tanh', 'relu'}, default 'relu' + Activation function for the hidden layer. + - 'identity', no-op activation, useful to implement linear bottleneck, + returns :math:`f(x) = x` + - 'logistic', the logistic sigmoid function, + returns :math:`f(x) = 1 / (1 + exp(-x))`. + - 'tanh', the hyperbolic tan function, + returns :math:`f(x) = tanh(x)`. + - 'relu', the rectified linear unit function, + returns :math:`f(x) = \\max(0, x)`. + :param solver: ``{'lbfgs', 'sgd', 'adam'}``, default 'adam' + The solver for weight optimization. + - *'lbfgs'* is an optimizer in the family of quasi-Newton methods. + - *'sgd'* refers to stochastic gradient descent. + - *'adam'* refers to a stochastic gradient-based optimizer proposed by + Kingma, Diederik, and Jimmy Ba + Note: The default solver 'adam' works pretty well on relatively + large datasets (with thousands of training samples or more) in terms of + both training time and validation score. + For small datasets, however, 'lbfgs' can converge faster and perform + better. + :param alpha: float, optional, default 0.0001 + :epkg:`L2` penalty (regularization term) parameter. + :param batch_size: int, optional, default 'auto' + Size of minibatches for stochastic optimizers. + If the solver is 'lbfgs', the classifier will not use minibatch. + When set to "auto", `batch_size=min(200, n_samples)` + :param learning_rate: {'constant', 'invscaling', 'adaptive'}, default 'constant' + Learning rate schedule for weight updates. + - 'constant' is a constant learning rate given by + 'learning_rate_init'. + - 'invscaling' gradually decreases the learning rate ``learning_rate_`` + at each time step 't' using an inverse scaling exponent of 'power_t'. + effective_learning_rate = learning_rate_init / pow(t, power_t) + - 'adaptive' keeps the learning rate constant to + 'learning_rate_init' as long as training loss keeps decreasing. + Each time two consecutive epochs fail to decrease training loss by at + least tol, or fail to increase validation score by at least tol if + 'early_stopping' is on, the current learning rate is divided by 5. + Only used when solver='sgd'. + :param learning_rate_init: double, optional, default 0.001 + The initial learning rate used. It controls the step-size + in updating the weights. Only used when solver='sgd' or 'adam'. + :param power_t: double, optional, default 0.5 + The exponent for inverse scaling learning rate. + It is used in updating effective learning rate when the learning_rate + is set to 'invscaling'. Only used when solver='sgd'. + :param max_iter: int, optional, default 200 + Maximum number of iterations. The solver iterates until convergence + (determined by 'tol') or this number of iterations. For stochastic + solvers ('sgd', 'adam'), note that this determines the number of epochs + (how many times each data point will be used), not the number of + gradient steps. + :param shuffle: bool, optional, default True + Whether to shuffle samples in each iteration. Only used when + solver='sgd' or 'adam'. 
+ :param random_state: int, RandomState instance or None, optional, default None + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + :param tol: float, optional, default 1e-4 + Tolerance for the optimization. When the loss or score is not improving + by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, + unless ``learning_rate`` is set to 'adaptive', convergence is + considered to be reached and training stops. + :param verbose: bool, optional, default False + Whether to print progress messages to stdout. + :param warm_start: bool, optional, default False + When set to True, reuse the solution of the previous + call to fit as initialization, otherwise, just erase the + previous solution. See :term:`the Glossary `. + :param momentum: float, default 0.9 + Momentum for gradient descent update. Should be between 0 and 1. Only + used when solver='sgd'. + :param nesterovs_momentum: boolean, default True + Whether to use Nesterov's momentum. Only used when solver='sgd' and + momentum > 0. + :param early_stopping: bool, default False + Whether to use early stopping to terminate training when validation + score is not improving. If set to true, it will automatically set + aside 10% of training data as validation and terminate training when + validation score is not improving by at least ``tol`` for + ``n_iter_no_change`` consecutive epochs. + Only effective when solver='sgd' or 'adam' + :param validation_fraction: float, optional, default 0.1 + The proportion of training data to set aside as validation set for + early stopping. Must be between 0 and 1. + Only used if early_stopping is True + :param beta_1: float, optional, default 0.9 + Exponential decay rate for estimates of first moment vector in adam, + should be in [0, 1). Only used when solver='adam' + :param beta_2: float, optional, default 0.999 + Exponential decay rate for estimates of second moment vector in adam, + should be in [0, 1). Only used when solver='adam' + :param epsilon: float, optional, default 1e-8 + Value for numerical stability in adam. Only used when solver='adam' + :param n_iter_no_change: int, optional, default 10 + Maximum number of epochs to not meet ``tol`` improvement. + Only effective when solver='sgd' or 'adam' + + Fitted attributes: + + * `loss_`: float + The current loss computed with the loss function. + * `coefs_`: list, length n_layers - 1 + The ith element in the list represents the weight matrix corresponding + to layer i. + * `intercepts_`: list, length n_layers - 1 + The ith element in the list represents the bias vector corresponding to + layer i + 1. + * `n_iter_`: int, + The number of iterations the solver has ran. + * `n_layers_`: int + Number of layers. + * `n_outputs_`: int + Number of outputs. + * `out_activation_`: string + Name of the output activation function. 
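# Minimal sketch of the fitted attributes listed above (not part of the docstring).
# Because LinkedMLPRegressor inherits from MLPRegressor, the usual attributes are
# available once fit has been called:
import numpy
from mlinsights.mlmodel import LinkedMLPRegressor

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)
print(clq.n_layers_, clq.n_outputs_, clq.out_activation_)   # 3 1 identity
print([c.shape for c in clq.coefs_])        # [(1, 3), (3, 1)]: one weight matrix per layer
print([b.shape for b in clq.intercepts_])   # [(3,), (1,)]: one bias vector per layer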
+ """ + + def __init__(self, + hidden_layer_sizes=(100,), activation="relu", + solver='sgd', alpha=0.0001, + batch_size='auto', learning_rate="constant", + learning_rate_init=0.001, + power_t=0.5, max_iter=200, shuffle=True, + random_state=None, tol=1e-4, + verbose=False, warm_start=False, momentum=0.9, + nesterovs_momentum=True, early_stopping=False, + validation_fraction=0.1, beta_1=0.9, beta_2=0.999, + epsilon=1e-8, n_iter_no_change=10, + max_fun=15000): + """ + See :epkg:`sklearn:neural_networks:MLPRegressor` + """ + sup = super(LinkedMLPRegressor, self) # pylint: disable=R1725 + sup.__init__(hidden_layer_sizes=hidden_layer_sizes, + activation=activation, solver=solver, alpha=alpha, + batch_size=batch_size, learning_rate=learning_rate, + learning_rate_init=learning_rate_init, power_t=power_t, + max_iter=max_iter, shuffle=shuffle, + random_state=random_state, tol=tol, verbose=verbose, + warm_start=warm_start, momentum=momentum, + nesterovs_momentum=nesterovs_momentum, + early_stopping=early_stopping, + validation_fraction=validation_fraction, + beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, + n_iter_no_change=n_iter_no_change, max_fun=max_fun) diff --git a/requirements.txt b/requirements.txt index 58e344a2..52bf8032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ skl2onnx sphinx>=5.0 sphinx_gallery tqdm +traitlets wheel From 2a1928460217af9b55bcad227bfb92b6b709ee37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 19:56:22 +0200 Subject: [PATCH 4/9] implements links --- .../ut_mlmodel/test_linked_mlpregression.py | 52 +++++- mlinsights/mlmodel/linked_mlpregressor.py | 169 ++++++++++-------- 2 files changed, 139 insertions(+), 82 deletions(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index 616a6e03..c09b0a1f 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -17,7 +17,7 @@ class TestLinkedMLPRegression(ExtTestCase): @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_diff(self): + def test_regression_diff(self): X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]]) Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) clr = MLPRegressor(hidden_layer_sizes=(3,)) @@ -32,7 +32,48 @@ def test_quantile_regression_diff(self): self.assertLesser(err2, 5) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_pickle(self): + def test_regression_linked_int(self): + X = numpy.array([[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], + [0.4, 0.41], [0.5, 0.51]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), linked=2) + clq.fit(X, Y) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_regression_linked(self): + linked = [((0, 'c', 1, 2), (0, 'i', 0)), + ((1, 'c', 0, 0), (1, 'c', 2, 0)), + ((0, 'c', 1, 1), (0, 'c', 0, 2)), + ((0, 'i', 2), (0, 'c', 0, 0)), + ((1, 'i', 0), (1, 'c', 1, 0)), + ((0, 'i', 1), (0, 'c', 0, 1))] + X = numpy.array([[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], + [0.4, 0.41], [0.5, 0.51]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), 
linked=linked) + clq.fit(X, Y) + self.assertEqual(clq.linked_, linked) + self.assertEqual(clq.coefs_[0][1, 2], clq.intercepts_[0][0]) + self.assertEqual(clq.coefs_[1][0, 0], clq.coefs_[1][2, 0]) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_regression_pickle(self): X = random(100) eps1 = (random(90) - 0.5) * 0.1 eps2 = random(10) * 2 @@ -45,11 +86,11 @@ def test_quantile_regression_pickle(self): hidden_layer_sizes=(3,)), X, Y) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_clone(self): + def test_regression_clone(self): test_sklearn_clone(lambda: LinkedMLPRegressor()) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_grid_search(self): + def test_regression_grid_search(self): X = random(100) eps1 = (random(90) - 0.5) * 0.1 eps2 = random(10) * 2 @@ -62,9 +103,10 @@ def test_quantile_regression_grid_search(self): X, Y, learning_rate_init=[0.001, 0.0001]) self.assertIn('model', res) self.assertIn('score', res) - self.assertGreater(res['score'], 0) + self.assertGreater(res['score'], -1) self.assertLesser(res['score'], 11) if __name__ == "__main__": + # TestLinkedMLPRegression().test_regression_linked() unittest.main() diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 578d8ad9..0f0d0975 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -4,6 +4,7 @@ @brief Implements a quantile non-linear regression. """ import inspect +import random import numpy as np from sklearn.base import RegressorMixin from sklearn.utils import check_X_y, column_or_1d @@ -15,90 +16,101 @@ class LinkedMLPBase: - def _backprop(self, X, y, activations, deltas, coef_grads, - intercept_grads): - """ - Computes the MLP loss function and its corresponding derivatives - with respect to each parameter: weights and bias vectors. - - :param X: {array-like, sparse matrix}, shape (n_samples, n_features) - The input data. - :param y: array-like, shape (n_samples,) - The target values. - :param activations: list, length = n_layers - 1 - The ith element of the list holds the values of the ith layer. - :param deltas: list, length = n_layers - 1 - The ith element of the list holds the difference between the - activations of the i + 1 layer and the backpropagated error. - More specifically, deltas are gradients of loss with respect to z - in each layer, where z = wx + b is the value of a particular layer - before passing through the activation function - :param coef_grads: list, length = n_layers - 1 - The ith element contains the amount of change used to update the - coefficient parameters of the ith layer in an iteration. - :param intercept_grads: list, length = n_layers - 1 - The ith element contains the amount of change used to update the - intercept parameters of the ith layer in an iteration. 
- :return: loss, float - :return: coef_grads, list, length = n_layers - 1 - :return: intercept_grads, list, length = n_layers - 1 - """ - stop - n_samples = X.shape[0] - - # Forward propagate - activations = self._forward_pass(activations) + def _initialize(self, y, layer_units, dtype): + super()._initialize(y, layer_units, dtype) + if hasattr(self, "linked_"): + return + if self.linked is None: + self.linked_ = None + return + if isinstance(self.linked, int): - # Get loss - loss_func_name = self.loss - if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic': - loss_func_name = 'binary_log_loss' - loss_function = self._get_loss_function(loss_func_name) - loss = loss_function(y, activations[-1]) - # Add L2 regularization term to loss - values = np.sum( - np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_])) - loss += (0.5 * self.alpha) * values / n_samples + def _get_random(layer, selected, n_sel): + indices = [] + c = self.coefs_[layer] + for i in range(c.shape[0]): + for j in range(c.shape[1]): + key = layer, "c", i, j + if key in selected: + continue + indices.append(key) + c = self.intercepts_[layer] + for i in range(c.shape[0]): + key = layer, "i", i + if key in selected: + continue + indices.append(key) - # Backward propagate - last = self.n_layers_ - 2 + random.shuffle(indices) + inds = [] + pos = 0 + nis = set() + while len(inds) < n_sel and pos < len(indices): + ind = indices[pos] + if ind[2] in nis: + pos += 1 + continue + inds.append(pos) + nis.add(ind[2]) + pos += 1 + return tuple(indices[p] for p in inds) - # The calculation of delta[last] here works with following - # combinations of output activation and loss function: - # sigmoid and binary cross entropy, softmax and categorical cross - # entropy, and identity with squared loss - deltas[last] = activations[-1] - y + n_coefs = sum([c.size for c in self.coefs_] + + [c.size for c in self.intercepts_]) + linked = [] + selected = set() + unchanged = 0 + while len(linked) < n_coefs and unchanged < 10: + layer = random.randint(0, len(self.coefs_) - 1) + inds = _get_random(layer, selected, self.linked) + if len(inds) <= 1: + unchanged += 1 + continue + unchanged = 0 + for i in inds: + selected.add(i) + linked.append(inds) + self.linked_ = linked + self._fix_links(self.coefs_, self.intercepts_) + elif isinstance(self.linked, list): + self.linked_ = self.linked + self._fix_links(self.coefs_, self.intercepts_) + else: + raise TypeError(f"Unexpected type for linked {type(self.linked)}.") - # We insert the following modification to modify the gradient - # due to the modification of the loss function. 
- deltas[last] = self._modify_loss_derivatives(deltas[last]) + def _fix_links(self, coefs, intercepts): + if self.linked_ is None: + return + for links in self.linked_: + if len(links) <= 1: + raise RuntimeError(f"Unexpected value for link {links}.") + total = 0 + for key in links: + if key[1] == "c": + v = coefs[key[0]][key[2:]] + else: + v = intercepts[key[0]][key[2]] + total += v + total /= len(links) + for key in links: + if key[1] == "c": + coefs[key[0]][key[2:]] = total + else: + intercepts[key[0]][key[2]] = total - # recent version of scikit-learn - # Compute gradient for the last layer - self._compute_loss_grad( - last, n_samples, activations, deltas, coef_grads, intercept_grads) - - inplace_derivative = DERIVATIVES[self.activation] - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative(activations[i], deltas[i - 1]) - - self._compute_loss_grad( - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) - - return loss, coef_grads, intercept_grads + def _backprop(self, X, y, activations, deltas, coef_grads, + intercept_grads): + batch_loss, coef_grads, intercept_grads = super()._backprop( + X, y, activations, deltas, coef_grads, intercept_grads) + self._fix_links(coef_grads, intercept_grads) + return batch_loss, coef_grads, intercept_grads -class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): +class LinkedMLPRegressor(LinkedMLPBase, MLPRegressor): """ - Quantile MLP Regression or neural networks regression - trained with norm :epkg:`L1`. This class inherits from - :epkg:`sklearn:neural_networks:MLPRegressor`. - This model optimizes the absolute-loss using LBFGS or stochastic gradient - descent. See @see cl CustomizedMultilayerPerceptron and - @see fn absolute_loss. + A neural networks regression for which a subset a coefficients + share the same value. In practice, it should make the training + more stable. See parameter *linked*. :param hidden_layer_sizes: tuple, length = n_layers - 2, default (100,) The ith element represents the number of neurons in the ith @@ -203,6 +215,8 @@ class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): :param n_iter_no_change: int, optional, default 10 Maximum number of epochs to not meet ``tol`` improvement. 
Only effective when solver='sgd' or 'adam' + :param linked: can be a float to defined the ratio of linked coefficients, + or list of set of indices Fitted attributes: @@ -235,7 +249,7 @@ def __init__(self, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-8, n_iter_no_change=10, - max_fun=15000): + max_fun=15000, linked=None): """ See :epkg:`sklearn:neural_networks:MLPRegressor` """ @@ -252,3 +266,4 @@ def __init__(self, validation_fraction=validation_fraction, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, n_iter_no_change=n_iter_no_change, max_fun=max_fun) + self.linked = linked From 1a5b8c44297edc6cfddabb134596ca9277e5b925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 20:09:33 +0200 Subject: [PATCH 5/9] lint --- appveyor.yml | 2 +- mlinsights/mlmodel/linked_mlpregressor.py | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 7e5afb0c..b96b1d17 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,7 @@ install: # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies - - "%PYTHON%\\Scripts\\pip install -r requirements.txt --no-deps" + - "%PYTHON%\\Scripts\\pip install -r requirements.txt" - "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%" build: off diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 0f0d0975..3f1d7183 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -1,16 +1,10 @@ # -*- coding: utf-8 -*- +# pylint: disable=E1101 """ @file @brief Implements a quantile non-linear regression. """ -import inspect import random -import numpy as np -from sklearn.base import RegressorMixin -from sklearn.utils import check_X_y, column_or_1d -from sklearn.utils.validation import check_is_fitted -from sklearn.utils.extmath import safe_sparse_dot -from sklearn.neural_network._base import DERIVATIVES, LOSS_FUNCTIONS from sklearn.neural_network import MLPRegressor From 79eafc33a9b2f46f2bd1a543f02c5444d1b12c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 20:22:15 +0200 Subject: [PATCH 6/9] Update linked_mlpregressor.py --- mlinsights/mlmodel/linked_mlpregressor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 3f1d7183..f90f87dc 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -9,6 +9,10 @@ class LinkedMLPBase: + """ + Overloads methods from :epkg:`sklearn:neural_networks:MLPRegressor` + and insert the logic to train linked coefficients. 
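# Minimal standalone sketch (not part of the patch) of what "linked coefficients" means.
# A link is a tuple of keys, each key addressing one parameter: (layer, 'c', i, j) for
# coefs_[layer][i, j] and (layer, 'i', k) for intercepts_[layer][k].  When ``linked`` is
# an int, ``_initialize`` draws random groups of coefficients; when it is a list (as in
# the unit test of patch 4), the groups are used as given.  ``_fix_links`` keeps every
# group equal by replacing its members with their mean, and ``_backprop`` applies the
# same averaging to coef_grads/intercept_grads so the tied parameters move together.
import numpy

coefs = [numpy.arange(6, dtype=float).reshape((2, 3))]   # one layer of 2x3 weights
intercepts = [numpy.array([10.0, 20.0, 30.0])]           # and its 3 biases
links = [((0, "c", 1, 2), (0, "i", 0))]                  # tie coefs[0][1, 2] to intercepts[0][0]

for link in links:
    total = 0.0
    for key in link:
        if key[1] == "c":
            total += coefs[key[0]][key[2:]]
        else:
            total += intercepts[key[0]][key[2]]
    total /= len(link)
    for key in link:
        if key[1] == "c":
            coefs[key[0]][key[2:]] = total
        else:
            intercepts[key[0]][key[2]] = total

print(coefs[0][1, 2], intercepts[0][0])   # both 7.5: the linked entries now share one value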
+ """ def _initialize(self, y, layer_units, dtype): super()._initialize(y, layer_units, dtype) From 66b7425069221be372e23551fac0ea083004d74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 17 Oct 2022 08:52:18 +0200 Subject: [PATCH 7/9] Update test_linked_mlpregression.py --- _unittests/ut_mlmodel/test_linked_mlpregression.py | 1 - 1 file changed, 1 deletion(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index c09b0a1f..2fc4c063 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -5,7 +5,6 @@ import unittest import numpy from numpy.random import random -import pandas from sklearn.neural_network import MLPRegressor from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning From 69ca553569e0d3903648bbe027b888bd902e91fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 9 Dec 2022 00:33:41 +0100 Subject: [PATCH 8/9] documentation --- _doc/sphinxdoc/source/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_doc/sphinxdoc/source/index.rst b/_doc/sphinxdoc/source/index.rst index f26e005a..c5b0bf9e 100644 --- a/_doc/sphinxdoc/source/index.rst +++ b/_doc/sphinxdoc/source/index.rst @@ -86,11 +86,11 @@ Short example: :showcode: :warningout: FutureWarning - from sklearn.datasets import load_boston + from sklearn.datasets import load_diabetes from sklearn.linear_model import LinearRegression from mlinsights.mlmodel import QuantileLinearRegression - data = load_boston() + data = load_diabetes() X, y = data.data, data.target clq = QuantileLinearRegression() From 97ad740877efef9cb06395dd4f12bf447547d24e Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 2 Oct 2023 11:30:36 +0200 Subject: [PATCH 9/9] fix unit test --- .../ut_mlmodel/test_linked_mlpregression.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index adbe9507..e3dd4cc5 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -8,12 +8,12 @@ from sklearn.neural_network import MLPRegressor from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning -from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlinsights.ext_test_case import ExtTestCase, ignore_warnings from mlinsights.mlmodel import LinkedMLPRegressor from mlinsights.mlmodel import ( - test_sklearn_pickle, - test_sklearn_clone, - test_sklearn_grid_search_cv, + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv, ) @@ -86,12 +86,14 @@ def test_regression_pickle(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - test_sklearn_pickle(lambda: MLPRegressor(hidden_layer_sizes=(3,)), X, Y) - test_sklearn_pickle(lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y) + run_test_sklearn_pickle(lambda: MLPRegressor(hidden_layer_sizes=(3,)), X, Y) + run_test_sklearn_pickle( + lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y + ) @ignore_warnings(ConvergenceWarning) def test_regression_clone(self): - test_sklearn_clone(lambda: LinkedMLPRegressor()) + run_test_sklearn_clone(lambda: LinkedMLPRegressor()) @ignore_warnings(ConvergenceWarning) def 
test_regression_grid_search(self): @@ -102,12 +104,12 @@ def test_regression_grid_search(self): X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps self.assertRaise( - lambda: test_sklearn_grid_search_cv( + lambda: run_test_sklearn_grid_search_cv( lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y ), ValueError, ) - res = test_sklearn_grid_search_cv( + res = run_test_sklearn_grid_search_cv( lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y,