From eeda699c55dfc2f8ea6dbc467e73ea9312bb9d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:33:27 +0200 Subject: [PATCH 1/9] Update build --- .local.jenkins.lin.yml | 1 - appveyor.yml | 1 - mlinsights/mlmodel/quantile_mlpregressor.py | 38 ++++++--------------- requirements-win.txt | 17 --------- requirements.txt | 3 +- 5 files changed, 12 insertions(+), 48 deletions(-) delete mode 100644 requirements-win.txt diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml index 5a88a30a..403b9d91 100644 --- a/.local.jenkins.lin.yml +++ b/.local.jenkins.lin.yml @@ -11,7 +11,6 @@ install: - $PYINT -m pip install --upgrade pip - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper cpyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/ - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ scikit-learn>=0.22 --extra-index-url=https://pypi.python.org/simple/ - - $PYINT -m pip install -r requirements-win.txt - $PYINT -m pip install -r requirements.txt - $PYINT --version - $PYINT -m pip freeze diff --git a/appveyor.yml b/appveyor.yml index 143ac0b5..7e5afb0c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,7 +13,6 @@ install: - "%PYTHON%\\python -m pip install --upgrade pip" # for many packages - pip install llvmlite numba - - "%PYTHON%\\Scripts\\pip install -r requirements-win.txt" # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies diff --git a/mlinsights/mlmodel/quantile_mlpregressor.py b/mlinsights/mlmodel/quantile_mlpregressor.py index ddc96de9..c0d4f790 100644 --- a/mlinsights/mlmodel/quantile_mlpregressor.py +++ b/mlinsights/mlmodel/quantile_mlpregressor.py @@ -146,36 +146,20 @@ def _backprop(self, X, y, activations, deltas, coef_grads, # due to the modification of the loss function. 
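#     (Illustrative sketch, not a line of this patch.)  The body of
#     ``_modify_loss_derivatives`` is not shown anywhere in this diff; for the
#     absolute (L1) loss that QuantileMLPRegressor optimizes, it presumably turns
#     the squared-loss residual ``activations[-1] - y`` into its sign, roughly:
#
#         def _modify_loss_derivatives(self, last_deltas):
#             # assumed behaviour: derivative of |y_pred - y| w.r.t. y_pred;
#             # scikit-learn's _compute_loss_grad divides by n_samples afterwards
#             return numpy.sign(last_deltas)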
deltas[last] = self._modify_loss_derivatives(deltas[last]) + # recent version of scikit-learn # Compute gradient for the last layer - temp = self._compute_loss_grad( # pylint: disable=E1111 + self._compute_loss_grad( last, n_samples, activations, deltas, coef_grads, intercept_grads) - if temp is None: - # recent version of scikit-learn - # Compute gradient for the last layer + + inplace_derivative = DERIVATIVES[self.activation] + # Iterate over the hidden layers + for i in range(self.n_layers_ - 2, 0, -1): + deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) + inplace_derivative(activations[i], deltas[i - 1]) + self._compute_loss_grad( - last, n_samples, activations, deltas, coef_grads, intercept_grads) - - inplace_derivative = DERIVATIVES[self.activation] - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative(activations[i], deltas[i - 1]) - - self._compute_loss_grad( - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) - else: # pragma: no cover - coef_grads, intercept_grads = temp # pylint: disable=E0633 - - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative = DERIVATIVES[self.activation] - inplace_derivative(activations[i], deltas[i - 1]) - - coef_grads, intercept_grads = self._compute_loss_grad( # pylint: disable=E1111,E0633 - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) + i - 1, n_samples, activations, deltas, coef_grads, + intercept_grads) return loss, coef_grads, intercept_grads diff --git a/requirements-win.txt b/requirements-win.txt deleted file mode 100644 index bbed5e0d..00000000 --- a/requirements-win.txt +++ /dev/null @@ -1,17 +0,0 @@ -astroid -ijson -importlib_metadata -ipython -isort -joblib -jupyter -matplotlib -nbformat -numpy -pandas -psutil -pylint>=2.14.0 -pymyinstall -pyshp -scikit-learn>=1.0 -threadpoolctl diff --git a/requirements.txt b/requirements.txt index 9580f4b0..edf1da06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,8 +25,7 @@ scikit-learn>=1.0 scipy seaborn skl2onnx -sphinx>=3.0 -sphinxcontrib.imagesvg +sphinx>=5.0 sphinx_gallery tqdm wheel From 315346f17ee79494f969ea31eef48e12f62bc9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:41:44 +0200 Subject: [PATCH 2/9] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index edf1da06..58e344a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ chardet coverage cpyquickhelper>=0.3 cython +ipython joblib jupyter_sphinx>=0.2 jyquickhelper From 7fbbd3fd0c6256e4290aef34643628b03019fdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 17:53:01 +0200 Subject: [PATCH 3/9] first sketch of linkedmlpregressor --- .../ut_mlmodel/test_linked_mlpregression.py | 70 +++++ mlinsights/mlmodel/__init__.py | 1 + mlinsights/mlmodel/linked_mlpregressor.py | 254 ++++++++++++++++++ requirements.txt | 1 + 4 files changed, 326 insertions(+) create mode 100644 _unittests/ut_mlmodel/test_linked_mlpregression.py create mode 100644 mlinsights/mlmodel/linked_mlpregressor.py diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py new file mode 100644 index 00000000..616a6e03 --- /dev/null +++ 
b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +""" +@brief test log(time=2s) +""" +import unittest +import numpy +from numpy.random import random +import pandas +from sklearn.neural_network import MLPRegressor +from sklearn.metrics import mean_absolute_error +from sklearn.exceptions import ConvergenceWarning +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlinsights.mlmodel import LinkedMLPRegressor +from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv + + +class TestLinkedMLPRegression(ExtTestCase): + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_diff(self): + X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)) + clq.fit(X, Y) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_pickle(self): + X = random(100) + eps1 = (random(90) - 0.5) * 0.1 + eps2 = random(10) * 2 + eps = numpy.hstack([eps1, eps2]) + X = X.reshape((100, 1)) # pylint: disable=E1101 + Y = X.ravel() * 3.4 + 5.6 + eps + test_sklearn_pickle(lambda: MLPRegressor( + hidden_layer_sizes=(3,)), X, Y) + test_sklearn_pickle(lambda: LinkedMLPRegressor( + hidden_layer_sizes=(3,)), X, Y) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_clone(self): + test_sklearn_clone(lambda: LinkedMLPRegressor()) + + @ignore_warnings(ConvergenceWarning) + def test_quantile_regression_grid_search(self): + X = random(100) + eps1 = (random(90) - 0.5) * 0.1 + eps2 = random(10) * 2 + eps = numpy.hstack([eps1, eps2]) + X = X.reshape((100, 1)) # pylint: disable=E1101 + Y = X.ravel() * 3.4 + 5.6 + eps + self.assertRaise(lambda: test_sklearn_grid_search_cv( + lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y), ValueError) + res = test_sklearn_grid_search_cv(lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), + X, Y, learning_rate_init=[0.001, 0.0001]) + self.assertIn('model', res) + self.assertIn('score', res) + self.assertGreater(res['score'], 0) + self.assertLesser(res['score'], 11) + + +if __name__ == "__main__": + unittest.main() diff --git a/mlinsights/mlmodel/__init__.py b/mlinsights/mlmodel/__init__.py index df30feb0..5108611f 100644 --- a/mlinsights/mlmodel/__init__.py +++ b/mlinsights/mlmodel/__init__.py @@ -8,6 +8,7 @@ from .decision_tree_logreg import DecisionTreeLogisticRegression from .extended_features import ExtendedFeatures from .interval_regressor import IntervalRegressor +from .linked_mlpregressor import LinkedMLPRegressor from .kmeans_constraint import ConstraintKMeans from .kmeans_l1 import KMeansL1L2 from .ml_featurizer import model_featurizer diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py new file mode 100644 index 00000000..578d8ad9 --- /dev/null +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- +""" +@file +@brief Implements a quantile non-linear regression. 
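# Minimal usage sketch, mirroring the unit test above (not part of the new module):
# LinkedMLPRegressor is intended as a drop-in replacement for scikit-learn's MLPRegressor.
import numpy
from sklearn.neural_network import MLPRegressor
from mlinsights.mlmodel import LinkedMLPRegressor

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
clr = MLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)        # baseline
clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)  # same API
print(clr.predict(X))
print(clq.predict(X))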
+""" +import inspect +import numpy as np +from sklearn.base import RegressorMixin +from sklearn.utils import check_X_y, column_or_1d +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.extmath import safe_sparse_dot +from sklearn.neural_network._base import DERIVATIVES, LOSS_FUNCTIONS +from sklearn.neural_network import MLPRegressor + + +class LinkedMLPBase: + + def _backprop(self, X, y, activations, deltas, coef_grads, + intercept_grads): + """ + Computes the MLP loss function and its corresponding derivatives + with respect to each parameter: weights and bias vectors. + + :param X: {array-like, sparse matrix}, shape (n_samples, n_features) + The input data. + :param y: array-like, shape (n_samples,) + The target values. + :param activations: list, length = n_layers - 1 + The ith element of the list holds the values of the ith layer. + :param deltas: list, length = n_layers - 1 + The ith element of the list holds the difference between the + activations of the i + 1 layer and the backpropagated error. + More specifically, deltas are gradients of loss with respect to z + in each layer, where z = wx + b is the value of a particular layer + before passing through the activation function + :param coef_grads: list, length = n_layers - 1 + The ith element contains the amount of change used to update the + coefficient parameters of the ith layer in an iteration. + :param intercept_grads: list, length = n_layers - 1 + The ith element contains the amount of change used to update the + intercept parameters of the ith layer in an iteration. + :return: loss, float + :return: coef_grads, list, length = n_layers - 1 + :return: intercept_grads, list, length = n_layers - 1 + """ + stop + n_samples = X.shape[0] + + # Forward propagate + activations = self._forward_pass(activations) + + # Get loss + loss_func_name = self.loss + if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic': + loss_func_name = 'binary_log_loss' + loss_function = self._get_loss_function(loss_func_name) + loss = loss_function(y, activations[-1]) + # Add L2 regularization term to loss + values = np.sum( + np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_])) + loss += (0.5 * self.alpha) * values / n_samples + + # Backward propagate + last = self.n_layers_ - 2 + + # The calculation of delta[last] here works with following + # combinations of output activation and loss function: + # sigmoid and binary cross entropy, softmax and categorical cross + # entropy, and identity with squared loss + deltas[last] = activations[-1] - y + + # We insert the following modification to modify the gradient + # due to the modification of the loss function. + deltas[last] = self._modify_loss_derivatives(deltas[last]) + + # recent version of scikit-learn + # Compute gradient for the last layer + self._compute_loss_grad( + last, n_samples, activations, deltas, coef_grads, intercept_grads) + + inplace_derivative = DERIVATIVES[self.activation] + # Iterate over the hidden layers + for i in range(self.n_layers_ - 2, 0, -1): + deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) + inplace_derivative(activations[i], deltas[i - 1]) + + self._compute_loss_grad( + i - 1, n_samples, activations, deltas, coef_grads, + intercept_grads) + + return loss, coef_grads, intercept_grads + + +class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): + """ + Quantile MLP Regression or neural networks regression + trained with norm :epkg:`L1`. This class inherits from + :epkg:`sklearn:neural_networks:MLPRegressor`. 
+ This model optimizes the absolute-loss using LBFGS or stochastic gradient + descent. See @see cl CustomizedMultilayerPerceptron and + @see fn absolute_loss. + + :param hidden_layer_sizes: tuple, length = n_layers - 2, default (100,) + The ith element represents the number of neurons in the ith + hidden layer. + :param activation: {'identity', 'logistic', 'tanh', 'relu'}, default 'relu' + Activation function for the hidden layer. + - 'identity', no-op activation, useful to implement linear bottleneck, + returns :math:`f(x) = x` + - 'logistic', the logistic sigmoid function, + returns :math:`f(x) = 1 / (1 + exp(-x))`. + - 'tanh', the hyperbolic tan function, + returns :math:`f(x) = tanh(x)`. + - 'relu', the rectified linear unit function, + returns :math:`f(x) = \\max(0, x)`. + :param solver: ``{'lbfgs', 'sgd', 'adam'}``, default 'adam' + The solver for weight optimization. + - *'lbfgs'* is an optimizer in the family of quasi-Newton methods. + - *'sgd'* refers to stochastic gradient descent. + - *'adam'* refers to a stochastic gradient-based optimizer proposed by + Kingma, Diederik, and Jimmy Ba + Note: The default solver 'adam' works pretty well on relatively + large datasets (with thousands of training samples or more) in terms of + both training time and validation score. + For small datasets, however, 'lbfgs' can converge faster and perform + better. + :param alpha: float, optional, default 0.0001 + :epkg:`L2` penalty (regularization term) parameter. + :param batch_size: int, optional, default 'auto' + Size of minibatches for stochastic optimizers. + If the solver is 'lbfgs', the classifier will not use minibatch. + When set to "auto", `batch_size=min(200, n_samples)` + :param learning_rate: {'constant', 'invscaling', 'adaptive'}, default 'constant' + Learning rate schedule for weight updates. + - 'constant' is a constant learning rate given by + 'learning_rate_init'. + - 'invscaling' gradually decreases the learning rate ``learning_rate_`` + at each time step 't' using an inverse scaling exponent of 'power_t'. + effective_learning_rate = learning_rate_init / pow(t, power_t) + - 'adaptive' keeps the learning rate constant to + 'learning_rate_init' as long as training loss keeps decreasing. + Each time two consecutive epochs fail to decrease training loss by at + least tol, or fail to increase validation score by at least tol if + 'early_stopping' is on, the current learning rate is divided by 5. + Only used when solver='sgd'. + :param learning_rate_init: double, optional, default 0.001 + The initial learning rate used. It controls the step-size + in updating the weights. Only used when solver='sgd' or 'adam'. + :param power_t: double, optional, default 0.5 + The exponent for inverse scaling learning rate. + It is used in updating effective learning rate when the learning_rate + is set to 'invscaling'. Only used when solver='sgd'. + :param max_iter: int, optional, default 200 + Maximum number of iterations. The solver iterates until convergence + (determined by 'tol') or this number of iterations. For stochastic + solvers ('sgd', 'adam'), note that this determines the number of epochs + (how many times each data point will be used), not the number of + gradient steps. + :param shuffle: bool, optional, default True + Whether to shuffle samples in each iteration. Only used when + solver='sgd' or 'adam'. 
+ :param random_state: int, RandomState instance or None, optional, default None + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + :param tol: float, optional, default 1e-4 + Tolerance for the optimization. When the loss or score is not improving + by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, + unless ``learning_rate`` is set to 'adaptive', convergence is + considered to be reached and training stops. + :param verbose: bool, optional, default False + Whether to print progress messages to stdout. + :param warm_start: bool, optional, default False + When set to True, reuse the solution of the previous + call to fit as initialization, otherwise, just erase the + previous solution. See :term:`the Glossary `. + :param momentum: float, default 0.9 + Momentum for gradient descent update. Should be between 0 and 1. Only + used when solver='sgd'. + :param nesterovs_momentum: boolean, default True + Whether to use Nesterov's momentum. Only used when solver='sgd' and + momentum > 0. + :param early_stopping: bool, default False + Whether to use early stopping to terminate training when validation + score is not improving. If set to true, it will automatically set + aside 10% of training data as validation and terminate training when + validation score is not improving by at least ``tol`` for + ``n_iter_no_change`` consecutive epochs. + Only effective when solver='sgd' or 'adam' + :param validation_fraction: float, optional, default 0.1 + The proportion of training data to set aside as validation set for + early stopping. Must be between 0 and 1. + Only used if early_stopping is True + :param beta_1: float, optional, default 0.9 + Exponential decay rate for estimates of first moment vector in adam, + should be in [0, 1). Only used when solver='adam' + :param beta_2: float, optional, default 0.999 + Exponential decay rate for estimates of second moment vector in adam, + should be in [0, 1). Only used when solver='adam' + :param epsilon: float, optional, default 1e-8 + Value for numerical stability in adam. Only used when solver='adam' + :param n_iter_no_change: int, optional, default 10 + Maximum number of epochs to not meet ``tol`` improvement. + Only effective when solver='sgd' or 'adam' + + Fitted attributes: + + * `loss_`: float + The current loss computed with the loss function. + * `coefs_`: list, length n_layers - 1 + The ith element in the list represents the weight matrix corresponding + to layer i. + * `intercepts_`: list, length n_layers - 1 + The ith element in the list represents the bias vector corresponding to + layer i + 1. + * `n_iter_`: int, + The number of iterations the solver has ran. + * `n_layers_`: int + Number of layers. + * `n_outputs_`: int + Number of outputs. + * `out_activation_`: string + Name of the output activation function. 
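# Minimal sketch of the fitted attributes listed above (not part of the docstring).
# Because LinkedMLPRegressor inherits from MLPRegressor, the usual attributes are
# available once fit has been called:
import numpy
from mlinsights.mlmodel import LinkedMLPRegressor

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1.0, 1.1, 1.2, 10, 1.4])
clq = LinkedMLPRegressor(hidden_layer_sizes=(3,)).fit(X, Y)
print(clq.n_layers_, clq.n_outputs_, clq.out_activation_)   # 3 1 identity
print([c.shape for c in clq.coefs_])        # [(1, 3), (3, 1)]: one weight matrix per layer
print([b.shape for b in clq.intercepts_])   # [(3,), (1,)]: one bias vector per layer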
+ """ + + def __init__(self, + hidden_layer_sizes=(100,), activation="relu", + solver='sgd', alpha=0.0001, + batch_size='auto', learning_rate="constant", + learning_rate_init=0.001, + power_t=0.5, max_iter=200, shuffle=True, + random_state=None, tol=1e-4, + verbose=False, warm_start=False, momentum=0.9, + nesterovs_momentum=True, early_stopping=False, + validation_fraction=0.1, beta_1=0.9, beta_2=0.999, + epsilon=1e-8, n_iter_no_change=10, + max_fun=15000): + """ + See :epkg:`sklearn:neural_networks:MLPRegressor` + """ + sup = super(LinkedMLPRegressor, self) # pylint: disable=R1725 + sup.__init__(hidden_layer_sizes=hidden_layer_sizes, + activation=activation, solver=solver, alpha=alpha, + batch_size=batch_size, learning_rate=learning_rate, + learning_rate_init=learning_rate_init, power_t=power_t, + max_iter=max_iter, shuffle=shuffle, + random_state=random_state, tol=tol, verbose=verbose, + warm_start=warm_start, momentum=momentum, + nesterovs_momentum=nesterovs_momentum, + early_stopping=early_stopping, + validation_fraction=validation_fraction, + beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, + n_iter_no_change=n_iter_no_change, max_fun=max_fun) diff --git a/requirements.txt b/requirements.txt index 58e344a2..52bf8032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ skl2onnx sphinx>=5.0 sphinx_gallery tqdm +traitlets wheel From 2a1928460217af9b55bcad227bfb92b6b709ee37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 19:56:22 +0200 Subject: [PATCH 4/9] implements links --- .../ut_mlmodel/test_linked_mlpregression.py | 52 +++++- mlinsights/mlmodel/linked_mlpregressor.py | 169 ++++++++++-------- 2 files changed, 139 insertions(+), 82 deletions(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index 616a6e03..c09b0a1f 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -17,7 +17,7 @@ class TestLinkedMLPRegression(ExtTestCase): @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_diff(self): + def test_regression_diff(self): X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]]) Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) clr = MLPRegressor(hidden_layer_sizes=(3,)) @@ -32,7 +32,48 @@ def test_quantile_regression_diff(self): self.assertLesser(err2, 5) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_pickle(self): + def test_regression_linked_int(self): + X = numpy.array([[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], + [0.4, 0.41], [0.5, 0.51]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), linked=2) + clq.fit(X, Y) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_regression_linked(self): + linked = [((0, 'c', 1, 2), (0, 'i', 0)), + ((1, 'c', 0, 0), (1, 'c', 2, 0)), + ((0, 'c', 1, 1), (0, 'c', 0, 2)), + ((0, 'i', 2), (0, 'c', 0, 0)), + ((1, 'i', 0), (1, 'c', 1, 0)), + ((0, 'i', 1), (0, 'c', 0, 1))] + X = numpy.array([[0.1, 0.11], [0.2, 0.21], [0.3, 0.31], + [0.4, 0.41], [0.5, 0.51]]) + Y = numpy.array([1., 1.1, 1.2, 10, 1.4]) + clr = MLPRegressor(hidden_layer_sizes=(3,)) + clr.fit(X, Y) + clq = LinkedMLPRegressor(hidden_layer_sizes=(3,), 
linked=linked) + clq.fit(X, Y) + self.assertEqual(clq.linked_, linked) + self.assertEqual(clq.coefs_[0][1, 2], clq.intercepts_[0][0]) + self.assertEqual(clq.coefs_[1][0, 0], clq.coefs_[1][2, 0]) + self.assertGreater(clr.n_iter_, 10) + self.assertGreater(clq.n_iter_, 10) + err1 = mean_absolute_error(Y, clr.predict(X)) + err2 = mean_absolute_error(Y, clq.predict(X)) + self.assertLesser(err1, 5) + self.assertLesser(err2, 5) + + @ignore_warnings(ConvergenceWarning) + def test_regression_pickle(self): X = random(100) eps1 = (random(90) - 0.5) * 0.1 eps2 = random(10) * 2 @@ -45,11 +86,11 @@ def test_quantile_regression_pickle(self): hidden_layer_sizes=(3,)), X, Y) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_clone(self): + def test_regression_clone(self): test_sklearn_clone(lambda: LinkedMLPRegressor()) @ignore_warnings(ConvergenceWarning) - def test_quantile_regression_grid_search(self): + def test_regression_grid_search(self): X = random(100) eps1 = (random(90) - 0.5) * 0.1 eps2 = random(10) * 2 @@ -62,9 +103,10 @@ def test_quantile_regression_grid_search(self): X, Y, learning_rate_init=[0.001, 0.0001]) self.assertIn('model', res) self.assertIn('score', res) - self.assertGreater(res['score'], 0) + self.assertGreater(res['score'], -1) self.assertLesser(res['score'], 11) if __name__ == "__main__": + # TestLinkedMLPRegression().test_regression_linked() unittest.main() diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 578d8ad9..0f0d0975 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -4,6 +4,7 @@ @brief Implements a quantile non-linear regression. """ import inspect +import random import numpy as np from sklearn.base import RegressorMixin from sklearn.utils import check_X_y, column_or_1d @@ -15,90 +16,101 @@ class LinkedMLPBase: - def _backprop(self, X, y, activations, deltas, coef_grads, - intercept_grads): - """ - Computes the MLP loss function and its corresponding derivatives - with respect to each parameter: weights and bias vectors. - - :param X: {array-like, sparse matrix}, shape (n_samples, n_features) - The input data. - :param y: array-like, shape (n_samples,) - The target values. - :param activations: list, length = n_layers - 1 - The ith element of the list holds the values of the ith layer. - :param deltas: list, length = n_layers - 1 - The ith element of the list holds the difference between the - activations of the i + 1 layer and the backpropagated error. - More specifically, deltas are gradients of loss with respect to z - in each layer, where z = wx + b is the value of a particular layer - before passing through the activation function - :param coef_grads: list, length = n_layers - 1 - The ith element contains the amount of change used to update the - coefficient parameters of the ith layer in an iteration. - :param intercept_grads: list, length = n_layers - 1 - The ith element contains the amount of change used to update the - intercept parameters of the ith layer in an iteration. 
- :return: loss, float - :return: coef_grads, list, length = n_layers - 1 - :return: intercept_grads, list, length = n_layers - 1 - """ - stop - n_samples = X.shape[0] - - # Forward propagate - activations = self._forward_pass(activations) + def _initialize(self, y, layer_units, dtype): + super()._initialize(y, layer_units, dtype) + if hasattr(self, "linked_"): + return + if self.linked is None: + self.linked_ = None + return + if isinstance(self.linked, int): - # Get loss - loss_func_name = self.loss - if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic': - loss_func_name = 'binary_log_loss' - loss_function = self._get_loss_function(loss_func_name) - loss = loss_function(y, activations[-1]) - # Add L2 regularization term to loss - values = np.sum( - np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_])) - loss += (0.5 * self.alpha) * values / n_samples + def _get_random(layer, selected, n_sel): + indices = [] + c = self.coefs_[layer] + for i in range(c.shape[0]): + for j in range(c.shape[1]): + key = layer, "c", i, j + if key in selected: + continue + indices.append(key) + c = self.intercepts_[layer] + for i in range(c.shape[0]): + key = layer, "i", i + if key in selected: + continue + indices.append(key) - # Backward propagate - last = self.n_layers_ - 2 + random.shuffle(indices) + inds = [] + pos = 0 + nis = set() + while len(inds) < n_sel and pos < len(indices): + ind = indices[pos] + if ind[2] in nis: + pos += 1 + continue + inds.append(pos) + nis.add(ind[2]) + pos += 1 + return tuple(indices[p] for p in inds) - # The calculation of delta[last] here works with following - # combinations of output activation and loss function: - # sigmoid and binary cross entropy, softmax and categorical cross - # entropy, and identity with squared loss - deltas[last] = activations[-1] - y + n_coefs = sum([c.size for c in self.coefs_] + + [c.size for c in self.intercepts_]) + linked = [] + selected = set() + unchanged = 0 + while len(linked) < n_coefs and unchanged < 10: + layer = random.randint(0, len(self.coefs_) - 1) + inds = _get_random(layer, selected, self.linked) + if len(inds) <= 1: + unchanged += 1 + continue + unchanged = 0 + for i in inds: + selected.add(i) + linked.append(inds) + self.linked_ = linked + self._fix_links(self.coefs_, self.intercepts_) + elif isinstance(self.linked, list): + self.linked_ = self.linked + self._fix_links(self.coefs_, self.intercepts_) + else: + raise TypeError(f"Unexpected type for linked {type(self.linked)}.") - # We insert the following modification to modify the gradient - # due to the modification of the loss function. 
- deltas[last] = self._modify_loss_derivatives(deltas[last]) + def _fix_links(self, coefs, intercepts): + if self.linked_ is None: + return + for links in self.linked_: + if len(links) <= 1: + raise RuntimeError(f"Unexpected value for link {links}.") + total = 0 + for key in links: + if key[1] == "c": + v = coefs[key[0]][key[2:]] + else: + v = intercepts[key[0]][key[2]] + total += v + total /= len(links) + for key in links: + if key[1] == "c": + coefs[key[0]][key[2:]] = total + else: + intercepts[key[0]][key[2]] = total - # recent version of scikit-learn - # Compute gradient for the last layer - self._compute_loss_grad( - last, n_samples, activations, deltas, coef_grads, intercept_grads) - - inplace_derivative = DERIVATIVES[self.activation] - # Iterate over the hidden layers - for i in range(self.n_layers_ - 2, 0, -1): - deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T) - inplace_derivative(activations[i], deltas[i - 1]) - - self._compute_loss_grad( - i - 1, n_samples, activations, deltas, coef_grads, - intercept_grads) - - return loss, coef_grads, intercept_grads + def _backprop(self, X, y, activations, deltas, coef_grads, + intercept_grads): + batch_loss, coef_grads, intercept_grads = super()._backprop( + X, y, activations, deltas, coef_grads, intercept_grads) + self._fix_links(coef_grads, intercept_grads) + return batch_loss, coef_grads, intercept_grads -class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): +class LinkedMLPRegressor(LinkedMLPBase, MLPRegressor): """ - Quantile MLP Regression or neural networks regression - trained with norm :epkg:`L1`. This class inherits from - :epkg:`sklearn:neural_networks:MLPRegressor`. - This model optimizes the absolute-loss using LBFGS or stochastic gradient - descent. See @see cl CustomizedMultilayerPerceptron and - @see fn absolute_loss. + A neural networks regression for which a subset a coefficients + share the same value. In practice, it should make the training + more stable. See parameter *linked*. :param hidden_layer_sizes: tuple, length = n_layers - 2, default (100,) The ith element represents the number of neurons in the ith @@ -203,6 +215,8 @@ class LinkedMLPRegressor(MLPRegressor, LinkedMLPBase): :param n_iter_no_change: int, optional, default 10 Maximum number of epochs to not meet ``tol`` improvement. 
Only effective when solver='sgd' or 'adam' + :param linked: can be a float to defined the ratio of linked coefficients, + or list of set of indices Fitted attributes: @@ -235,7 +249,7 @@ def __init__(self, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-8, n_iter_no_change=10, - max_fun=15000): + max_fun=15000, linked=None): """ See :epkg:`sklearn:neural_networks:MLPRegressor` """ @@ -252,3 +266,4 @@ def __init__(self, validation_fraction=validation_fraction, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, n_iter_no_change=n_iter_no_change, max_fun=max_fun) + self.linked = linked From 1a5b8c44297edc6cfddabb134596ca9277e5b925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 20:09:33 +0200 Subject: [PATCH 5/9] lint --- appveyor.yml | 2 +- mlinsights/mlmodel/linked_mlpregressor.py | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 7e5afb0c..b96b1d17 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,7 @@ install: # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies - - "%PYTHON%\\Scripts\\pip install -r requirements.txt --no-deps" + - "%PYTHON%\\Scripts\\pip install -r requirements.txt" - "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%" build: off diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 0f0d0975..3f1d7183 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -1,16 +1,10 @@ # -*- coding: utf-8 -*- +# pylint: disable=E1101 """ @file @brief Implements a quantile non-linear regression. """ -import inspect import random -import numpy as np -from sklearn.base import RegressorMixin -from sklearn.utils import check_X_y, column_or_1d -from sklearn.utils.validation import check_is_fitted -from sklearn.utils.extmath import safe_sparse_dot -from sklearn.neural_network._base import DERIVATIVES, LOSS_FUNCTIONS from sklearn.neural_network import MLPRegressor From 79eafc33a9b2f46f2bd1a543f02c5444d1b12c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 16 Oct 2022 20:22:15 +0200 Subject: [PATCH 6/9] Update linked_mlpregressor.py --- mlinsights/mlmodel/linked_mlpregressor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlinsights/mlmodel/linked_mlpregressor.py b/mlinsights/mlmodel/linked_mlpregressor.py index 3f1d7183..f90f87dc 100644 --- a/mlinsights/mlmodel/linked_mlpregressor.py +++ b/mlinsights/mlmodel/linked_mlpregressor.py @@ -9,6 +9,10 @@ class LinkedMLPBase: + """ + Overloads methods from :epkg:`sklearn:neural_networks:MLPRegressor` + and insert the logic to train linked coefficients. 
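# Minimal standalone sketch (not part of the patch) of what "linked coefficients" means.
# A link is a tuple of keys, each key addressing one parameter: (layer, 'c', i, j) for
# coefs_[layer][i, j] and (layer, 'i', k) for intercepts_[layer][k].  When ``linked`` is
# an int, ``_initialize`` draws random groups of coefficients; when it is a list (as in
# the unit test of patch 4), the groups are used as given.  ``_fix_links`` keeps every
# group equal by replacing its members with their mean, and ``_backprop`` applies the
# same averaging to coef_grads/intercept_grads so the tied parameters move together.
import numpy

coefs = [numpy.arange(6, dtype=float).reshape((2, 3))]   # one layer of 2x3 weights
intercepts = [numpy.array([10.0, 20.0, 30.0])]           # and its 3 biases
links = [((0, "c", 1, 2), (0, "i", 0))]                  # tie coefs[0][1, 2] to intercepts[0][0]

for link in links:
    total = 0.0
    for key in link:
        if key[1] == "c":
            total += coefs[key[0]][key[2:]]
        else:
            total += intercepts[key[0]][key[2]]
    total /= len(link)
    for key in link:
        if key[1] == "c":
            coefs[key[0]][key[2:]] = total
        else:
            intercepts[key[0]][key[2]] = total

print(coefs[0][1, 2], intercepts[0][0])   # both 7.5: the linked entries now share one value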
+ """ def _initialize(self, y, layer_units, dtype): super()._initialize(y, layer_units, dtype) From 66b7425069221be372e23551fac0ea083004d74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 17 Oct 2022 08:52:18 +0200 Subject: [PATCH 7/9] Update test_linked_mlpregression.py --- _unittests/ut_mlmodel/test_linked_mlpregression.py | 1 - 1 file changed, 1 deletion(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index c09b0a1f..2fc4c063 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -5,7 +5,6 @@ import unittest import numpy from numpy.random import random -import pandas from sklearn.neural_network import MLPRegressor from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning From 69ca553569e0d3903648bbe027b888bd902e91fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 9 Dec 2022 00:33:41 +0100 Subject: [PATCH 8/9] documentation --- _doc/sphinxdoc/source/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_doc/sphinxdoc/source/index.rst b/_doc/sphinxdoc/source/index.rst index f26e005a..c5b0bf9e 100644 --- a/_doc/sphinxdoc/source/index.rst +++ b/_doc/sphinxdoc/source/index.rst @@ -86,11 +86,11 @@ Short example: :showcode: :warningout: FutureWarning - from sklearn.datasets import load_boston + from sklearn.datasets import load_diabetes from sklearn.linear_model import LinearRegression from mlinsights.mlmodel import QuantileLinearRegression - data = load_boston() + data = load_diabetes() X, y = data.data, data.target clq = QuantileLinearRegression() From 97ad740877efef9cb06395dd4f12bf447547d24e Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 2 Oct 2023 11:30:36 +0200 Subject: [PATCH 9/9] fix unit test --- .../ut_mlmodel/test_linked_mlpregression.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/_unittests/ut_mlmodel/test_linked_mlpregression.py b/_unittests/ut_mlmodel/test_linked_mlpregression.py index adbe9507..e3dd4cc5 100644 --- a/_unittests/ut_mlmodel/test_linked_mlpregression.py +++ b/_unittests/ut_mlmodel/test_linked_mlpregression.py @@ -8,12 +8,12 @@ from sklearn.neural_network import MLPRegressor from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning -from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlinsights.ext_test_case import ExtTestCase, ignore_warnings from mlinsights.mlmodel import LinkedMLPRegressor from mlinsights.mlmodel import ( - test_sklearn_pickle, - test_sklearn_clone, - test_sklearn_grid_search_cv, + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv, ) @@ -86,12 +86,14 @@ def test_regression_pickle(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - test_sklearn_pickle(lambda: MLPRegressor(hidden_layer_sizes=(3,)), X, Y) - test_sklearn_pickle(lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y) + run_test_sklearn_pickle(lambda: MLPRegressor(hidden_layer_sizes=(3,)), X, Y) + run_test_sklearn_pickle( + lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y + ) @ignore_warnings(ConvergenceWarning) def test_regression_clone(self): - test_sklearn_clone(lambda: LinkedMLPRegressor()) + run_test_sklearn_clone(lambda: LinkedMLPRegressor()) @ignore_warnings(ConvergenceWarning) def 
test_regression_grid_search(self): @@ -102,12 +104,12 @@ def test_regression_grid_search(self): X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps self.assertRaise( - lambda: test_sklearn_grid_search_cv( + lambda: run_test_sklearn_grid_search_cv( lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y ), ValueError, ) - res = test_sklearn_grid_search_cv( + res = run_test_sklearn_grid_search_cv( lambda: LinkedMLPRegressor(hidden_layer_sizes=(3,)), X, Y,