diff --git a/.travis.yml b/.travis.yml
index 2563b54dc6741..d79723c969458 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,7 +38,7 @@ matrix:
# This environment tests the newest supported Anaconda release (4.4.0)
# It also runs tests requiring Pandas.
- env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
- NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
+ NUMPY_VERSION="1.13" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.2"
CYTHON_VERSION="0.25.2" COVERAGE=true
# This environment use pytest to run the tests. It uses the newest
# supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
@@ -49,7 +49,7 @@ matrix:
# flake8 linting on diff wrt common ancestor with upstream/master
- env: RUN_FLAKE8="true" SKIP_TESTS="true"
DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
- NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
+ NUMPY_VERSION="1.13" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
# This environment tests scikit-learn against numpy and scipy master
# installed from their CI wheels in a virtualenv with the Python
# interpreter provided by travis.
diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
index 63c8da5aafeac..b3f785254c2ae 100755
--- a/build_tools/circle/build_doc.sh
+++ b/build_tools/circle/build_doc.sh
@@ -109,7 +109,7 @@ conda update --yes --quiet conda
conda create -n $CONDA_ENV_NAME --yes --quiet python numpy scipy \
cython nose coverage matplotlib sphinx=1.6.2 pillow
source activate testenv
-pip install numpydoc
+pip install sphinx-gallery numpydoc
# Build and install scikit-learn in dev mode
python setup.py develop
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index 8cd774d649338..1b0832b19ab9c 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -39,22 +39,30 @@ if [[ "$DISTRIB" == "conda" ]]; then
# Configure the conda environment and put it in the path using the
# provided versions
+ if [[ "$USE_PYTEST" == "true" ]]; then
+ TEST_RUNNER_PACKAGE=pytest
+ else
+ TEST_RUNNER_PACKAGE=nose
+ fi
+
if [[ "$INSTALL_MKL" == "true" ]]; then
- conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \
- numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
+ conda create -n testenv --yes python=$PYTHON_VERSION pip \
+ $TEST_RUNNER_PACKAGE numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
mkl cython=$CYTHON_VERSION \
${PANDAS_VERSION+pandas=$PANDAS_VERSION}
else
- conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \
- numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
+ conda create -n testenv --yes python=$PYTHON_VERSION pip \
+ $TEST_RUNNER_PACKAGE numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
nomkl cython=$CYTHON_VERSION \
${PANDAS_VERSION+pandas=$PANDAS_VERSION}
fi
source activate testenv
- # Install nose-timer via pip
- pip install nose-timer
+ if [[ $USE_PYTEST != "true" ]]; then
+ # Install nose-timer via pip
+ pip install nose-timer
+ fi
elif [[ "$DISTRIB" == "ubuntu" ]]; then
# At the time of writing numpy 1.9.1 is included in the travis
diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh
index cdcfbe01b3b8b..f7d3ab2a32e0e 100755
--- a/build_tools/travis/test_script.sh
+++ b/build_tools/travis/test_script.sh
@@ -43,10 +43,13 @@ run_tests() {
fi
$TEST_CMD sklearn
- # Test doc (only with nose until we switch completely to pytest)
- if [[ "$USE_PYTEST" != "true" ]]; then
- # Going back to git checkout folder needed for make test-doc
- cd $OLDPWD
+ # Going back to git checkout folder needed to test documentation
+ cd $OLDPWD
+
+ if [[ "$USE_PYTEST" == "true" ]]; then
+ pytest $(find doc -name '*.rst' | sort)
+ else
+ # Makefile is using nose
make test-doc
fi
}
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/doc/README.md b/doc/README.md
index 141db3d7a8da5..82240fb701aa3 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -1,8 +1,13 @@
# Documentation for scikit-learn
This section contains the full manual and web page as displayed in
-http://scikit-learn.org. To generate the full web page, including
-the example gallery (this might take a while):
+http://scikit-learn.org.
+Building the website requires the sphinx and sphinx-gallery packages:
+
+ pip install sphinx sphinx-gallery
+
+To generate the full web page, including the example gallery (this might take a
+while):
make html
@@ -16,7 +21,6 @@ To build the PDF manual, run
make latexpdf
-
The website is hosted at github and can be updated manually (for releases)
by pushing to the https://github.com/scikit-learn/scikit-learn.github.io repository.
diff --git a/doc/datasets/conftest.py b/doc/datasets/conftest.py
new file mode 100644
index 0000000000000..0ccc0bced9ee7
--- /dev/null
+++ b/doc/datasets/conftest.py
@@ -0,0 +1,75 @@
+from os.path import exists
+from os.path import join
+
+import numpy as np
+
+from sklearn.utils.testing import SkipTest
+from sklearn.utils.testing import check_skip_network
+from sklearn.datasets import get_data_home
+from sklearn.utils.testing import install_mldata_mock
+from sklearn.utils.testing import uninstall_mldata_mock
+
+
+def setup_labeled_faces():
+ data_home = get_data_home()
+ if not exists(join(data_home, 'lfw_home')):
+ raise SkipTest("Skipping dataset loading doctests")
+
+
+def setup_mldata():
+ # setup mock urllib2 module to avoid downloading from mldata.org
+ install_mldata_mock({
+ 'mnist-original': {
+ 'data': np.empty((70000, 784)),
+ 'label': np.repeat(np.arange(10, dtype='d'), 7000),
+ },
+ 'iris': {
+ 'data': np.empty((150, 4)),
+ },
+ 'datasets-uci-iris': {
+ 'double0': np.empty((150, 4)),
+ 'class': np.empty((150,)),
+ },
+ })
+
+
+def teardown_mldata():
+ uninstall_mldata_mock()
+
+
+def setup_rcv1():
+ check_skip_network()
+ # skip the test in rcv1.rst if the dataset is not already loaded
+ rcv1_dir = join(get_data_home(), "RCV1")
+ if not exists(rcv1_dir):
+ raise SkipTest("Download RCV1 dataset to run this test.")
+
+
+def setup_twenty_newsgroups():
+ data_home = get_data_home()
+ if not exists(join(data_home, '20news_home')):
+ raise SkipTest("Skipping dataset loading doctests")
+
+
+def setup_working_with_text_data():
+ check_skip_network()
+
+
+def pytest_runtest_setup(item):
+ fname = item.fspath.strpath
+ if fname.endswith('datasets/labeled_faces.rst'):
+ setup_labeled_faces()
+ elif fname.endswith('datasets/mldata.rst'):
+ setup_mldata()
+ elif fname.endswith('datasets/rcv1.rst'):
+ setup_rcv1()
+ elif fname.endswith('datasets/twenty_newsgroups.rst'):
+ setup_twenty_newsgroups()
+ elif fname.endswith('datasets/working_with_text_data.rst'):
+ setup_working_with_text_data()
+
+
+def pytest_runtest_teardown(item):
+ fname = item.fspath.strpath
+ if fname.endswith('datasets/mldata.rst'):
+ teardown_mldata()
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
index 5083317cffc53..b94dfd7620a24 100644
--- a/doc/datasets/mldata.rst
+++ b/doc/datasets/mldata.rst
@@ -3,6 +3,11 @@
>>> import numpy as np
>>> import os
+ >>> import tempfile
+ >>> # Create a temporary folder for the data fetcher
+ >>> custom_data_home = tempfile.mkdtemp()
+ >>> os.makedirs(os.path.join(custom_data_home, 'mldata'))
+
.. _mldata:
@@ -70,3 +75,8 @@ defaults to individual datasets:
... data_home=custom_data_home)
>>> iris3 = fetch_mldata('datasets-UCI iris', target_name='class',
... data_name='double0', data_home=custom_data_home)
+
+
+..
+ >>> import shutil
+ >>> shutil.rmtree(custom_data_home)
diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py
index 37d9f9af05dc3..0ee5cccaa0f5e 100644
--- a/doc/datasets/mldata_fixture.py
+++ b/doc/datasets/mldata_fixture.py
@@ -3,26 +3,12 @@
Mock urllib2 access to mldata.org and create a temporary data folder.
"""
-from os import makedirs
-from os.path import join
import numpy as np
-import tempfile
-import shutil
-from sklearn import datasets
from sklearn.utils.testing import install_mldata_mock
from sklearn.utils.testing import uninstall_mldata_mock
-def globs(globs):
- # Create a temporary folder for the data fetcher
- global custom_data_home
- custom_data_home = tempfile.mkdtemp()
- makedirs(join(custom_data_home, 'mldata'))
- globs['custom_data_home'] = custom_data_home
- return globs
-
-
def setup_module():
# setup mock urllib2 module to avoid downloading from mldata.org
install_mldata_mock({
@@ -42,4 +28,3 @@ def setup_module():
def teardown_module():
uninstall_mldata_mock()
- shutil.rmtree(custom_data_home)
diff --git a/doc/index.rst b/doc/index.rst
index e835de46a660e..ecea32e3229b9 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -207,13 +207,13 @@
On-going development:
What's new (Changelog)
- September 2016. scikit-learn 0.18.0 is available for download (Changelog).
+ September 2016. scikit-learn 0.18.0 is available for download (Changelog).
- November 2015. scikit-learn 0.17.0 is available for download (Changelog).
+ November 2015. scikit-learn 0.17.0 is available for download (Changelog).
- March 2015. scikit-learn 0.16.0 is available for download (Changelog).
+ March 2015. scikit-learn 0.16.0 is available for download (Changelog).
- July 2014. scikit-learn 0.15.0 is available for download (Changelog).
+ July 2014. scikit-learn 0.15.0 is available for download (Changelog).
July 14-20th, 2014: international sprint.
During this week-long sprint, we gathered 18 of the core
@@ -227,7 +227,7 @@
Inria,
and tinyclues.
- August 2013. scikit-learn 0.14 is available for download (Changelog).
+ August 2013. scikit-learn 0.14 is available for download (Changelog).
diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index b47726979351f..c68bb7ef275b0 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -273,7 +273,7 @@ validation strategies.
.. _iid_cv:
Cross-validation iterators for i.i.d. data
-==========================================
+------------------------------------------
Assuming that some data is Independent and Identically Distributed (i.i.d.) is
making the assumption that all samples stem from the same generative process
@@ -294,7 +294,7 @@ devices) it safer to use :ref:`group-wise cross-validation `.
K-fold
-------
+^^^^^^
:class:`KFold` divides all the samples in :math:`k` groups of samples,
called folds (if :math:`k = n`, this is equivalent to the *Leave One
@@ -323,7 +323,7 @@ Thus, one can create the training/test sets using numpy indexing::
Repeated K-Fold
----------------
+^^^^^^^^^^^^^^^
:class:`RepeatedKFold` repeats K-Fold n times. It can be used when one
requires to run :class:`KFold` n times, producing different splits in
@@ -350,7 +350,7 @@ with different randomization in each repetition.
Leave One Out (LOO)
--------------------
+^^^^^^^^^^^^^^^^^^^
:class:`LeaveOneOut` (or LOO) is a simple cross-validation. Each learning
set is created by taking all the samples except one, the test set being
@@ -408,7 +408,7 @@ fold cross validation should be preferred to LOO.
Leave P Out (LPO)
------------------
+^^^^^^^^^^^^^^^^^
:class:`LeavePOut` is very similar to :class:`LeaveOneOut` as it creates all
the possible training/test sets by removing :math:`p` samples from the complete
@@ -435,7 +435,7 @@ Example of Leave-2-Out on a dataset with 4 samples::
.. _ShuffleSplit:
Random permutations cross-validation a.k.a. Shuffle & Split
------------------------------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:class:`ShuffleSplit`
@@ -465,7 +465,7 @@ validation that allows a finer control on the number of iterations and
the proportion of samples on each side of the train / test split.
Cross-validation iterators with stratification based on class labels.
-=====================================================================
+---------------------------------------------------------------------
Some classification problems can exhibit a large imbalance in the distribution
of the target classes: for instance there could be several times more negative
@@ -475,7 +475,7 @@ stratified sampling as implemented in :class:`StratifiedKFold` and
approximately preserved in each train and validation fold.
Stratified k-fold
------------------
+^^^^^^^^^^^^^^^^^
:class:`StratifiedKFold` is a variation of *k-fold* which returns *stratified*
folds: each set contains approximately the same percentage of samples of each
@@ -500,7 +500,7 @@ with different randomization in each repetition.
Stratified Shuffle Split
-------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^
:class:`StratifiedShuffleSplit` is a variation of *ShuffleSplit*, which returns
stratified splits, *i.e* which creates splits by preserving the same
@@ -509,7 +509,7 @@ percentage for each target class as in the complete set.
.. _group_cv:
Cross-validation iterators for grouped data.
-============================================
+--------------------------------------------
The i.i.d. assumption is broken if the underlying generative process yield
groups of dependent samples.
@@ -530,7 +530,7 @@ parameter.
Group k-fold
-------------
+^^^^^^^^^^^^
:class:`GroupKFold` is a variation of k-fold which ensures that the same group is
not represented in both testing and training sets. For example if the data is
@@ -560,7 +560,7 @@ size due to the imbalance in the data.
Leave One Group Out
--------------------
+^^^^^^^^^^^^^^^^^^^
:class:`LeaveOneGroupOut` is a cross-validation scheme which holds out
the samples according to a third-party provided array of integer groups. This
@@ -591,7 +591,7 @@ groups could be the year of collection of the samples and thus allow
for cross-validation against time-based splits.
Leave P Groups Out
-------------------
+^^^^^^^^^^^^^^^^^^
:class:`LeavePGroupsOut` is similar as :class:`LeaveOneGroupOut`, but removes
samples related to :math:`P` groups for each training/test set.
@@ -611,7 +611,7 @@ Example of Leave-2-Group Out::
[0 1] [2 3 4 5]
Group Shuffle Split
--------------------
+^^^^^^^^^^^^^^^^^^^
The :class:`GroupShuffleSplit` iterator behaves as a combination of
:class:`ShuffleSplit` and :class:`LeavePGroupsOut`, and generates a
@@ -643,7 +643,7 @@ generated by :class:`LeavePGroupsOut`.
Predefined Fold-Splits / Validation-Sets
-========================================
+----------------------------------------
For some datasets, a pre-defined split of the data into training- and
validation fold or into several cross-validation folds already
@@ -656,7 +656,7 @@ samples that are part of the validation set, and to -1 for all other samples.
.. _timeseries_cv:
Cross validation of time series data
-====================================
+------------------------------------
Time series data is characterised by the correlation between observations
that are near in time (*autocorrelation*). However, classical
@@ -671,7 +671,7 @@ solution is provided by :class:`TimeSeriesSplit`.
Time Series Split
------------------
+^^^^^^^^^^^^^^^^^
:class:`TimeSeriesSplit` is a variation of *k-fold* which
returns first :math:`k` folds as train set and the :math:`(k+1)` th
diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
index 386865d3d0a8a..62d566fe150ba 100644
--- a/doc/modules/svm.rst
+++ b/doc/modules/svm.rst
@@ -212,13 +212,12 @@ Then ``dual_coef_`` looks like this:
Scores and probabilities
------------------------
-The :class:`SVC` method ``decision_function`` gives per-class scores
-for each sample (or a single score per sample in the binary case).
-When the constructor option ``probability`` is set to ``True``,
-class membership probability estimates
-(from the methods ``predict_proba`` and ``predict_log_proba``) are enabled.
-In the binary case, the probabilities are calibrated using Platt scaling:
-logistic regression on the SVM's scores,
+The ``decision_function`` method of :class:`SVC` and :class:`NuSVC` gives
+per-class scores for each sample (or a single score per sample in the binary
+case). When the constructor option ``probability`` is set to ``True``,
+class membership probability estimates (from the methods ``predict_proba`` and
+``predict_log_proba``) are enabled. In the binary case, the probabilities are
+calibrated using Platt scaling: logistic regression on the SVM's scores,
fit by an additional cross-validation on the training data.
In the multiclass case, this is extended as per Wu et al. (2004).
@@ -245,7 +244,7 @@ and use ``decision_function`` instead of ``predict_proba``.
* Platt
`"Probabilistic outputs for SVMs and comparisons to regularized likelihood methods"
- `.
+ `_.
Unbalanced problems
--------------------
@@ -399,7 +398,7 @@ Tips on Practical Use
function can be configured to be almost the same as the :class:`LinearSVC`
model.
- * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`nuSVC` and
+ * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and
:class:`NuSVR`, the size of the kernel cache has a strong impact on run
times for larger problems. If you have enough RAM available, it is
recommended to set ``cache_size`` to a higher value than the default of
@@ -423,10 +422,24 @@ Tips on Practical Use
positive and few negative), set ``class_weight='balanced'`` and/or try
different penalty parameters ``C``.
- * The underlying :class:`LinearSVC` implementation uses a random
- number generator to select features when fitting the model. It is
- thus not uncommon, to have slightly different results for the same
- input data. If that happens, try with a smaller tol parameter.
+ * **Randomness of the underlying implementations**: The underlying
+ implementations of :class:`SVC` and :class:`NuSVC` use a random number
+ generator only to shuffle the data for probability estimation (when
+ ``probability`` is set to ``True``). This randomness can be controlled
+ with the ``random_state`` parameter. If ``probability`` is set to ``False``
+ these estimators are not random and ``random_state`` has no effect on the
+ results. The underlying :class:`OneClassSVM` implementation is similar to
+ the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation
+ is provided for :class:`OneClassSVM`, it is not random.
+
+ The underlying :class:`LinearSVC` implementation uses a random number
+ generator to select features when fitting the model with a dual coordinate
+ descent (i.e. when ``dual`` is set to ``True``). It is thus not uncommon
+ to have slightly different results for the same input data. If that
+ happens, try with a smaller ``tol`` parameter. This randomness can also be
+ controlled with the ``random_state`` parameter. When ``dual`` is
+ set to ``False`` the underlying implementation of :class:`LinearSVC` is
+ not random and ``random_state`` has no effect on the results.
* Using L1 penalization as provided by ``LinearSVC(loss='l2', penalty='l1',
dual=False)`` yields a sparse solution, i.e. only a subset of feature
diff --git a/doc/sphinxext/sphinx_gallery/__init__.py b/doc/sphinxext/sphinx_gallery/__init__.py
deleted file mode 100644
index e113f97d2a2c7..0000000000000
--- a/doc/sphinxext/sphinx_gallery/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""
-Sphinx Gallery
-==============
-
-"""
-import os
-__version__ = '0.1.11'
-
-
-def glr_path_static():
- """Returns path to packaged static files"""
- return os.path.abspath(os.path.join(os.path.dirname(__file__), '_static'))
diff --git a/doc/sphinxext/sphinx_gallery/_static/broken_example.png b/doc/sphinxext/sphinx_gallery/_static/broken_example.png
deleted file mode 100644
index 4fea24e7df478..0000000000000
Binary files a/doc/sphinxext/sphinx_gallery/_static/broken_example.png and /dev/null differ
diff --git a/doc/sphinxext/sphinx_gallery/_static/gallery.css b/doc/sphinxext/sphinx_gallery/_static/gallery.css
deleted file mode 100644
index 37047a9b91175..0000000000000
--- a/doc/sphinxext/sphinx_gallery/_static/gallery.css
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
-Sphinx-Gallery has compatible CSS to fix default sphinx themes
-Tested for Sphinx 1.3.1 for all themes: default, alabaster, sphinxdoc,
-scrolls, agogo, traditional, nature, haiku, pyramid
-Tested for Read the Docs theme 0.1.7 */
-.sphx-glr-thumbcontainer {
- background: #fff;
- border: solid #fff 1px;
- -moz-border-radius: 5px;
- -webkit-border-radius: 5px;
- border-radius: 5px;
- box-shadow: none;
- float: left;
- margin: 5px;
- min-height: 230px;
- padding-top: 5px;
- position: relative;
-}
-.sphx-glr-thumbcontainer:hover {
- border: solid #b4ddfc 1px;
- box-shadow: 0 0 15px rgba(142, 176, 202, 0.5);
-}
-.sphx-glr-thumbcontainer a.internal {
- bottom: 0;
- display: block;
- left: 0;
- padding: 150px 10px 0;
- position: absolute;
- right: 0;
- top: 0;
-}
-/* Next one is to avoid Sphinx traditional theme to cover all the
-thumbnail with its default link Background color */
-.sphx-glr-thumbcontainer a.internal:hover {
- background-color: transparent;
-}
-
-.sphx-glr-thumbcontainer p {
- margin: 0 0 .1em 0;
-}
-.sphx-glr-thumbcontainer .figure {
- margin: 10px;
- width: 160px;
-}
-.sphx-glr-thumbcontainer img {
- display: inline;
- max-height: 160px;
- width: 160px;
-}
-.sphx-glr-thumbcontainer[tooltip]:hover:after {
- background: rgba(0, 0, 0, 0.8);
- -webkit-border-radius: 5px;
- -moz-border-radius: 5px;
- border-radius: 5px;
- color: #fff;
- content: attr(tooltip);
- left: 95%;
- padding: 5px 15px;
- position: absolute;
- z-index: 98;
- width: 220px;
- bottom: 52%;
-}
-.sphx-glr-thumbcontainer[tooltip]:hover:before {
- border: solid;
- border-color: #333 transparent;
- border-width: 18px 0 0 20px;
- bottom: 58%;
- content: '';
- left: 85%;
- position: absolute;
- z-index: 99;
-}
-
-.highlight-pytb pre {
- background-color: #ffe4e4;
- border: 1px solid #f66;
- margin-top: 10px;
- padding: 7px;
-}
-
-.sphx-glr-script-out {
- color: #888;
- margin: 0;
-}
-.sphx-glr-script-out .highlight {
- background-color: transparent;
- margin-left: 2.5em;
- margin-top: -1.4em;
-}
-.sphx-glr-script-out .highlight pre {
- background-color: #fafae2;
- border: 0;
- max-height: 30em;
- overflow: auto;
- padding-left: 1ex;
- margin: 0px;
- word-break: break-word;
-}
-.sphx-glr-script-out + p {
- margin-top: 1.8em;
-}
-blockquote.sphx-glr-script-out {
- margin-left: 0pt;
-}
-
-div.sphx-glr-footer {
- text-align: center;
-}
-
-div.sphx-glr-download {
- display: inline-block;
- margin: 1em auto 1ex 2ex;
- vertical-align: middle;
-}
-
-div.sphx-glr-download a {
- background-color: #ffc;
- background-image: linear-gradient(to bottom, #FFC, #d5d57e);
- border-radius: 4px;
- border: 1px solid #c2c22d;
- color: #000;
- display: inline-block;
- /* Not valid in old browser, hence we keep the line above to override */
- display: table-caption;
- font-weight: bold;
- padding: 1ex;
- text-align: center;
-}
-
-/* The last child of a download button is the file name */
-div.sphx-glr-download a span:last-child {
- font-size: smaller;
-}
-
-@media (min-width: 20em) {
- div.sphx-glr-download a {
- min-width: 10em;
- }
-}
-
-@media (min-width: 30em) {
- div.sphx-glr-download a {
- min-width: 13em;
- }
-}
-
-@media (min-width: 40em) {
- div.sphx-glr-download a {
- min-width: 16em;
- }
-}
-
-
-div.sphx-glr-download code.download {
- display: inline-block;
- white-space: normal;
- word-break: normal;
- overflow-wrap: break-word;
- /* border and background are given by the enclosing 'a' */
- border: none;
- background: none;
-}
-
-div.sphx-glr-download a:hover {
- box-shadow: inset 0 1px 0 rgba(255,255,255,.1), 0 1px 5px rgba(0,0,0,.25);
- text-decoration: none;
- background-image: none;
- background-color: #d5d57e;
-}
-
-ul.sphx-glr-horizontal {
- list-style: none;
- padding: 0;
-}
-ul.sphx-glr-horizontal li {
- display: inline;
-}
-ul.sphx-glr-horizontal img {
- height: auto !important;
-}
-
-p.sphx-glr-signature a.reference.external {
- -moz-border-radius: 5px;
- -webkit-border-radius: 5px;
- border-radius: 5px;
- padding: 3px;
- font-size: 75%;
- text-align: right;
- margin-left: auto;
- display: table;
-}
diff --git a/doc/sphinxext/sphinx_gallery/_static/no_image.png b/doc/sphinxext/sphinx_gallery/_static/no_image.png
deleted file mode 100644
index 8c2d48d5d3f00..0000000000000
Binary files a/doc/sphinxext/sphinx_gallery/_static/no_image.png and /dev/null differ
diff --git a/doc/sphinxext/sphinx_gallery/backreferences.py b/doc/sphinxext/sphinx_gallery/backreferences.py
deleted file mode 100644
index 32e4dd913f901..0000000000000
--- a/doc/sphinxext/sphinx_gallery/backreferences.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# -*- coding: utf-8 -*-
-# Author: Óscar Nájera
-# License: 3-clause BSD
-"""
-Backreferences Generator
-========================
-
-Parses example file code in order to keep track of used functions
-"""
-
-from __future__ import print_function
-import ast
-import os
-
-
-# Try Python 2 first, otherwise load from Python 3
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
-
-
-class NameFinder(ast.NodeVisitor):
- """Finds the longest form of variable names and their imports in code
-
- Only retains names from imported modules.
- """
-
- def __init__(self):
- super(NameFinder, self).__init__()
- self.imported_names = {}
- self.accessed_names = set()
-
- def visit_Import(self, node, prefix=''):
- for alias in node.names:
- local_name = alias.asname or alias.name
- self.imported_names[local_name] = prefix + alias.name
-
- def visit_ImportFrom(self, node):
- self.visit_Import(node, node.module + '.')
-
- def visit_Name(self, node):
- self.accessed_names.add(node.id)
-
- def visit_Attribute(self, node):
- attrs = []
- while isinstance(node, ast.Attribute):
- attrs.append(node.attr)
- node = node.value
-
- if isinstance(node, ast.Name):
- # This is a.b, not e.g. a().b
- attrs.append(node.id)
- self.accessed_names.add('.'.join(reversed(attrs)))
- else:
- # need to get a in a().b
- self.visit(node)
-
- def get_mapping(self):
- for name in self.accessed_names:
- local_name = name.split('.', 1)[0]
- remainder = name[len(local_name):]
- if local_name in self.imported_names:
- # Join import path to relative path
- full_name = self.imported_names[local_name] + remainder
- yield name, full_name
-
-
-def get_short_module_name(module_name, obj_name):
- """ Get the shortest possible module name """
- parts = module_name.split('.')
- short_name = module_name
- for i in range(len(parts) - 1, 0, -1):
- short_name = '.'.join(parts[:i])
- try:
- exec('from %s import %s' % (short_name, obj_name))
- except Exception: # libraries can throw all sorts of exceptions...
- # get the last working module name
- short_name = '.'.join(parts[:(i + 1)])
- break
- return short_name
-
-
-def identify_names(code):
- """Builds a codeobj summary by identifying and resolving used names
-
- >>> code = '''
- ... from a.b import c
- ... import d as e
- ... print(c)
- ... e.HelloWorld().f.g
- ... '''
- >>> for name, o in sorted(identify_names(code).items()):
- ... print(name, o['name'], o['module'], o['module_short'])
- c c a.b a.b
- e.HelloWorld HelloWorld d d
- """
- finder = NameFinder()
- try:
- finder.visit(ast.parse(code))
- except SyntaxError:
- return {}
-
- example_code_obj = {}
- for name, full_name in finder.get_mapping():
- # name is as written in file (e.g. np.asarray)
- # full_name includes resolved import path (e.g. numpy.asarray)
- splitted = full_name.rsplit('.', 1)
- if len(splitted) == 1:
- # module without attribute. This is not useful for
- # backreferences
- continue
-
- module, attribute = splitted
- # get shortened module name
- module_short = get_short_module_name(module, attribute)
- cobj = {'name': attribute, 'module': module,
- 'module_short': module_short}
- example_code_obj[name] = cobj
- return example_code_obj
-
-
-def scan_used_functions(example_file, gallery_conf):
- """save variables so we can later add links to the documentation"""
- example_code_obj = identify_names(open(example_file).read())
- if example_code_obj:
- codeobj_fname = example_file[:-3] + '_codeobj.pickle'
- with open(codeobj_fname, 'wb') as fid:
- pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)
-
- backrefs = set('{module_short}.{name}'.format(**entry)
- for entry in example_code_obj.values()
- if entry['module'].startswith(gallery_conf['doc_module']))
-
- return backrefs
-
-
-THUMBNAIL_TEMPLATE = """
-.. raw:: html
-
-
-
-.. only:: html
-
- .. figure:: /{thumbnail}
-
- :ref:`sphx_glr_{ref_name}`
-
-.. raw:: html
-
-
-"""
-
-BACKREF_THUMBNAIL_TEMPLATE = THUMBNAIL_TEMPLATE + """
-.. only:: not html
-
- * :ref:`sphx_glr_{ref_name}`
-"""
-
-
-def _thumbnail_div(full_dir, fname, snippet, is_backref=False):
- """Generates RST to place a thumbnail in a gallery"""
- thumb = os.path.join(full_dir, 'images', 'thumb',
- 'sphx_glr_%s_thumb.png' % fname[:-3])
-
- # Inside rst files forward slash defines paths
- thumb = thumb.replace(os.sep, "/")
-
- ref_name = os.path.join(full_dir, fname).replace(os.path.sep, '_')
-
- template = BACKREF_THUMBNAIL_TEMPLATE if is_backref else THUMBNAIL_TEMPLATE
- return template.format(snippet=snippet, thumbnail=thumb, ref_name=ref_name)
-
-
-def write_backreferences(seen_backrefs, gallery_conf,
- target_dir, fname, snippet):
- """Writes down back reference files, which include a thumbnail list
- of examples using a certain module"""
- if gallery_conf['backreferences_dir'] is None:
- return
-
- example_file = os.path.join(target_dir, fname)
- build_target_dir = os.path.relpath(target_dir, gallery_conf['src_dir'])
- backrefs = scan_used_functions(example_file, gallery_conf)
- for backref in backrefs:
- include_path = os.path.join(gallery_conf['src_dir'],
- gallery_conf['backreferences_dir'],
- '%s.examples' % backref)
- seen = backref in seen_backrefs
- with open(include_path, 'a' if seen else 'w') as ex_file:
- if not seen:
- heading = '\n\nExamples using ``%s``' % backref
- ex_file.write(heading + '\n')
- ex_file.write('^' * len(heading) + '\n')
- ex_file.write(_thumbnail_div(build_target_dir, fname, snippet,
- is_backref=True))
- seen_backrefs.add(backref)
diff --git a/doc/sphinxext/sphinx_gallery/docs_resolv.py b/doc/sphinxext/sphinx_gallery/docs_resolv.py
deleted file mode 100644
index 0f9943b683d1c..0000000000000
--- a/doc/sphinxext/sphinx_gallery/docs_resolv.py
+++ /dev/null
@@ -1,463 +0,0 @@
-# -*- coding: utf-8 -*-
-# Author: Óscar Nájera
-# License: 3-clause BSD
-"""
-Link resolver objects
-=====================
-"""
-from __future__ import print_function
-import gzip
-import os
-import posixpath
-import re
-import shelve
-import sys
-
-from sphinx.util.console import fuchsia
-
-# Try Python 2 first, otherwise load from Python 3
-try:
- import cPickle as pickle
- import urllib2 as urllib
- from urllib2 import HTTPError, URLError
-except ImportError:
- import pickle
- import urllib.request
- import urllib.error
- import urllib.parse
- from urllib.error import HTTPError, URLError
-
-from io import StringIO
-
-
-def _get_data(url):
- """Helper function to get data over http or from a local file"""
- if url.startswith('http://'):
- # Try Python 2, use Python 3 on exception
- try:
- resp = urllib.urlopen(url)
- encoding = resp.headers.dict.get('content-encoding', 'plain')
- except AttributeError:
- resp = urllib.request.urlopen(url)
- encoding = resp.headers.get('content-encoding', 'plain')
- data = resp.read()
- if encoding == 'plain':
- pass
- elif encoding == 'gzip':
- data = StringIO(data)
- data = gzip.GzipFile(fileobj=data).read()
- else:
- raise RuntimeError('unknown encoding')
- else:
- with open(url, 'r') as fid:
- data = fid.read()
-
- return data
-
-
-def get_data(url, gallery_dir):
- """Persistent dictionary usage to retrieve the search indexes"""
-
- # shelve keys need to be str in python 2
- if sys.version_info[0] == 2 and isinstance(url, unicode):
- url = url.encode('utf-8')
-
- cached_file = os.path.join(gallery_dir, 'searchindex')
- search_index = shelve.open(cached_file)
- if url in search_index:
- data = search_index[url]
- else:
- data = _get_data(url)
- search_index[url] = data
- search_index.close()
-
- return data
-
-
-def _select_block(str_in, start_tag, end_tag):
- """Select first block delimited by start_tag and end_tag"""
- start_pos = str_in.find(start_tag)
- if start_pos < 0:
- raise ValueError('start_tag not found')
- depth = 0
- for pos in range(start_pos, len(str_in)):
- if str_in[pos] == start_tag:
- depth += 1
- elif str_in[pos] == end_tag:
- depth -= 1
-
- if depth == 0:
- break
- sel = str_in[start_pos + 1:pos]
- return sel
-
-
-def _parse_dict_recursive(dict_str):
- """Parse a dictionary from the search index"""
- dict_out = dict()
- pos_last = 0
- pos = dict_str.find(':')
- while pos >= 0:
- key = dict_str[pos_last:pos]
- if dict_str[pos + 1] == '[':
- # value is a list
- pos_tmp = dict_str.find(']', pos + 1)
- if pos_tmp < 0:
- raise RuntimeError('error when parsing dict')
- value = dict_str[pos + 2: pos_tmp].split(',')
- # try to convert elements to int
- for i in range(len(value)):
- try:
- value[i] = int(value[i])
- except ValueError:
- pass
- elif dict_str[pos + 1] == '{':
- # value is another dictionary
- subdict_str = _select_block(dict_str[pos:], '{', '}')
- value = _parse_dict_recursive(subdict_str)
- pos_tmp = pos + len(subdict_str)
- else:
- raise ValueError('error when parsing dict: unknown elem')
-
- key = key.strip('"')
- if len(key) > 0:
- dict_out[key] = value
-
- pos_last = dict_str.find(',', pos_tmp)
- if pos_last < 0:
- break
- pos_last += 1
- pos = dict_str.find(':', pos_last)
-
- return dict_out
-
-
-def parse_sphinx_searchindex(searchindex):
- """Parse a Sphinx search index
-
- Parameters
- ----------
- searchindex : str
- The Sphinx search index (contents of searchindex.js)
-
- Returns
- -------
- filenames : list of str
- The file names parsed from the search index.
- objects : dict
- The objects parsed from the search index.
- """
- # Make sure searchindex uses UTF-8 encoding
- if hasattr(searchindex, 'decode'):
- searchindex = searchindex.decode('UTF-8')
-
- # parse objects
- query = 'objects:'
- pos = searchindex.find(query)
- if pos < 0:
- raise ValueError('"objects:" not found in search index')
-
- sel = _select_block(searchindex[pos:], '{', '}')
- objects = _parse_dict_recursive(sel)
-
- # parse filenames
- query = 'filenames:'
- pos = searchindex.find(query)
- if pos < 0:
- raise ValueError('"filenames:" not found in search index')
- filenames = searchindex[pos + len(query) + 1:]
- filenames = filenames[:filenames.find(']')]
- filenames = [f.strip('"') for f in filenames.split(',')]
-
- return filenames, objects
-
-
-class SphinxDocLinkResolver(object):
- """ Resolve documentation links using searchindex.js generated by Sphinx
-
- Parameters
- ----------
- doc_url : str
- The base URL of the project website.
- searchindex : str
- Filename of searchindex, relative to doc_url.
- extra_modules_test : list of str
- List of extra module names to test.
- relative : bool
- Return relative links (only useful for links to documentation of this
- package).
- """
-
- def __init__(self, doc_url, gallery_dir, searchindex='searchindex.js',
- extra_modules_test=None, relative=False):
- self.doc_url = doc_url
- self.gallery_dir = gallery_dir
- self.relative = relative
- self._link_cache = {}
-
- self.extra_modules_test = extra_modules_test
- self._page_cache = {}
- if doc_url.startswith('http://'):
- if relative:
- raise ValueError('Relative links are only supported for local '
- 'URLs (doc_url cannot start with "http://)"')
- searchindex_url = doc_url + '/' + searchindex
- else:
- searchindex_url = os.path.join(doc_url, searchindex)
-
- # detect if we are using relative links on a Windows system
- if os.name.lower() == 'nt' and not doc_url.startswith('http://'):
- if not relative:
- raise ValueError('You have to use relative=True for the local'
- ' package on a Windows system.')
- self._is_windows = True
- else:
- self._is_windows = False
-
- # download and initialize the search index
- sindex = get_data(searchindex_url, gallery_dir)
- filenames, objects = parse_sphinx_searchindex(sindex)
-
- self._searchindex = dict(filenames=filenames, objects=objects)
-
- def _get_link(self, cobj):
- """Get a valid link, False if not found"""
-
- fname_idx = None
- full_name = cobj['module_short'] + '.' + cobj['name']
- if full_name in self._searchindex['objects']:
- value = self._searchindex['objects'][full_name]
- if isinstance(value, dict):
- value = value[next(iter(value.keys()))]
- fname_idx = value[0]
- elif cobj['module_short'] in self._searchindex['objects']:
- value = self._searchindex['objects'][cobj['module_short']]
- if cobj['name'] in value.keys():
- fname_idx = value[cobj['name']][0]
-
- if fname_idx is not None:
- fname = self._searchindex['filenames'][fname_idx]
- # In 1.5+ Sphinx seems to have changed from .rst.html to only
- # .html extension in converted files. But URLs could be
- # built with < 1.5 or >= 1.5 regardless of what we're currently
- # building with, so let's just check both :(
- fnames = [fname + '.html', os.path.splitext(fname)[0] + '.html']
- for fname in fnames:
- try:
- if self._is_windows:
- fname = fname.replace('/', '\\')
- link = os.path.join(self.doc_url, fname)
- else:
- link = posixpath.join(self.doc_url, fname)
-
- if hasattr(link, 'decode'):
- link = link.decode('utf-8', 'replace')
-
- if link in self._page_cache:
- html = self._page_cache[link]
- else:
- html = get_data(link, self.gallery_dir)
- self._page_cache[link] = html
- except (HTTPError, URLError, IOError):
- pass
- else:
- break
- else:
- raise
-
- # test if cobj appears in page
- comb_names = [cobj['module_short'] + '.' + cobj['name']]
- if self.extra_modules_test is not None:
- for mod in self.extra_modules_test:
- comb_names.append(mod + '.' + cobj['name'])
- url = False
- if hasattr(html, 'decode'):
- # Decode bytes under Python 3
- html = html.decode('utf-8', 'replace')
-
- for comb_name in comb_names:
- if hasattr(comb_name, 'decode'):
- # Decode bytes under Python 3
- comb_name = comb_name.decode('utf-8', 'replace')
- if comb_name in html:
- url = link + u'#' + comb_name
- link = url
- else:
- link = False
-
- return link
-
- def resolve(self, cobj, this_url):
- """Resolve the link to the documentation, returns None if not found
-
- Parameters
- ----------
- cobj : dict
- Dict with information about the "code object" for which we are
- resolving a link.
- cobj['name'] : function or class name (str)
- cobj['module_short'] : shortened module name (str)
- cobj['module'] : module name (str)
- this_url: str
- URL of the current page. Needed to construct relative URLs
- (only used if relative=True in constructor).
-
- Returns
- -------
- link : str | None
- The link (URL) to the documentation.
- """
- full_name = cobj['module_short'] + '.' + cobj['name']
- link = self._link_cache.get(full_name, None)
- if link is None:
- # we don't have it cached
- link = self._get_link(cobj)
- # cache it for the future
- self._link_cache[full_name] = link
-
- if link is False or link is None:
- # failed to resolve
- return None
-
- if self.relative:
- link = os.path.relpath(link, start=this_url)
- if self._is_windows:
- # replace '\' with '/' so it on the web
- link = link.replace('\\', '/')
-
- # for some reason, the relative link goes one directory too high up
- link = link[3:]
-
- return link
-
-
-def _embed_code_links(app, gallery_conf, gallery_dir):
- # Add resolvers for the packages for which we want to show links
- doc_resolvers = {}
-
- src_gallery_dir = os.path.join(app.builder.srcdir, gallery_dir)
- for this_module, url in gallery_conf['reference_url'].items():
- try:
- if url is None:
- doc_resolvers[this_module] = SphinxDocLinkResolver(
- app.builder.outdir,
- src_gallery_dir,
- relative=True)
- else:
- doc_resolvers[this_module] = SphinxDocLinkResolver(url,
- src_gallery_dir)
-
- except HTTPError as e:
- print("The following HTTP Error has occurred:\n")
- print(e.code)
- except URLError as e:
- print("\n...\n"
- "Warning: Embedding the documentation hyperlinks requires "
- "Internet access.\nPlease check your network connection.\n"
- "Unable to continue embedding `{0}` links due to a URL "
- "Error:\n".format(this_module))
- print(e.args)
-
- html_gallery_dir = os.path.abspath(os.path.join(app.builder.outdir,
- gallery_dir))
-
- # patterns for replacement
- link_pattern = ('%s')
- orig_pattern = '%s'
- period = '.'
-
- # This could be turned into a generator if necessary, but should be okay
- flat = [[dirpath, filename]
- for dirpath, _, filenames in os.walk(html_gallery_dir)
- for filename in filenames]
- iterator = app.status_iterator(
- flat, os.path.basename(html_gallery_dir), colorfunc=fuchsia,
- length=len(flat), stringify_func=lambda x: os.path.basename(x[1]))
- for dirpath, fname in iterator:
- full_fname = os.path.join(html_gallery_dir, dirpath, fname)
- subpath = dirpath[len(html_gallery_dir) + 1:]
- pickle_fname = os.path.join(src_gallery_dir, subpath,
- fname[:-5] + '_codeobj.pickle')
-
- if os.path.exists(pickle_fname):
- # we have a pickle file with the objects to embed links for
- with open(pickle_fname, 'rb') as fid:
- example_code_obj = pickle.load(fid)
- fid.close()
- str_repl = {}
- # generate replacement strings with the links
- for name, cobj in example_code_obj.items():
- this_module = cobj['module'].split('.')[0]
-
- if this_module not in doc_resolvers:
- continue
-
- try:
- link = doc_resolvers[this_module].resolve(cobj,
- full_fname)
- except (HTTPError, URLError) as e:
- if isinstance(e, HTTPError):
- extra = e.code
- else:
- extra = e.reason
- print("\n\t\tError resolving %s.%s: %r (%s)"
- % (cobj['module'], cobj['name'], e, extra))
- continue
-
- if link is not None:
- parts = name.split('.')
- name_html = period.join(orig_pattern % part
- for part in parts)
- full_function_name = '%s.%s' % (
- cobj['module'], cobj['name'])
- str_repl[name_html] = link_pattern % (
- link, full_function_name, name_html)
- # do the replacement in the html file
-
- # ensure greediness
- names = sorted(str_repl, key=len, reverse=True)
- regex_str = '|'.join(re.escape(name) for name in names)
- regex = re.compile(regex_str)
-
- def substitute_link(match):
- return str_repl[match.group()]
-
- if len(str_repl) > 0:
- with open(full_fname, 'rb') as fid:
- lines_in = fid.readlines()
- with open(full_fname, 'wb') as fid:
- for line in lines_in:
- line = line.decode('utf-8')
- line = regex.sub(substitute_link, line)
- fid.write(line.encode('utf-8'))
-
-
-def embed_code_links(app, exception):
- """Embed hyperlinks to documentation into example code"""
- if exception is not None:
- return
-
- # No need to waste time embedding hyperlinks when not running the examples
- # XXX: also at the time of writing this fixes make html-noplot
- # for some reason I don't fully understand
- if not app.builder.config.plot_gallery:
- return
-
- # XXX: Whitelist of builders for which it makes sense to embed
- # hyperlinks inside the example html. Note that the link embedding
- # require searchindex.js to exist for the links to the local doc
- # and there does not seem to be a good way of knowing which
- # builders creates a searchindex.js.
- if app.builder.name not in ['html', 'readthedocs']:
- return
-
- print('Embedding documentation hyperlinks in examples..')
-
- gallery_conf = app.config.sphinx_gallery_conf
-
- gallery_dirs = gallery_conf['gallery_dirs']
- if not isinstance(gallery_dirs, list):
- gallery_dirs = [gallery_dirs]
-
- for gallery_dir in gallery_dirs:
- _embed_code_links(app, gallery_conf, gallery_dir)
diff --git a/doc/sphinxext/sphinx_gallery/downloads.py b/doc/sphinxext/sphinx_gallery/downloads.py
deleted file mode 100644
index 6b5b3df17fc87..0000000000000
--- a/doc/sphinxext/sphinx_gallery/downloads.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-r"""
-Utilities for downloadable items
-================================
-
-"""
-# Author: Óscar Nájera
-# License: 3-clause BSD
-
-from __future__ import absolute_import, division, print_function
-
-import os
-import zipfile
-
-CODE_DOWNLOAD = """
-\n.. container:: sphx-glr-footer
-
-\n .. container:: sphx-glr-download
-
- :download:`Download Python source code: {0} <{0}>`\n
-
-\n .. container:: sphx-glr-download
-
- :download:`Download Jupyter notebook: {1} <{1}>`\n"""
-
-CODE_ZIP_DOWNLOAD = """
-\n.. container:: sphx-glr-footer
-
-\n .. container:: sphx-glr-download
-
- :download:`Download all examples in Python source code: {0} {1}>`\n
-
-\n .. container:: sphx-glr-download
-
- :download:`Download all examples in Jupyter notebooks: {2} {3}>`\n"""
-
-
-def python_zip(file_list, gallery_path, extension='.py'):
- """Stores all files in file_list into an zip file
-
- Parameters
- ----------
- file_list : list of strings
- Holds all the file names to be included in zip file
- gallery_path : string
- path to where the zipfile is stored
- extension : str
- '.py' or '.ipynb' In order to deal with downloads of python
- sources and jupyter notebooks the file extension from files in
- file_list will be removed and replace with the value of this
- variable while generating the zip file
- Returns
- -------
- zipname : string
- zip file name, written as `target_dir_{python,jupyter}.zip`
- depending on the extension
- """
- zipname = os.path.basename(gallery_path)
- zipname += '_python' if extension == '.py' else '_jupyter'
- zipname = os.path.join(gallery_path, zipname + '.zip')
-
- zipf = zipfile.ZipFile(zipname, mode='w')
- for fname in file_list:
- file_src = os.path.splitext(fname)[0] + extension
- zipf.write(file_src, os.path.relpath(file_src, gallery_path))
- zipf.close()
-
- return zipname
-
-
-def list_downloadable_sources(target_dir):
- """Returns a list of python source files is target_dir
-
- Parameters
- ----------
- target_dir : string
- path to the directory where python source file are
- Returns
- -------
- list
- list of paths to all Python source files in `target_dir`
- """
- return [os.path.join(target_dir, fname)
- for fname in os.listdir(target_dir)
- if fname.endswith('.py')]
-
-
-def generate_zipfiles(gallery_dir):
- """
- Collects all Python source files and Jupyter notebooks in
- gallery_dir and makes zipfiles of them
-
- Parameters
- ----------
- gallery_dir : string
- path of the gallery to collect downloadable sources
-
- Return
- ------
- download_rst: string
- RestructuredText to include download buttons to the generated files
- """
-
- listdir = list_downloadable_sources(gallery_dir)
- for directory in sorted(os.listdir(gallery_dir)):
- if os.path.isdir(os.path.join(gallery_dir, directory)):
- target_dir = os.path.join(gallery_dir, directory)
- listdir.extend(list_downloadable_sources(target_dir))
-
- py_zipfile = python_zip(listdir, gallery_dir)
- jy_zipfile = python_zip(listdir, gallery_dir, ".ipynb")
-
- def rst_path(filepath):
- return filepath.replace(os.sep, '/')
-
- dw_rst = CODE_ZIP_DOWNLOAD.format(os.path.basename(py_zipfile),
- rst_path(py_zipfile),
- os.path.basename(jy_zipfile),
- rst_path(jy_zipfile))
- return dw_rst
diff --git a/doc/sphinxext/sphinx_gallery/gen_gallery.py b/doc/sphinxext/sphinx_gallery/gen_gallery.py
deleted file mode 100644
index 1a1ce299fab1c..0000000000000
--- a/doc/sphinxext/sphinx_gallery/gen_gallery.py
+++ /dev/null
@@ -1,304 +0,0 @@
-# -*- coding: utf-8 -*-
-# Author: Óscar Nájera
-# License: 3-clause BSD
-"""
-Sphinx-Gallery Generator
-========================
-
-Attaches Sphinx-Gallery to Sphinx in order to generate the galleries
-when building the documentation.
-"""
-
-
-from __future__ import division, print_function, absolute_import
-import copy
-import re
-import os
-
-from . import glr_path_static
-from .gen_rst import generate_dir_rst, SPHX_GLR_SIG
-from .docs_resolv import embed_code_links
-from .downloads import generate_zipfiles
-
-try:
- FileNotFoundError
-except NameError:
- # Python2
- FileNotFoundError = IOError
-
-DEFAULT_GALLERY_CONF = {
- 'filename_pattern': re.escape(os.sep) + 'plot',
- 'examples_dirs': os.path.join('..', 'examples'),
- 'gallery_dirs': 'auto_examples',
- 'backreferences_dir': None,
- 'doc_module': (),
- 'reference_url': {},
- # build options
- 'plot_gallery': True,
- 'download_all_examples': True,
- 'abort_on_example_error': False,
- 'failing_examples': {},
- 'expected_failing_examples': set(),
-}
-
-
-def clean_gallery_out(build_dir):
- """Deletes images under the sphx_glr namespace in the build directory"""
- # Sphinx hack: sphinx copies generated images to the build directory
- # each time the docs are made. If the desired image name already
- # exists, it appends a digit to prevent overwrites. The problem is,
- # the directory is never cleared. This means that each time you build
- # the docs, the number of images in the directory grows.
- #
- # This question has been asked on the sphinx development list, but there
- # was no response: http://osdir.com/ml/sphinx-dev/2011-02/msg00123.html
- #
- # The following is a hack that prevents this behavior by clearing the
- # image build directory from gallery images each time the docs are built.
- # If sphinx changes their layout between versions, this will not
- # work (though it should probably not cause a crash).
- # Tested successfully on Sphinx 1.0.7
-
- build_image_dir = os.path.join(build_dir, '_images')
- if os.path.exists(build_image_dir):
- filelist = os.listdir(build_image_dir)
- for filename in filelist:
- if filename.startswith('sphx_glr') and filename.endswith('png'):
- os.remove(os.path.join(build_image_dir, filename))
-
-
-def parse_config(app):
- """Process the Sphinx Gallery configuration"""
- # TODO: Test this behavior.
- try:
- plot_gallery = eval(app.builder.config.plot_gallery)
- except TypeError:
- plot_gallery = bool(app.builder.config.plot_gallery)
-
- gallery_conf = copy.deepcopy(DEFAULT_GALLERY_CONF)
- gallery_conf.update(app.config.sphinx_gallery_conf)
- gallery_conf.update(plot_gallery=plot_gallery)
- gallery_conf.update(
- abort_on_example_error=app.builder.config.abort_on_example_error)
- gallery_conf['src_dir'] = app.builder.srcdir
-
- backreferences_warning = """\n========
-Sphinx-Gallery now requires you to set the configuration variable
-'backreferences_dir' in your config to activate the
-backreferences. That is mini galleries clustered by the functions used
-in the example scripts. Have a look at it in sphinx-gallery
-
-https://sphinx-gallery.readthedocs.io/en/stable/index.html#examples-using-numpy-linspace
-"""
-
- if gallery_conf.get("mod_example_dir", False):
- update_msg = """\nFor a quick fix try replacing 'mod_example_dir'
-by 'backreferences_dir' in your conf.py file. If that does not solve the
-present issue read carefully how to update in the online documentation
-
-https://sphinx-gallery.readthedocs.io/en/latest/advanced_configuration.html#references-to-examples"""
-
- gallery_conf['backreferences_dir'] = gallery_conf['mod_example_dir']
- app.warn("Old configuration for backreferences detected \n"
- "using the configuration variable `mod_example_dir`\n"
- + backreferences_warning
- + update_msg, prefix="DeprecationWarning: ")
-
- elif gallery_conf['backreferences_dir'] is None:
- no_care_msg = """
-If you don't care about this features set in your conf.py
-'backreferences_dir': False\n"""
-
- app.warn(backreferences_warning + no_care_msg)
-
- gallery_conf['backreferences_dir'] = os.path.join(
- 'modules', 'generated')
- app.warn("using old default 'backreferences_dir':'{}'.\n"
- " This will be disabled in future releases\n".format(
- gallery_conf['backreferences_dir']),
- prefix="DeprecationWarning: ")
-
- # this assures I can call the config in other places
- app.config.sphinx_gallery_conf = gallery_conf
- app.config.html_static_path.append(glr_path_static())
-
- return gallery_conf
-
-
-def _prepare_sphx_glr_dirs(gallery_conf, srcdir):
- """Creates necessary folders for sphinx_gallery files """
- examples_dirs = gallery_conf['examples_dirs']
- gallery_dirs = gallery_conf['gallery_dirs']
-
- if not isinstance(examples_dirs, list):
- examples_dirs = [examples_dirs]
- if not isinstance(gallery_dirs, list):
- gallery_dirs = [gallery_dirs]
-
- if bool(gallery_conf['backreferences_dir']):
- backreferences_dir = os.path.join(
- srcdir, gallery_conf['backreferences_dir'])
- if not os.path.exists(backreferences_dir):
- os.makedirs(backreferences_dir)
-
- return examples_dirs, gallery_dirs
-
-
-def generate_gallery_rst(app):
- """Generate the Main examples gallery reStructuredText
-
- Start the sphinx-gallery configuration and recursively scan the examples
- directories in order to populate the examples gallery
- """
- print('Generating gallery')
- gallery_conf = parse_config(app)
-
- clean_gallery_out(app.builder.outdir)
-
- seen_backrefs = set()
-
- computation_times = []
- examples_dirs, gallery_dirs = _prepare_sphx_glr_dirs(gallery_conf,
- app.builder.srcdir)
-
- for examples_dir, gallery_dir in zip(examples_dirs, gallery_dirs):
- examples_dir = os.path.join(app.builder.srcdir, examples_dir)
- gallery_dir = os.path.join(app.builder.srcdir, gallery_dir)
-
- for workdir in [examples_dir, gallery_dir]:
- if not os.path.exists(workdir):
- os.makedirs(workdir)
- # Here we don't use an os.walk, but we recurse only twice: flat is
- # better than nested.
- this_fhindex, this_computation_times = generate_dir_rst(
- examples_dir, gallery_dir, gallery_conf, seen_backrefs)
- if this_fhindex == "":
- raise FileNotFoundError("Main example directory {0} does not "
- "have a README.txt file. Please write "
- "one to introduce your gallery."
- .format(examples_dir))
-
- computation_times += this_computation_times
-
- # we create an index.rst with all examples
- fhindex = open(os.path.join(gallery_dir, 'index.rst'), 'w')
- # :orphan: to suppress "not included in TOCTREE" sphinx warnings
- fhindex.write(":orphan:\n\n" + this_fhindex)
- for directory in sorted(os.listdir(examples_dir)):
- if os.path.isdir(os.path.join(examples_dir, directory)):
- src_dir = os.path.join(examples_dir, directory)
- target_dir = os.path.join(gallery_dir, directory)
- this_fhindex, this_computation_times = generate_dir_rst(src_dir, target_dir, gallery_conf,
- seen_backrefs)
- fhindex.write(this_fhindex)
- computation_times += this_computation_times
-
- if gallery_conf['download_all_examples']:
- download_fhindex = generate_zipfiles(gallery_dir)
- fhindex.write(download_fhindex)
-
- fhindex.write(SPHX_GLR_SIG)
- fhindex.flush()
-
- if gallery_conf['plot_gallery']:
- print("Computation time summary:")
- for time_elapsed, fname in sorted(computation_times)[::-1]:
- if time_elapsed is not None:
- print("\t- %s : %.2g sec" % (fname, time_elapsed))
- else:
- print("\t- %s : not run" % fname)
-
-
-def touch_empty_backreferences(app, what, name, obj, options, lines):
- """Generate empty back-reference example files
-
- This avoids inclusion errors/warnings if there are no gallery
- examples for a class / module that is being parsed by autodoc"""
-
- if not bool(app.config.sphinx_gallery_conf['backreferences_dir']):
- return
-
- examples_path = os.path.join(app.srcdir,
- app.config.sphinx_gallery_conf[
- "backreferences_dir"],
- "%s.examples" % name)
-
- if not os.path.exists(examples_path):
- # touch file
- open(examples_path, 'w').close()
-
-
-def sumarize_failing_examples(app, exception):
- """Collects the list of falling examples during build and prints them with the traceback
-
- Raises ValueError if there where failing examples
- """
- if exception is not None:
- return
-
- # Under no-plot Examples are not run so nothing to summarize
- if not app.config.sphinx_gallery_conf['plot_gallery']:
- return
-
- gallery_conf = app.config.sphinx_gallery_conf
- failing_examples = set(gallery_conf['failing_examples'].keys())
- expected_failing_examples = set([os.path.normpath(os.path.join(app.srcdir, path))
- for path in
- gallery_conf['expected_failing_examples']])
-
- examples_expected_to_fail = failing_examples.intersection(
- expected_failing_examples)
- expected_fail_msg = []
- if examples_expected_to_fail:
- expected_fail_msg.append("\n\nExamples failing as expected:")
- for fail_example in examples_expected_to_fail:
- expected_fail_msg.append(fail_example + ' failed leaving traceback:\n' +
- gallery_conf['failing_examples'][fail_example] + '\n')
- print("\n".join(expected_fail_msg))
-
- examples_not_expected_to_fail = failing_examples.difference(
- expected_failing_examples)
- fail_msgs = []
- if examples_not_expected_to_fail:
- fail_msgs.append("Unexpected failing examples:")
- for fail_example in examples_not_expected_to_fail:
- fail_msgs.append(fail_example + ' failed leaving traceback:\n' +
- gallery_conf['failing_examples'][fail_example] + '\n')
-
- examples_not_expected_to_pass = expected_failing_examples.difference(
- failing_examples)
- if examples_not_expected_to_pass:
- fail_msgs.append("Examples expected to fail, but not failling:\n" +
- "Please remove these examples from\n" +
- "sphinx_gallery_conf['expected_failing_examples']\n" +
- "in your conf.py file"
- "\n".join(examples_not_expected_to_pass))
-
- if fail_msgs:
- raise ValueError("Here is a summary of the problems encountered when "
- "running the examples\n\n" + "\n".join(fail_msgs) +
- "\n" + "-" * 79)
-
-
-def get_default_config_value(key):
- def default_getter(conf):
- return conf['sphinx_gallery_conf'].get(key, DEFAULT_GALLERY_CONF[key])
- return default_getter
-
-
-def setup(app):
- """Setup sphinx-gallery sphinx extension"""
- app.add_config_value('sphinx_gallery_conf', DEFAULT_GALLERY_CONF, 'html')
- for key in ['plot_gallery', 'abort_on_example_error']:
- app.add_config_value(key, get_default_config_value(key), 'html')
-
- app.add_stylesheet('gallery.css')
- # Sphinx < 1.6 calls it `_extensions`, >= 1.6 is `extensions`.
- extensions_attr = '_extensions' if hasattr(app, '_extensions') else 'extensions'
- if 'sphinx.ext.autodoc' in getattr(app, extensions_attr):
- app.connect('autodoc-process-docstring', touch_empty_backreferences)
-
- app.connect('builder-inited', generate_gallery_rst)
-
- app.connect('build-finished', sumarize_failing_examples)
- app.connect('build-finished', embed_code_links)
diff --git a/doc/sphinxext/sphinx_gallery/gen_rst.py b/doc/sphinxext/sphinx_gallery/gen_rst.py
deleted file mode 100644
index c2a0b95545499..0000000000000
--- a/doc/sphinxext/sphinx_gallery/gen_rst.py
+++ /dev/null
@@ -1,641 +0,0 @@
-# -*- coding: utf-8 -*-
-# Author: Óscar Nájera
-# License: 3-clause BSD
-"""
-RST file generator
-==================
-
-Generate the rst files for the examples by iterating over the python
-example files.
-
-Files that generate images should start with 'plot'
-
-"""
-# Don't use unicode_literals here (be explicit with u"..." instead) otherwise
-# tricky errors come up with exec(code_blocks, ...) calls
-from __future__ import division, print_function, absolute_import
-from time import time
-import codecs
-import hashlib
-import os
-import re
-import shutil
-import subprocess
-import sys
-import traceback
-import warnings
-
-
-# Try Python 2 first, otherwise load from Python 3
-try:
- # textwrap indent only exists in python 3
- from textwrap import indent
-except ImportError:
- def indent(text, prefix, predicate=None):
- """Adds 'prefix' to the beginning of selected lines in 'text'.
-
- If 'predicate' is provided, 'prefix' will only be added to the lines
- where 'predicate(line)' is True. If 'predicate' is not provided,
- it will default to adding 'prefix' to all non-empty lines that do not
- consist solely of whitespace characters.
- """
- if predicate is None:
- def predicate(line):
- return line.strip()
-
- def prefixed_lines():
- for line in text.splitlines(True):
- yield (prefix + line if predicate(line) else line)
- return ''.join(prefixed_lines())
-
-from io import StringIO
-
-# make sure that the Agg backend is set before importing any
-# matplotlib
-import matplotlib
-matplotlib.use('agg')
-matplotlib_backend = matplotlib.get_backend()
-
-if matplotlib_backend != 'agg':
- mpl_backend_msg = (
- "Sphinx-Gallery relies on the matplotlib 'agg' backend to "
- "render figures and write them to files. You are "
- "currently using the {} backend. Sphinx-Gallery will "
- "terminate the build now, because changing backends is "
- "not well supported by matplotlib. We advise you to move "
- "sphinx_gallery imports before any matplotlib-dependent "
- "import. Moving sphinx_gallery imports at the top of "
- "your conf.py file should fix this issue")
-
- raise ValueError(mpl_backend_msg.format(matplotlib_backend))
-
-import matplotlib.pyplot as plt
-
-from . import glr_path_static
-from .backreferences import write_backreferences, _thumbnail_div
-from .downloads import CODE_DOWNLOAD
-from .py_source_parser import (get_docstring_and_rest,
- split_code_and_text_blocks)
-
-from .notebook import jupyter_notebook, save_notebook
-
-try:
- basestring
-except NameError:
- basestring = str
- unicode = str
-
-
-###############################################################################
-
-
-class Tee(object):
- """A tee object to redirect streams to multiple outputs"""
-
- def __init__(self, file1, file2):
- self.file1 = file1
- self.file2 = file2
-
- def write(self, data):
- self.file1.write(data)
- self.file2.write(data)
-
- def flush(self):
- self.file1.flush()
- self.file2.flush()
-
- # When called from a local terminal seaborn needs it in Python3
- def isatty(self):
- self.file1.isatty()
-
-
-class MixedEncodingStringIO(StringIO):
- """Helper when both ASCII and unicode strings will be written"""
-
- def write(self, data):
- if not isinstance(data, unicode):
- data = data.decode('utf-8')
- StringIO.write(self, data)
-
-
-###############################################################################
-# The following strings are used when we have several pictures: we use
-# an html div tag that our CSS uses to turn the lists into horizontal
-# lists.
-HLIST_HEADER = """
-.. rst-class:: sphx-glr-horizontal
-
-"""
-
-HLIST_IMAGE_TEMPLATE = """
- *
-
- .. image:: /%s
- :scale: 47
-"""
-
-SINGLE_IMAGE = """
-.. image:: /%s
- :align: center
-"""
-
-
-# This one could contain unicode
-CODE_OUTPUT = u""".. rst-class:: sphx-glr-script-out
-
- Out::
-
-{0}\n"""
-
-
-SPHX_GLR_SIG = """\n.. rst-class:: sphx-glr-signature
-
- `Generated by Sphinx-Gallery `_\n"""
-
-
-def codestr2rst(codestr, lang='python'):
- """Return reStructuredText code block from code string"""
- code_directive = "\n.. code-block:: {0}\n\n".format(lang)
- indented_block = indent(codestr, ' ' * 4)
- return code_directive + indented_block
-
-
-def extract_thumbnail_number(text):
- """ Pull out the thumbnail image number specified in the docstring. """
-
- # check whether the user has specified a specific thumbnail image
- pattr = re.compile(
- r"^\s*#\s*sphinx_gallery_thumbnail_number\s*=\s*([0-9]+)\s*$",
- flags=re.MULTILINE)
- match = pattr.search(text)
-
- if match is None:
- # by default, use the first figure created
- thumbnail_number = 1
- else:
- thumbnail_number = int(match.groups()[0])
-
- return thumbnail_number
-
-
-def extract_intro(filename):
- """ Extract the first paragraph of module-level docstring. max:95 char"""
-
- docstring, _ = get_docstring_and_rest(filename)
-
- # lstrip is just in case docstring has a '\n\n' at the beginning
- paragraphs = docstring.lstrip().split('\n\n')
- if len(paragraphs) > 1:
- first_paragraph = re.sub('\n', ' ', paragraphs[1])
- first_paragraph = (first_paragraph[:95] + '...'
- if len(first_paragraph) > 95 else first_paragraph)
- else:
- raise ValueError(
- "Example docstring should have a header for the example title "
- "and at least a paragraph explaining what the example is about. "
- "Please check the example file:\n {}\n".format(filename))
-
- return first_paragraph
-
-
-def get_md5sum(src_file):
- """Returns md5sum of file"""
-
- with open(src_file, 'rb') as src_data:
- src_content = src_data.read()
-
- src_md5 = hashlib.md5(src_content).hexdigest()
- return src_md5
-
-
-def md5sum_is_current(src_file):
- """Checks whether src_file has the same md5 hash as the one on disk"""
-
- src_md5 = get_md5sum(src_file)
-
- src_md5_file = src_file + '.md5'
- if os.path.exists(src_md5_file):
- with open(src_md5_file, 'r') as file_checksum:
- ref_md5 = file_checksum.read()
-
- return src_md5 == ref_md5
-
- return False
-
-
-def save_figures(image_path, fig_count, gallery_conf):
- """Save all open matplotlib figures of the example code-block
-
- Parameters
- ----------
- image_path : str
- Path where plots are saved (format string which accepts figure number)
- fig_count : int
- Previous figure number count. Figure number add from this number
- gallery_conf : dict
- Contains the configuration of Sphinx-Gallery
-
- Returns
- -------
- images_rst : str
- rst code to embed the images in the document
- fig_num : int
- number of figures saved
- """
- figure_list = []
-
- for fig_num in plt.get_fignums():
- # Set the fig_num figure as the current figure as we can't
- # save a figure that's not the current figure.
- fig = plt.figure(fig_num)
- kwargs = {}
- to_rgba = matplotlib.colors.colorConverter.to_rgba
- for attr in ['facecolor', 'edgecolor']:
- fig_attr = getattr(fig, 'get_' + attr)()
- default_attr = matplotlib.rcParams['figure.' + attr]
- if to_rgba(fig_attr) != to_rgba(default_attr):
- kwargs[attr] = fig_attr
-
- current_fig = image_path.format(fig_count + fig_num)
- fig.savefig(current_fig, **kwargs)
- figure_list.append(current_fig)
-
- if gallery_conf.get('find_mayavi_figures', False):
- from mayavi import mlab
- e = mlab.get_engine()
- last_matplotlib_fig_num = fig_count + len(figure_list)
- total_fig_num = last_matplotlib_fig_num + len(e.scenes)
- mayavi_fig_nums = range(last_matplotlib_fig_num + 1, total_fig_num + 1)
-
- for scene, mayavi_fig_num in zip(e.scenes, mayavi_fig_nums):
- current_fig = image_path.format(mayavi_fig_num)
- mlab.savefig(current_fig, figure=scene)
- # make sure the image is not too large
- scale_image(current_fig, current_fig, 850, 999)
- figure_list.append(current_fig)
- mlab.close(all=True)
-
- return figure_rst(figure_list, gallery_conf['src_dir'])
-
-
-def figure_rst(figure_list, sources_dir):
- """Given a list of paths to figures generate the corresponding rst
-
- Depending on whether we have one or more figures, we use a
- single rst call to 'image' or a horizontal list.
-
- Parameters
- ----------
- figure_list : list of str
- Strings are the figures' absolute paths
- sources_dir : str
- absolute path of Sphinx documentation sources
-
- Returns
- -------
- images_rst : str
- rst code to embed the images in the document
- fig_num : int
- number of figures saved
- """
-
- figure_paths = [os.path.relpath(figure_path, sources_dir)
- .replace(os.sep, '/').lstrip('/')
- for figure_path in figure_list]
- images_rst = ""
- if len(figure_paths) == 1:
- figure_name = figure_paths[0]
- images_rst = SINGLE_IMAGE % figure_name
- elif len(figure_paths) > 1:
- images_rst = HLIST_HEADER
- for figure_name in figure_paths:
- images_rst += HLIST_IMAGE_TEMPLATE % figure_name
-
- return images_rst, len(figure_list)
-
-
-def scale_image(in_fname, out_fname, max_width, max_height):
- """Scales an image with the same aspect ratio centered in an
- image with a given max_width and max_height
- if in_fname == out_fname the image can only be scaled down
- """
- # local import to avoid testing dependency on PIL:
- try:
- from PIL import Image
- except ImportError:
- import Image
- img = Image.open(in_fname)
- width_in, height_in = img.size
- scale_w = max_width / float(width_in)
- scale_h = max_height / float(height_in)
-
- if height_in * scale_w <= max_height:
- scale = scale_w
- else:
- scale = scale_h
-
- if scale >= 1.0 and in_fname == out_fname:
- return
-
- width_sc = int(round(scale * width_in))
- height_sc = int(round(scale * height_in))
-
- # resize the image
- img.thumbnail((width_sc, height_sc), Image.ANTIALIAS)
-
- # insert centered
- thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
- pos_insert = ((max_width - width_sc) // 2, (max_height - height_sc) // 2)
- thumb.paste(img, pos_insert)
-
- thumb.save(out_fname)
- # Use optipng to perform lossless compression on the resized image if
- # software is installed
- if os.environ.get('SKLEARN_DOC_OPTIPNG', False):
- try:
- subprocess.call(["optipng", "-quiet", "-o", "9", out_fname])
- except Exception:
- warnings.warn('Install optipng to reduce the size of the \
- generated images')
-
-
-def save_thumbnail(image_path_template, src_file, gallery_conf):
- """Save the thumbnail image"""
- # read specification of the figure to display as thumbnail from main text
- _, content = get_docstring_and_rest(src_file)
- thumbnail_number = extract_thumbnail_number(content)
- thumbnail_image_path = image_path_template.format(thumbnail_number)
-
- thumb_dir = os.path.join(os.path.dirname(thumbnail_image_path), 'thumb')
- if not os.path.exists(thumb_dir):
- os.makedirs(thumb_dir)
-
- base_image_name = os.path.splitext(os.path.basename(src_file))[0]
- thumb_file = os.path.join(thumb_dir,
- 'sphx_glr_%s_thumb.png' % base_image_name)
-
- if src_file in gallery_conf['failing_examples']:
- broken_img = os.path.join(glr_path_static(), 'broken_example.png')
- scale_image(broken_img, thumb_file, 200, 140)
-
- elif os.path.exists(thumbnail_image_path):
- scale_image(thumbnail_image_path, thumb_file, 400, 280)
-
- elif not os.path.exists(thumb_file):
- # create something to replace the thumbnail
- default_thumb_file = os.path.join(glr_path_static(), 'no_image.png')
- default_thumb_file = gallery_conf.get("default_thumb_file",
- default_thumb_file)
- scale_image(default_thumb_file, thumb_file, 200, 140)
-
-
-def generate_dir_rst(src_dir, target_dir, gallery_conf, seen_backrefs):
- """Generate the gallery reStructuredText for an example directory"""
- if not os.path.exists(os.path.join(src_dir, 'README.txt')):
- print(80 * '_')
- print('Example directory %s does not have a README.txt file' %
- src_dir)
- print('Skipping this directory')
- print(80 * '_')
- return "", [] # because string is an expected return type
-
- with open(os.path.join(src_dir, 'README.txt')) as fid:
- fhindex = fid.read()
- # Add empty lines to avoid bug in issue #165
- fhindex += "\n\n"
-
- if not os.path.exists(target_dir):
- os.makedirs(target_dir)
- sorted_listdir = [fname for fname in sorted(os.listdir(src_dir))
- if fname.endswith('.py')]
- entries_text = []
- computation_times = []
- build_target_dir = os.path.relpath(target_dir, gallery_conf['src_dir'])
- for fname in sorted_listdir:
- amount_of_code, time_elapsed = \
- generate_file_rst(fname, target_dir, src_dir, gallery_conf)
- computation_times.append((time_elapsed, fname))
- new_fname = os.path.join(src_dir, fname)
- intro = extract_intro(new_fname)
- this_entry = _thumbnail_div(build_target_dir, fname, intro) + """
-
-.. toctree::
- :hidden:
-
- /%s\n""" % os.path.join(build_target_dir, fname[:-3]).replace(os.sep, '/')
- entries_text.append((amount_of_code, this_entry))
-
- if gallery_conf['backreferences_dir']:
- write_backreferences(seen_backrefs, gallery_conf,
- target_dir, fname, intro)
-
- # sort to have the smallest entries in the beginning
- entries_text.sort()
-
- for _, entry_text in entries_text:
- fhindex += entry_text
-
- # clear at the end of the section
- fhindex += """.. raw:: html\n
- \n\n"""
-
- return fhindex, computation_times
-
-
-def execute_code_block(code_block, example_globals,
- block_vars, gallery_conf):
- """Executes the code block of the example file"""
- time_elapsed = 0
- stdout = ''
-
- # If example is not suitable to run, skip executing its blocks
- if not block_vars['execute_script']:
- return stdout, time_elapsed
-
- plt.close('all')
- cwd = os.getcwd()
- # Redirect output to stdout and
- orig_stdout = sys.stdout
- src_file = block_vars['src_file']
-
- try:
- # First cd in the original example dir, so that any file
- # created by the example get created in this directory
- os.chdir(os.path.dirname(src_file))
- my_buffer = MixedEncodingStringIO()
- my_stdout = Tee(sys.stdout, my_buffer)
- sys.stdout = my_stdout
-
- t_start = time()
- # don't use unicode_literals at the top of this file or you get
- # nasty errors here on Py2.7
- exec(code_block, example_globals)
- time_elapsed = time() - t_start
-
- sys.stdout = orig_stdout
-
- my_stdout = my_buffer.getvalue().strip().expandtabs()
- # raise RuntimeError
- if my_stdout:
- stdout = CODE_OUTPUT.format(indent(my_stdout, u' ' * 4))
- os.chdir(cwd)
- images_rst, fig_num = save_figures(block_vars['image_path'],
- block_vars['fig_count'], gallery_conf)
-
- except Exception:
- formatted_exception = traceback.format_exc()
-
- fail_example_warning = 80 * '_' + '\n' + \
- '%s failed to execute correctly:' % src_file + \
- formatted_exception + 80 * '_' + '\n'
- warnings.warn(fail_example_warning)
-
- fig_num = 0
- images_rst = codestr2rst(formatted_exception, lang='pytb')
-
- # Breaks build on first example error
- # XXX This check can break during testing e.g. if you uncomment the
- # `raise RuntimeError` by the `my_stdout` call, maybe use `.get()`?
- if gallery_conf['abort_on_example_error']:
- raise
- # Stores failing file
- gallery_conf['failing_examples'][src_file] = formatted_exception
- block_vars['execute_script'] = False
-
- finally:
- os.chdir(cwd)
- sys.stdout = orig_stdout
-
- code_output = u"\n{0}\n\n{1}\n\n".format(images_rst, stdout)
- block_vars['fig_count'] += fig_num
-
- return code_output, time_elapsed
-
-
-def clean_modules():
- """Remove "unload" seaborn from the name space
-
- After a script is executed it can load a variety of setting that one
- does not want to influence in other examples in the gallery."""
-
- # Horrible code to 'unload' seaborn, so that it resets
- # its default when is load
- # Python does not support unloading of modules
- # https://bugs.python.org/issue9072
- for module in list(sys.modules.keys()):
- if 'seaborn' in module:
- del sys.modules[module]
-
- # Reset Matplotlib to default
- plt.rcdefaults()
-
-
-def generate_file_rst(fname, target_dir, src_dir, gallery_conf):
- """Generate the rst file for a given example.
-
- Returns
- -------
- amount_of_code : int
- character count of the corresponding python script in file
- time_elapsed : float
- seconds required to run the script
- """
-
- src_file = os.path.normpath(os.path.join(src_dir, fname))
- example_file = os.path.join(target_dir, fname)
- shutil.copyfile(src_file, example_file)
- script_blocks = split_code_and_text_blocks(src_file)
- amount_of_code = sum([len(bcontent)
- for blabel, bcontent in script_blocks
- if blabel == 'code'])
-
- if md5sum_is_current(example_file):
- return amount_of_code, 0
-
- image_dir = os.path.join(target_dir, 'images')
- if not os.path.exists(image_dir):
- os.makedirs(image_dir)
-
- base_image_name = os.path.splitext(fname)[0]
- image_fname = 'sphx_glr_' + base_image_name + '_{0:03}.png'
- build_image_dir = os.path.relpath(image_dir, gallery_conf['src_dir'])
- image_path_template = os.path.join(image_dir, image_fname)
-
- ref_fname = os.path.relpath(example_file, gallery_conf['src_dir'])
- ref_fname = ref_fname.replace(os.path.sep, '_')
- example_rst = """\n\n.. _sphx_glr_{0}:\n\n""".format(ref_fname)
-
- filename_pattern = gallery_conf.get('filename_pattern')
- execute_script = re.search(filename_pattern, src_file) and gallery_conf[
- 'plot_gallery']
- example_globals = {
- # A lot of examples contains 'print(__doc__)' for example in
- # scikit-learn so that running the example prints some useful
- # information. Because the docstring has been separated from
- # the code blocks in sphinx-gallery, __doc__ is actually
- # __builtin__.__doc__ in the execution context and we do not
- # want to print it
- '__doc__': '',
- # Examples may contain if __name__ == '__main__' guards
- # for in example scikit-learn if the example uses multiprocessing
- '__name__': '__main__',
- # Don't ever support __file__: Issues #166 #212
- }
-
- # A simple example has two blocks: one for the
- # example introduction/explanation and one for the code
- is_example_notebook_like = len(script_blocks) > 2
- time_elapsed = 0
- block_vars = {'execute_script': execute_script, 'fig_count': 0,
- 'image_path': image_path_template, 'src_file': src_file}
- if block_vars['execute_script']:
- print('Executing file %s' % src_file)
- for blabel, bcontent in script_blocks:
- if blabel == 'code':
- code_output, rtime = execute_code_block(bcontent,
- example_globals,
- block_vars,
- gallery_conf)
-
- time_elapsed += rtime
-
- if is_example_notebook_like:
- example_rst += codestr2rst(bcontent) + '\n'
- example_rst += code_output
- else:
- example_rst += code_output
- if 'sphx-glr-script-out' in code_output:
- # Add some vertical space after output
- example_rst += "\n\n|\n\n"
- example_rst += codestr2rst(bcontent) + '\n'
-
- else:
- example_rst += bcontent + '\n\n'
-
- clean_modules()
-
- # Writes md5 checksum if example has build correctly
- # not failed and was initially meant to run(no-plot shall not cache md5sum)
- if block_vars['execute_script']:
- with open(example_file + '.md5', 'w') as file_checksum:
- file_checksum.write(get_md5sum(example_file))
-
- save_thumbnail(image_path_template, src_file, gallery_conf)
-
- time_m, time_s = divmod(time_elapsed, 60)
- example_nb = jupyter_notebook(script_blocks)
- save_notebook(example_nb, example_file.replace('.py', '.ipynb'))
- with codecs.open(os.path.join(target_dir, base_image_name + '.rst'),
- mode='w', encoding='utf-8') as f:
- example_rst += "**Total running time of the script:**" \
- " ({0: .0f} minutes {1: .3f} seconds)\n\n".format(
- time_m, time_s)
- example_rst += CODE_DOWNLOAD.format(fname,
- fname.replace('.py', '.ipynb'))
- example_rst += SPHX_GLR_SIG
- f.write(example_rst)
-
- if block_vars['execute_script']:
- print("{0} ran in : {1:.2g} seconds\n".format(src_file, time_elapsed))
-
- return amount_of_code, time_elapsed
diff --git a/doc/sphinxext/sphinx_gallery/notebook.py b/doc/sphinxext/sphinx_gallery/notebook.py
deleted file mode 100644
index a0cfdbd7881d6..0000000000000
--- a/doc/sphinxext/sphinx_gallery/notebook.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# -*- coding: utf-8 -*-
-r"""
-Parser for Jupyter notebooks
-============================
-
-Class that holds the Jupyter notebook information
-
-"""
-# Author: Óscar Nájera
-# License: 3-clause BSD
-
-from __future__ import division, absolute_import, print_function
-from functools import partial
-import argparse
-import json
-import re
-import sys
-from .py_source_parser import split_code_and_text_blocks
-
-
-def jupyter_notebook_skeleton():
- """Returns a dictionary with the elements of a Jupyter notebook"""
- py_version = sys.version_info
- notebook_skeleton = {
- "cells": [],
- "metadata": {
- "kernelspec": {
- "display_name": "Python " + str(py_version[0]),
- "language": "python",
- "name": "python" + str(py_version[0])
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": py_version[0]
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython" + str(py_version[0]),
- "version": '{0}.{1}.{2}'.format(*sys.version_info[:3])
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
- return notebook_skeleton
-
-
-def directive_fun(match, directive):
- """Helper to fill in directives"""
- directive_to_alert = dict(note="info", warning="danger")
- return (''
- .format(directive_to_alert[directive], directive.capitalize(),
- match.group(1).strip()))
-
-
-def rst2md(text):
- """Converts the RST text from the examples docstrigs and comments
- into markdown text for the Jupyter notebooks"""
-
- top_heading = re.compile(r'^=+$\s^([\w\s-]+)^=+$', flags=re.M)
- text = re.sub(top_heading, r'# \1', text)
-
- math_eq = re.compile(r'^\.\. math::((?:.+)?(?:\n+^ .+)*)', flags=re.M)
- text = re.sub(math_eq,
- lambda match: r'\begin{{align}}{0}\end{{align}}'.format(
- match.group(1).strip()),
- text)
- inline_math = re.compile(r':math:`(.+?)`', re.DOTALL)
- text = re.sub(inline_math, r'$\1$', text)
-
- directives = ('warning', 'note')
- for directive in directives:
- directive_re = re.compile(r'^\.\. %s::((?:.+)?(?:\n+^ .+)*)'
- % directive, flags=re.M)
- text = re.sub(directive_re,
- partial(directive_fun, directive=directive), text)
-
- links = re.compile(r'^ *\.\. _.*:.*$\n', flags=re.M)
- text = re.sub(links, '', text)
-
- refs = re.compile(r':ref:`')
- text = re.sub(refs, '`', text)
-
- contents = re.compile(r'^\s*\.\. contents::.*$(\n +:\S+: *$)*\n',
- flags=re.M)
- text = re.sub(contents, '', text)
-
- images = re.compile(
- r'^\.\. image::(.*$)(?:\n *:alt:(.*$)\n)?(?: +:\S+:.*$\n)*',
- flags=re.M)
- text = re.sub(
- images, lambda match: '\n'.format(
- match.group(1).strip(), (match.group(2) or '').strip()), text)
-
- return text
-
-
-def jupyter_notebook(script_blocks):
- """Generate a Jupyter notebook file cell-by-cell
-
- Parameters
- ----------
- script_blocks: list
- script execution cells
- """
-
- work_notebook = jupyter_notebook_skeleton()
- add_code_cell(work_notebook, "%matplotlib inline")
- fill_notebook(work_notebook, script_blocks)
-
- return work_notebook
-
-
-def add_code_cell(work_notebook, code):
- """Add a code cell to the notebook
-
- Parameters
- ----------
- code : str
- Cell content
- """
-
- code_cell = {
- "cell_type": "code",
- "execution_count": None,
- "metadata": {"collapsed": False},
- "outputs": [],
- "source": [code.strip()]
- }
- work_notebook["cells"].append(code_cell)
-
-
-def add_markdown_cell(work_notebook, text):
- """Add a markdown cell to the notebook
-
- Parameters
- ----------
- code : str
- Cell content
- """
- markdown_cell = {
- "cell_type": "markdown",
- "metadata": {},
- "source": [rst2md(text)]
- }
- work_notebook["cells"].append(markdown_cell)
-
-
-def fill_notebook(work_notebook, script_blocks):
- """Writes the Jupyter notebook cells
-
- Parameters
- ----------
- script_blocks : list of tuples
- """
-
- for blabel, bcontent in script_blocks:
- if blabel == 'code':
- add_code_cell(work_notebook, bcontent)
- else:
- add_markdown_cell(work_notebook, bcontent + '\n')
-
-
-def save_notebook(work_notebook, write_file):
- """Saves the Jupyter work_notebook to write_file"""
- with open(write_file, 'w') as out_nb:
- json.dump(work_notebook, out_nb, indent=2)
-
-
-###############################################################################
-# Notebook shell utility
-
-def python_to_jupyter_cli(args=None, namespace=None):
- """Exposes the jupyter notebook renderer to the command line
-
- Takes the same arguments as ArgumentParser.parse_args
- """
- parser = argparse.ArgumentParser(
- description='Sphinx-Gallery Notebook converter')
- parser.add_argument('python_src_file', nargs='+',
- help='Input Python file script to convert. '
- 'Supports multiple files and shell wildcards'
- ' (e.g. *.py)')
- args = parser.parse_args(args, namespace)
-
- for src_file in args.python_src_file:
- blocks = split_code_and_text_blocks(src_file)
- print('Converting {0}'.format(src_file))
- example_nb = jupyter_notebook(blocks)
- save_notebook(example_nb, src_file.replace('.py', '.ipynb'))
diff --git a/doc/sphinxext/sphinx_gallery/py_source_parser.py b/doc/sphinxext/sphinx_gallery/py_source_parser.py
deleted file mode 100644
index d397087f99fbd..0000000000000
--- a/doc/sphinxext/sphinx_gallery/py_source_parser.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# -*- coding: utf-8 -*-
-r"""
-Parser for python source files
-==============================
-"""
-# Created Sun Nov 27 14:03:07 2016
-# Author: Óscar Nájera
-
-from __future__ import division, absolute_import, print_function
-import ast
-import re
-from textwrap import dedent
-
-SYNTAX_ERROR_DOCSTRING = """
-SyntaxError
-===========
-
-Example script with invalid Python syntax
-"""
-
-
-def get_docstring_and_rest(filename):
- """Separate `filename` content between docstring and the rest
-
- Strongly inspired from ast.get_docstring.
-
- Returns
- -------
- docstring: str
- docstring of `filename`
- rest: str
- `filename` content without the docstring
- """
- # can't use codecs.open(filename, 'r', 'utf-8') here b/c ast doesn't
- # seem to work with unicode strings in Python2.7
- # "SyntaxError: encoding declaration in Unicode string"
- with open(filename, 'rb') as fid:
- content = fid.read()
- # change from Windows format to UNIX for uniformity
- content = content.replace(b'\r\n', b'\n')
-
- try:
- node = ast.parse(content)
- except SyntaxError:
- return SYNTAX_ERROR_DOCSTRING, content.decode('utf-8')
-
- if not isinstance(node, ast.Module):
- raise TypeError("This function only supports modules. "
- "You provided {0}".format(node.__class__.__name__))
- if node.body and isinstance(node.body[0], ast.Expr) and \
- isinstance(node.body[0].value, ast.Str):
- docstring_node = node.body[0]
- docstring = docstring_node.value.s
- if hasattr(docstring, 'decode'): # python2.7
- docstring = docstring.decode('utf-8')
- # This get the content of the file after the docstring last line
- # Note: 'maxsplit' argument is not a keyword argument in python2
- rest = content.decode('utf-8').split('\n', docstring_node.lineno)[-1]
- return docstring, rest
- else:
- raise ValueError(('Could not find docstring in file "{0}". '
- 'A docstring is required by sphinx-gallery')
- .format(filename))
-
-
-def split_code_and_text_blocks(source_file):
- """Return list with source file separated into code and text blocks.
-
- Returns
- -------
- blocks : list of (label, content)
- List where each element is a tuple with the label ('text' or 'code'),
- and content string of block.
- """
- docstring, rest_of_content = get_docstring_and_rest(source_file)
- blocks = [('text', docstring)]
-
- pattern = re.compile(
- r'(?P^#{20,}.*)\s(?P(?:^#.*\s)*)',
- flags=re.M)
-
- pos_so_far = 0
- for match in re.finditer(pattern, rest_of_content):
- match_start_pos, match_end_pos = match.span()
- code_block_content = rest_of_content[pos_so_far:match_start_pos]
- text_content = match.group('text_content')
- sub_pat = re.compile('^#', flags=re.M)
- text_block_content = dedent(re.sub(sub_pat, '', text_content)).lstrip()
- if code_block_content.strip():
- blocks.append(('code', code_block_content))
- if text_block_content.strip():
- blocks.append(('text', text_block_content))
- pos_so_far = match_end_pos
-
- remaining_content = rest_of_content[pos_so_far:]
- if remaining_content.strip():
- blocks.append(('code', remaining_content))
-
- return blocks
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index e5159054c8153..65b47a42289e4 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -1,8 +1,8 @@
.. currentmodule:: sklearn
-
-
+.. include:: includes/big_toc_css.rst
+.. include:: whats_new/_contributors.rst
===============
-Release history
+Release History
===============
Version 0.20 (under development)
@@ -5756,3 +5756,19 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
.. _Neeraj Gangwar: http://neerajgangwar.in
.. _Arthur Mensch: https://amensch.fr
+.. include:: whats_new/v0.20.rst
+.. include:: whats_new/v0.19.rst
+
+=================
+Previous Releases
+=================
+.. toctree::
+ :maxdepth: 1
+
+ Version 0.18
+ Version 0.17
+ Version 0.16
+ Version 0.15
+ Version 0.14
+ Version 0.13
+ Older Versions
diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst
new file mode 100644
index 0000000000000..dfbc319da88f4
--- /dev/null
+++ b/doc/whats_new/_contributors.rst
@@ -0,0 +1,143 @@
+.. _Olivier Grisel: https://twitter.com/ogrisel
+
+.. _Gael Varoquaux: http://gael-varoquaux.info
+
+.. _Alexandre Gramfort: http://alexandre.gramfort.net
+
+.. _Fabian Pedregosa: http://fa.bianp.net
+
+.. _Mathieu Blondel: http://www.mblondel.org
+
+.. _James Bergstra: http://www-etud.iro.umontreal.ca/~bergstrj/
+
+.. _liblinear: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
+
+.. _Yaroslav Halchenko: http://www.onerussian.com/
+
+.. _Vlad Niculae: http://vene.ro
+
+.. _Edouard Duchesnay: https://sites.google.com/site/duchesnay/home
+
+.. _Peter Prettenhofer: https://sites.google.com/site/peterprettenhofer/
+
+.. _Alexandre Passos: http://atpassos.me
+
+.. _Nicolas Pinto: https://twitter.com/npinto
+
+.. _Bertrand Thirion: https://team.inria.fr/parietal/bertrand-thirions-page
+
+.. _Andreas Müller: http://peekaboo-vision.blogspot.com
+
+.. _Matthieu Perrot: http://brainvisa.info/biblio/lnao/en/Author/PERROT-M.html
+
+.. _Jake Vanderplas: http://staff.washington.edu/jakevdp/
+
+.. _Gilles Louppe: http://www.montefiore.ulg.ac.be/~glouppe/
+
+.. _INRIA: http://www.inria.fr
+
+.. _Parietal Team: http://parietal.saclay.inria.fr/
+
+.. _David Warde-Farley: http://www-etud.iro.umontreal.ca/~wardefar/
+
+.. _Brian Holt: http://personal.ee.surrey.ac.uk/Personal/B.Holt
+
+.. _Satrajit Ghosh: http://www.mit.edu/~satra/
+
+.. _Robert Layton: https://twitter.com/robertlayton
+
+.. _Scott White: https://twitter.com/scottblanc
+
+.. _David Marek: http://www.davidmarek.cz/
+
+.. _Christian Osendorfer: https://osdf.github.io
+
+.. _Arnaud Joly: http://www.ajoly.org
+
+.. _Rob Zinkov: http://zinkov.com
+
+.. _Joel Nothman: http://joelnothman.com
+
+.. _Nicolas Trésegnie : http://nicolastr.com/
+
+.. _Kemal Eren: http://www.kemaleren.com
+
+.. _Yann Dauphin: http://ynd.github.io/
+
+.. _Yannick Schwartz: https://team.inria.fr/parietal/schwarty/
+
+.. _Kyle Kastner: http://kastnerkyle.github.io
+
+.. _Daniel Nouri: http://danielnouri.org
+
+.. _Manoj Kumar: https://manojbits.wordpress.com
+
+.. _Luis Pedro Coelho: http://luispedro.org
+
+.. _Fares Hedyati: http://www.eecs.berkeley.edu/~fareshed
+
+.. _Antony Lee: https://www.ocf.berkeley.edu/~antonyl/
+
+.. _Martin Billinger: http://tnsre.embs.org/author/martinbillinger
+
+.. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me
+
+.. _Trevor Stephens: http://trevorstephens.com/
+
+.. _Jan Hendrik Metzen: https://jmetzen.github.io/
+
+.. _Will Dawson: http://www.dawsonresearch.com
+
+.. _Andrew Tulloch: http://tullo.ch/
+
+.. _Hanna Wallach: http://dirichlet.net/
+
+.. _Yan Yi: http://seowyanyi.org
+
+.. _Hervé Bredin: http://herve.niderb.fr/
+
+.. _Eric Martin: http://www.ericmart.in
+
+.. _Nicolas Goix: https://perso.telecom-paristech.fr/~goix/
+
+.. _Sebastian Raschka: http://sebastianraschka.com
+
+.. _Brian McFee: https://bmcfee.github.io
+
+.. _Valentin Stolbunov: http://www.vstolbunov.com
+
+.. _Jaques Grobler: https://github.com/jaquesgrobler
+
+.. _Lars Buitinck: https://github.com/larsmans
+
+.. _Loic Esteve: https://github.com/lesteve
+
+.. _Noel Dawe: https://github.com/ndawe
+
+.. _Raghav RV: https://github.com/raghavrv
+
+.. _Tom Dupre la Tour: https://github.com/TomDLT
+
+.. _Nelle Varoquaux: https://github.com/nellev
+
+.. _Bing Tian Dai: https://github.com/btdai
+
+.. _Dylan Werner-Meier: https://github.com/unautre
+
+.. _Alyssa Batula: https://github.com/abatula
+
+.. _Srivatsan Ramesh: https://github.com/srivatsan-ramesh
+
+.. _Ron Weiss: http://www.ee.columbia.edu/~ronw
+
+.. _Kathleen Chen: https://github.com/kchen17
+
+.. _Vincent Pham: https://github.com/vincentpham1991
+
+.. _Denis Engemann: http://denis-engemann.de
+
+.. _Anish Shah: https://github.com/AnishShah
+
+.. _Neeraj Gangwar: http://neerajgangwar.in
+
+.. _Arthur Mensch: https://amensch.fr
diff --git a/doc/whats_new/older_versions.rst b/doc/whats_new/older_versions.rst
new file mode 100644
index 0000000000000..eeb672914f033
--- /dev/null
+++ b/doc/whats_new/older_versions.rst
@@ -0,0 +1,1386 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_12.1:
+
+Version 0.12.1
+===============
+
+**October 8, 2012**
+
+The 0.12.1 release is a bug-fix release with no additional features, but is
+instead a set of bug fixes.
+
+Changelog
+----------
+
+- Improved numerical stability in spectral embedding by `Gael
+ Varoquaux`_
+
+- Doctest under windows 64bit by `Gael Varoquaux`_
+
+- Documentation fixes for elastic net by `Andreas Müller`_ and
+ `Alexandre Gramfort`_
+
+- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_
+
+- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_
+
+- Fix parallel computing in MDS by `Gael Varoquaux`_
+
+- Fix Unicode support in count vectorizer by `Andreas Müller`_
+
+- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch `
+
+- Fix clone of SGD objects by `Peter Prettenhofer`_
+
+- Stabilize GMM by :user:`Virgile Fritsch `
+
+People
+------
+
+ * 14 `Peter Prettenhofer`_
+ * 12 `Gael Varoquaux`_
+ * 10 `Andreas Müller`_
+ * 5 `Lars Buitinck`_
+ * 3 :user:`Virgile Fritsch `
+ * 1 `Alexandre Gramfort`_
+ * 1 `Gilles Louppe`_
+ * 1 `Mathieu Blondel`_
+
+.. _changes_0_12:
+
+Version 0.12
+============
+
+**September 4, 2012**
+
+Changelog
+---------
+
+- Various speed improvements of the :ref:`decision trees ` module, by
+ `Gilles Louppe`_.
+
+- :class:`ensemble.GradientBoostingRegressor` and
+ :class:`ensemble.GradientBoostingClassifier` now support feature subsampling
+ via the ``max_features`` argument, by `Peter Prettenhofer`_.
+
+- Added Huber and Quantile loss functions to
+ :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_.
+
+- :ref:`Decision trees ` and :ref:`forests of randomized trees `
+ now support multi-output classification and regression problems, by
+ `Gilles Louppe`_.
+
+- Added :class:`preprocessing.LabelEncoder`, a simple utility class to
+ normalize labels or transform non-numerical labels, by `Mathieu Blondel`_.
+
+- Added the epsilon-insensitive loss and the ability to make probabilistic
+ predictions with the modified huber loss in :ref:`sgd`, by
+ `Mathieu Blondel`_.
+
+- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux.
+
+- SVMlight file format loader now detects compressed (gzip/bzip2) files and
+ decompresses them on the fly, by `Lars Buitinck`_.
+
+- SVMlight file format serializer now preserves double precision floating
+ point values, by `Olivier Grisel`_.
+
+- A common testing framework for all estimators was added, by `Andreas Müller`_.
+
+- Understandable error messages for estimators that do not accept
+ sparse input by `Gael Varoquaux`_
+
+- Speedups in hierarchical clustering by `Gael Varoquaux`_. In
+ particular building the tree now supports early stopping. This is
+ useful when the number of clusters is not small compared to the
+ number of samples.
+
+- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection,
+ by `Alexandre Gramfort`_.
+
+- Added :func:`metrics.auc_score` and
+ :func:`metrics.average_precision_score` convenience functions by `Andreas
+ Müller`_.
+
+- Improved sparse matrix support in the :ref:`feature_selection`
+ module by `Andreas Müller`_.
+
+- New word boundaries-aware character n-gram analyzer for the
+ :ref:`text_feature_extraction` module by :user:`@kernc `.
+
+- Fixed bug in spectral clustering that led to single point clusters
+ by `Andreas Müller`_.
+
+- In :class:`feature_extraction.text.CountVectorizer`, added an option to
+ ignore infrequent words, ``min_df`` by `Andreas Müller`_.
+
+- Add support for multiple targets in some linear models (ElasticNet, Lasso
+ and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and
+ `Alexandre Gramfort`_.
+
+- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li.
+
+- Fixed feature importance computation in
+ :ref:`gradient_boosting`.
+
+API changes summary
+-------------------
+
+- The old ``scikits.learn`` package has disappeared; all code should import
+ from ``sklearn`` instead, which was introduced in 0.9.
+
+- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
+  with its order reversed, in order to keep it consistent with the order
+  of the returned ``fpr`` and ``tpr``.
+
+- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`,
+ :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
+ object when initialising it and not through ``fit``. Now ``fit`` will
+ only accept the data as an input parameter.
+
+- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
+ the default gamma value was only computed the first time ``fit`` was called
+ and then stored. It is now recalculated on every call to ``fit``.
+
+- All ``Base`` classes are now abstract meta classes so that they can not be
+ instantiated.
+
+- :func:`cluster.ward_tree` now also returns the parent array. This is
+ necessary for early-stopping in which case the tree is not
+ completely built.
+
+- In :class:`feature_extraction.text.CountVectorizer` the parameters
+ ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to
+ enable grid-searching both at once.
+
+- In :class:`feature_extraction.text.CountVectorizer`, words that appear
+ only in one document are now ignored by default. To reproduce
+ the previous behavior, set ``min_df=1``.
+
+- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
+ returns 2d array when fit on two classes.
+
+- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
+ and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
+ when fit on two classes.
+
+- Grid of alphas used for fitting :class:`linear_model.LassoCV` and
+ :class:`linear_model.ElasticNetCV` is now stored
+ in the attribute ``alphas_`` rather than overriding the init parameter
+ ``alphas``.
+
+- Linear models when alpha is estimated by cross-validation store
+ the estimated value in the ``alpha_`` attribute rather than just
+ ``alpha`` or ``best_alpha``.
+
+- :class:`ensemble.GradientBoostingClassifier` now supports
+ :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
+ :meth:`ensemble.GradientBoostingClassifier.staged_predict`.
+
+- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
+  All classes in the :ref:`svm` module now automatically select the
+  sparse or dense representation based on the input.
+
+- All clustering algorithms now interpret the array ``X`` given to ``fit`` as
+ input data, in particular :class:`cluster.SpectralClustering` and
+ :class:`cluster.AffinityPropagation` which previously expected affinity matrices.
+
+- For clustering algorithms that take the desired number of clusters as a parameter,
+ this parameter is now called ``n_clusters``.
+
+
+People
+------
+ * 267 `Andreas Müller`_
+ * 94 `Gilles Louppe`_
+ * 89 `Gael Varoquaux`_
+ * 79 `Peter Prettenhofer`_
+ * 60 `Mathieu Blondel`_
+ * 57 `Alexandre Gramfort`_
+ * 52 `Vlad Niculae`_
+ * 45 `Lars Buitinck`_
+ * 44 Nelle Varoquaux
+ * 37 `Jaques Grobler`_
+ * 30 Alexis Mignon
+ * 30 Immanuel Bayer
+ * 27 `Olivier Grisel`_
+ * 16 Subhodeep Moitra
+ * 13 Yannick Schwartz
+ * 12 :user:`@kernc `
+ * 11 :user:`Virgile Fritsch `
+ * 9 Daniel Duckworth
+ * 9 `Fabian Pedregosa`_
+ * 9 `Robert Layton`_
+ * 8 John Benediktsson
+ * 7 Marko Burjek
+ * 5 `Nicolas Pinto`_
+ * 4 Alexandre Abraham
+ * 4 `Jake Vanderplas`_
+ * 3 `Brian Holt`_
+ * 3 `Edouard Duchesnay`_
+ * 3 Florian Hoenig
+ * 3 flyingimmidev
+ * 2 Francois Savard
+ * 2 Hannes Schulz
+ * 2 Peter Welinder
+ * 2 `Yaroslav Halchenko`_
+ * 2 Wei Li
+ * 1 Alex Companioni
+ * 1 Brandyn A. White
+ * 1 Bussonnier Matthias
+ * 1 Charles-Pierre Astolfi
+ * 1 Dan O'Huiginn
+ * 1 David Cournapeau
+ * 1 Keith Goodman
+ * 1 Ludwig Schwardt
+ * 1 Olivier Hervieu
+ * 1 Sergio Medina
+ * 1 Shiqiao Du
+ * 1 Tim Sheerman-Chase
+ * 1 buguen
+
+
+
+.. _changes_0_11:
+
+Version 0.11
+============
+
+**May 7, 2012**
+
+Changelog
+---------
+
+Highlights
+.............
+
+- Gradient boosted regression trees (:ref:`gradient_boosting`)
+  for classification and regression by `Peter Prettenhofer`_
+  and `Scott White`_.
+
+- Simple dict-based feature loader with support for categorical variables
+ (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.
+
+- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
+ and added macro and micro average options to
+ :func:`metrics.precision_score`, :func:`metrics.recall_score` and
+ :func:`metrics.f1_score` by `Satrajit Ghosh`_.
+
+- :ref:`out_of_bag` of generalization error for :ref:`ensemble`
+ by `Andreas Müller`_.
+
+- Randomized sparse linear models for feature
+ selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_
+
+- :ref:`label_propagation` for semi-supervised learning, by Clay
+ Woolam. **Note** the semi-supervised API is still work in progress,
+ and may change.
+
+- Added BIC/AIC model selection to classical :ref:`gmm` and unified
+ the API with the remainder of scikit-learn, by `Bertrand Thirion`_
+
+- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is
+ a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits,
+ by Yannick Schwartz.
+
+- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a
+ ``shrink_threshold`` parameter, which implements **shrunken centroid
+ classification**, by `Robert Layton`_.
+
+Other changes
+..............
+
+- Merged dense and sparse implementations of :ref:`sgd` module and
+ exposed utility extension types for sequential
+ datasets ``seq_dataset`` and weight vectors ``weight_vector``
+ by `Peter Prettenhofer`_.
+
+- Added ``partial_fit`` (support for online/minibatch learning) and
+ warm_start to the :ref:`sgd` module by `Mathieu Blondel`_.
+
+- Dense and sparse implementations of :ref:`svm` classes and
+ :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_.
+
+- Regressors can now be used as base estimator in the :ref:`multiclass`
+ module by `Mathieu Blondel`_.
+
+- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances`
+ and :func:`metrics.pairwise.pairwise_kernels` for parallel computation,
+ by `Mathieu Blondel`_.
+
+- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument
+ to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_.
+
+- Improved :ref:`cross_validation` and :ref:`grid_search` documentation
+ and introduced the new :func:`cross_validation.train_test_split`
+ helper function by `Olivier Grisel`_
+
+- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for
+ consistency with ``decision_function``; for ``kernel==linear``,
+ ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_.
+
+- Performance improvements to efficient leave-one-out cross-validated
+ Ridge regression, esp. for the ``n_samples > n_features`` case, in
+ :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin.
+
+- Refactoring and simplification of the :ref:`text_feature_extraction`
+ API and fixed a bug that caused possible negative IDF,
+ by `Olivier Grisel`_.
+
+- Beam pruning option in :class:`_BaseHMM` module has been removed since it
+ is difficult to Cythonize. If you are interested in contributing a Cython
+ version, you can use the python version in the git history as a reference.
+
+- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for
+ nearest neighbors searches. The metric can be specified by argument ``p``.
+
+API changes summary
+-------------------
+
+- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope`
+ instead.
+
+- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module
+ :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`,
+ :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor`
+ and/or :class:`RadiusNeighborsRegressor` instead.
+
+- Sparse classes in the :ref:`sgd` module are now deprecated.
+
+- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`,
+ parameters must be passed to an object when initialising it and not through
+ ``fit``. Now ``fit`` will only accept the data as an input parameter.
+
+- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated.
+ ``sample`` and ``score`` or ``predict`` should be used instead.
+
+- attribute ``_scores`` and ``_pvalues`` in univariate feature selection
+ objects are now deprecated.
+ ``scores_`` or ``pvalues_`` should be used instead.
+
+- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
+ :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
+ parameter, not a parameter to fit. This makes grid searches
+ over this parameter possible.
+
+- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
+ consistent with the Olivetti faces dataset. Use ``images`` and
+ ``pairs`` attribute to access the natural images shapes instead.
+
+- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
+ changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with
+ ``'ovr'`` being the default. This does not change the default behavior
+ but hopefully is less confusing.
+
+- Class :class:`feature_selection.text.Vectorizer` is deprecated and
+ replaced by :class:`feature_selection.text.TfidfVectorizer`.
+
+- The preprocessor / analyzer nested structure for text feature
+ extraction has been removed. All those features are
+ now directly passed as flat constructor arguments
+ to :class:`feature_selection.text.TfidfVectorizer` and
+ :class:`feature_selection.text.CountVectorizer`, in particular the
+ following parameters are now used:
+
+- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
+ analysis scheme, or use a specific python callable (as previously).
+
+- ``tokenizer`` and ``preprocessor`` have been introduced to make it
+ still possible to customize those steps with the new API.
+
+- ``input`` explicitly control how to interpret the sequence passed to
+ ``fit`` and ``predict``: filenames, file objects or direct (byte or
+ Unicode) strings.
+
+- charset decoding is explicit and strict by default.
+
+- the ``vocabulary``, fitted or not is now stored in the
+ ``vocabulary_`` attribute to be consistent with the project
+ conventions.
+
+- Class :class:`feature_selection.text.TfidfVectorizer` now derives directly
+ from :class:`feature_selection.text.CountVectorizer` to make grid
+ search trivial.
+
+- The ``rvs`` method in the :class:`_BaseHMM` module is now deprecated.
+  ``sample`` should be used instead.
+
+- Beam pruning option in :class:`_BaseHMM` module is removed since it is
+  difficult to Cythonize. If you are interested, you can look at the code
+  in the git history.
+
+- The SVMlight format loader now supports files with both zero-based and
+ one-based column indices, since both occur "in the wild".
+
+- Arguments in class :class:`ShuffleSplit` are now consistent with
+ :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
+ ``train_fraction`` are deprecated and renamed to ``test_size`` and
+ ``train_size`` and can accept both ``float`` and ``int``.
+
+- Arguments in class :class:`Bootstrap` are now consistent with
+ :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
+ ``n_train`` are deprecated and renamed to ``test_size`` and
+ ``train_size`` and can accept both ``float`` and ``int``.
+
+- Argument ``p`` added to classes in :ref:`neighbors` to specify an
+ arbitrary Minkowski metric for nearest neighbors searches.
+
+
+People
+------
+ * 282 `Andreas Müller`_
+ * 239 `Peter Prettenhofer`_
+ * 198 `Gael Varoquaux`_
+ * 129 `Olivier Grisel`_
+ * 114 `Mathieu Blondel`_
+ * 103 Clay Woolam
+ * 96 `Lars Buitinck`_
+ * 88 `Jaques Grobler`_
+ * 82 `Alexandre Gramfort`_
+ * 50 `Bertrand Thirion`_
+ * 42 `Robert Layton`_
+ * 28 flyingimmidev
+ * 26 `Jake Vanderplas`_
+ * 26 Shiqiao Du
+ * 21 `Satrajit Ghosh`_
+ * 17 `David Marek`_
+ * 17 `Gilles Louppe`_
+ * 14 `Vlad Niculae`_
+ * 11 Yannick Schwartz
+ * 10 `Fabian Pedregosa`_
+ * 9 fcostin
+ * 7 Nick Wilson
+ * 5 Adrien Gaidon
+ * 5 `Nicolas Pinto`_
+ * 4 `David Warde-Farley`_
+ * 5 Nelle Varoquaux
+ * 5 Emmanuelle Gouillart
+ * 3 Joonas Sillanpää
+ * 3 Paolo Losi
+ * 2 Charles McCarthy
+ * 2 Roy Hyunjin Han
+ * 2 Scott White
+ * 2 ibayer
+ * 1 Brandyn White
+ * 1 Carlos Scheidegger
+ * 1 Claire Revillet
+ * 1 Conrad Lee
+ * 1 `Edouard Duchesnay`_
+ * 1 Jan Hendrik Metzen
+ * 1 Meng Xinfan
+ * 1 `Rob Zinkov`_
+ * 1 Shiqiao
+ * 1 Udi Weinsberg
+ * 1 Virgile Fritsch
+ * 1 Xinfan Meng
+ * 1 Yaroslav Halchenko
+ * 1 jansoe
+ * 1 Leon Palafox
+
+
+.. _changes_0_10:
+
+Version 0.10
+============
+
+**January 11, 2012**
+
+Changelog
+---------
+
+- Python 2.5 compatibility was dropped; the minimum Python version needed
+ to use scikit-learn is now 2.6.
+
+- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with
+ associated cross-validated estimator, by `Gael Varoquaux`_
+
+- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_,
+ `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete
+ documentation and examples.
+
+- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).
+
+- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).
+
+- Faster tests by `Fabian Pedregosa`_ and others.
+
+- Silhouette Coefficient cluster analysis evaluation metric added as
+ :func:`sklearn.metrics.silhouette_score` by Robert Layton.
+
+- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter:
+ the clustering algorithm used to be run ``n_init`` times but the last
+ solution was retained instead of the best solution by `Olivier Grisel`_.
+
+- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse
+ predict methods; Enhanced test time performance by converting model
+ parameters to fortran-style arrays after fitting (only multi-class).
+
+- Adjusted Mutual Information metric added as
+ :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton.
+
+- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear
+ now support scaling of C regularization parameter by the number of
+ samples by `Alexandre Gramfort`_.
+
+- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and
+ `Brian Holt`_. The module comes with the random forest algorithm and the
+ extra-trees method, along with documentation and examples.
+
+- :ref:`outlier_detection`: outlier and novelty detection, by
+ :user:`Virgile Fritsch `.
+
+- :ref:`kernel_approximation`: a transform implementing kernel
+ approximation for fast SGD on non-linear kernels by
+ `Andreas Müller`_.
+
+- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_.
+
+- :ref:`SparseCoder` by `Vlad Niculae`_.
+
+- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_.
+
+- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_.
+
+- Improved documentation for developers and for the :mod:`sklearn.utils`
+ module, by `Jake Vanderplas`_.
+
+- Vectorized 20newsgroups dataset loader
+ (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by
+ `Mathieu Blondel`_.
+
+- :ref:`multiclass` by `Lars Buitinck`_.
+
+- Utilities for fast computation of mean and variance for sparse matrices
+ by `Mathieu Blondel`_.
+
+- Make :func:`sklearn.preprocessing.scale` and
+ :class:`sklearn.preprocessing.Scaler` work on sparse matrices by
+ `Olivier Grisel`_
+
+- Feature importances using decision trees and/or forest of trees,
+ by `Gilles Louppe`_.
+
+- Parallel implementation of forests of randomized trees by
+ `Gilles Louppe`_.
+
+- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train
+ sets as well as the test sets by `Olivier Grisel`_.
+
+- Errors in the build of the documentation fixed by `Andreas Müller`_.
+
+
+API changes summary
+-------------------
+
+Here are the code migration instructions when upgrading from scikit-learn
+version 0.9:
+
+- Some estimators that may overwrite their inputs to save memory previously
+ had ``overwrite_`` parameters; these have been replaced with ``copy_``
+ parameters with exactly the opposite meaning.
+
+ This particularly affects some of the estimators in :mod:`linear_model`.
+ The default behavior is still to copy everything passed in.
+
+- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no
+ longer supports loading two files at once; use ``load_svmlight_files``
+ instead. Also, the (unused) ``buffer_mb`` parameter is gone.
+
+- Sparse estimators in the :ref:`sgd` module use dense parameter vector
+ ``coef_`` instead of ``sparse_coef_``. This significantly improves
+ test time performance.
+
+- The :ref:`covariance` module now has a robust estimator of
+ covariance, the Minimum Covariance Determinant estimator.
+
+- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored
+ but the changes are backwards compatible. They have been moved to the
+ :mod:`metrics.cluster.supervised`, along with
+ :mod:`metrics.cluster.unsupervised` which contains the Silhouette
+ Coefficient.
+
+- The ``permutation_test_score`` function now behaves the same way as
+ ``cross_val_score`` (i.e. uses the mean score across the folds.)
+
+- Cross Validation generators now use integer indices (``indices=True``)
+ by default instead of boolean masks. This make it more intuitive to
+ use with sparse matrix data.
+
+- The functions used for sparse coding, ``sparse_encode`` and
+ ``sparse_encode_parallel`` have been combined into
+ :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays
+ have been transposed for consistency with the matrix factorization setting,
+ as opposed to the regression setting.
+
+- Fixed an off-by-one error in the SVMlight/LibSVM file format handling;
+ files generated using :func:`sklearn.datasets.dump_svmlight_file` should be
+ re-generated. (They should continue to work, but accidentally had one
+ extra column of zeros prepended.)
+
+- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``.
+
+- :func:`sklearn.utils.extmath.fast_svd` has been renamed
+ :func:`sklearn.utils.extmath.randomized_svd` and the default
+ oversampling is now fixed to 10 additional random vectors instead
+ of doubling the number of components to extract. The new behavior
+ follows the reference paper.
+
+
+People
+------
+
+The following people contributed to scikit-learn since last release:
+
+ * 246 `Andreas Müller`_
+ * 242 `Olivier Grisel`_
+ * 220 `Gilles Louppe`_
+ * 183 `Brian Holt`_
+ * 166 `Gael Varoquaux`_
+ * 144 `Lars Buitinck`_
+ * 73 `Vlad Niculae`_
+ * 65 `Peter Prettenhofer`_
+ * 64 `Fabian Pedregosa`_
+ * 60 Robert Layton
+ * 55 `Mathieu Blondel`_
+ * 52 `Jake Vanderplas`_
+ * 44 Noel Dawe
+ * 38 `Alexandre Gramfort`_
+ * 24 :user:`Virgile Fritsch `
+ * 23 `Satrajit Ghosh`_
+ * 3 Jan Hendrik Metzen
+ * 3 Kenneth C. Arnold
+ * 3 Shiqiao Du
+ * 3 Tim Sheerman-Chase
+ * 3 `Yaroslav Halchenko`_
+ * 2 Bala Subrahmanyam Varanasi
+ * 2 DraXus
+ * 2 Michael Eickenberg
+ * 1 Bogdan Trach
+ * 1 Félix-Antoine Fortin
+ * 1 Juan Manuel Caicedo Carvajal
+ * 1 Nelle Varoquaux
+ * 1 `Nicolas Pinto`_
+ * 1 Tiziano Zito
+ * 1 Xinfan Meng
+
+
+
+.. _changes_0_9:
+
+Version 0.9
+===========
+
+**September 21, 2011**
+
+scikit-learn 0.9 was released in September 2011, three months after the 0.8
+release and includes the new modules :ref:`manifold`, :ref:`dirichlet_process`
+as well as several new algorithms and documentation improvements.
+
+This release also includes the dictionary-learning work developed by
+`Vlad Niculae`_ as part of the `Google Summer of Code
+`_ program.
+
+
+
+.. |banner1| image:: ../auto_examples/manifold/images/thumb/sphx_glr_plot_compare_methods_thumb.png
+ :target: ../auto_examples/manifold/plot_compare_methods.html
+
+.. |banner2| image:: ../auto_examples/linear_model/images/thumb/sphx_glr_plot_omp_thumb.png
+ :target: ../auto_examples/linear_model/plot_omp.html
+
+.. |banner3| image:: ../auto_examples/decomposition/images/thumb/sphx_glr_plot_kernel_pca_thumb.png
+ :target: ../auto_examples/decomposition/plot_kernel_pca.html
+
+.. |center-div| raw:: html
+
+
+
+.. |end-div| raw:: html
+
+
+
+
+|center-div| |banner2| |banner1| |banner3| |end-div|
+
+Changelog
+---------
+
+- New :ref:`manifold` module by `Jake Vanderplas`_ and
+ `Fabian Pedregosa`_.
+
+- New :ref:`Dirichlet Process ` Gaussian Mixture
+ Model by `Alexandre Passos`_
+
+- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ :
+ general refactoring, support for sparse matrices in input, speed and
+ documentation improvements. See the next section for a full list of API
+ changes.
+
+- Improvements on the :ref:`feature_selection` module by
+ `Gilles Louppe`_ : refactoring of the RFE classes, documentation
+ rewrite, increased efficiency and minor API changes.
+
+- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and
+ `Alexandre Gramfort`_
+
+- Printing an estimator now behaves independently of architectures
+ and Python version thanks to :user:`Jean Kossaifi `.
+
+- :ref:`Loader for libsvm/svmlight format ` by
+ `Mathieu Blondel`_ and `Lars Buitinck`_
+
+- Documentation improvements: thumbnails in
+ example gallery by `Fabian Pedregosa`_.
+
+- Important bugfixes in :ref:`svm` module (segfaults, bad
+ performance) by `Fabian Pedregosa`_.
+
+- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes`
+ by `Lars Buitinck`_
+
+- Text feature extraction optimizations by Lars Buitinck
+
+- Chi-Square feature selection
+ (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_.
+
+- :ref:`sample_generators` module refactoring by `Gilles Louppe`_
+
+- :ref:`multiclass` by `Mathieu Blondel`_
+
+- Ball tree rewrite by `Jake Vanderplas`_
+
+- Implementation of :ref:`dbscan` algorithm by Robert Layton
+
+- Kmeans predict and transform by Robert Layton
+
+- Preprocessing module refactoring by `Olivier Grisel`_
+
+- Faster mean shift by Conrad Lee
+
+- New ``Bootstrap``, :ref:`ShuffleSplit` and various other
+ improvements in cross validation schemes by `Olivier Grisel`_ and
+ `Gael Varoquaux`_
+
+- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_
+
+- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_
+
+- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_
+
+- Implementation of :class:`linear_model.LassoLarsCV`
+ (cross-validated Lasso solver using the Lars algorithm) and
+ :class:`linear_model.LassoLarsIC` (BIC/AIC model
+ selection in Lars) by `Gael Varoquaux`_
+ and `Alexandre Gramfort`_
+
+- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu
+
+- Distance helper functions :func:`metrics.pairwise.pairwise_distances`
+ and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton
+
+- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer.
+
+- :ref:`mldata` utilities by Pietro Berkes.
+
+- :ref:`olivetti_faces` by `David Warde-Farley`_.
+
+
+API changes summary
+-------------------
+
+Here are the code migration instructions when upgrading from scikit-learn
+version 0.8:
+
+- The ``scikits.learn`` package was renamed ``sklearn``. There is
+ still a ``scikits.learn`` package alias for backward compatibility.
+
+ Third-party projects with a dependency on scikit-learn 0.9+ should
+ upgrade their codebase. For instance, under Linux / MacOSX just run
+ (make a backup first!)::
+
+ find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'
+
+- Estimators no longer accept model parameters as ``fit`` arguments:
+ instead all parameters must be only be passed as constructor
+ arguments or using the now public ``set_params`` method inherited
+ from :class:`base.BaseEstimator`.
+
+ Some estimators can still accept keyword arguments on the ``fit``
+ but this is restricted to data-dependent values (e.g. a Gram matrix
+ or an affinity matrix that are precomputed from the ``X`` data matrix.
+
+- The ``cross_val`` package has been renamed to ``cross_validation``
+ although there is also a ``cross_val`` package alias in place for
+ backward compatibility.
+
+ Third-party projects with a dependency on scikit-learn 0.9+ should
+ upgrade their codebase. For instance, under Linux / MacOSX just run
+ (make a backup first!)::
+
+ find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'
+
+- The ``score_func`` argument of the
+ ``sklearn.cross_validation.cross_val_score`` function is now expected
+ to accept ``y_test`` and ``y_predicted`` as only arguments for
+ classification and regression tasks or ``X_test`` for unsupervised
+ estimators.
+
+- ``gamma`` parameter for support vector machine algorithms is set
+ to ``1 / n_features`` by default, instead of ``1 / n_samples``.
+
+- The ``sklearn.hmm`` has been marked as orphaned: it will be removed
+ from scikit-learn in version 0.11 unless someone steps up to
+ contribute documentation, examples and fix lurking numerical
+ stability issues.
+
+- ``sklearn.neighbors`` has been made into a submodule. The two previously
+ available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``
+ have been marked as deprecated. Their functionality has been divided
+ among five new classes: ``NearestNeighbors`` for unsupervised neighbors
+ searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
+ for supervised classification problems, and ``KNeighborsRegressor``
+ & ``RadiusNeighborsRegressor`` for supervised regression problems.
+
+- ``sklearn.ball_tree.BallTree`` has been moved to
+ ``sklearn.neighbors.BallTree``. Using the former will generate a warning.
+
+- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
+ LassoLARSCV, etc.) have been renamed to
+ ``sklearn.linear_model.Lars()``.
+
+- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
+ parameter, which by default is None. If not given, the result is the distance
+ (or kernel similarity) between each sample in Y. If given, the result is the
+ pairwise distance (or kernel similarity) between samples in X to Y.
+
+- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
+ and by default returns the pairwise distance. For the component wise distance,
+ set the parameter ``sum_over_features`` to ``False``.
+
+Backward compatibility package aliases and other deprecated classes and
+functions will be removed in version 0.11.
+
+
+People
+------
+
+38 people contributed to this release.
+
+- 387 `Vlad Niculae`_
+- 320 `Olivier Grisel`_
+- 192 `Lars Buitinck`_
+- 179 `Gael Varoquaux`_
+- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
+- 127 `Jake Vanderplas`_
+- 120 `Mathieu Blondel`_
+- 85 `Alexandre Passos`_
+- 67 `Alexandre Gramfort`_
+- 57 `Peter Prettenhofer`_
+- 56 `Gilles Louppe`_
+- 42 Robert Layton
+- 38 Nelle Varoquaux
+- 32 :user:`Jean Kossaifi `
+- 30 Conrad Lee
+- 22 Pietro Berkes
+- 18 andy
+- 17 David Warde-Farley
+- 12 Brian Holt
+- 11 Robert
+- 8 Amit Aides
+- 8 :user:`Virgile Fritsch `
+- 7 `Yaroslav Halchenko`_
+- 6 Salvatore Masecchia
+- 5 Paolo Losi
+- 4 Vincent Schut
+- 3 Alexis Metaireau
+- 3 Bryan Silverthorn
+- 3 `Andreas Müller`_
+- 2 Minwoo Jake Lee
+- 1 Emmanuelle Gouillart
+- 1 Keith Goodman
+- 1 Lucas Wiman
+- 1 `Nicolas Pinto`_
+- 1 Thouis (Ray) Jones
+- 1 Tim Sheerman-Chase
+
+
+.. _changes_0_8:
+
+Version 0.8
+===========
+
+**May 11, 2011**
+
+scikit-learn 0.8 was released in May 2011, one month after the first
+"international" `scikit-learn coding sprint
+`_ and is
+marked by the inclusion of important modules: :ref:`hierarchical_clustering`,
+:ref:`cross_decomposition`, :ref:`NMF`, initial support for Python 3 and by important
+enhancements and bug fixes.
+
+
+Changelog
+---------
+
+Several new modules were introduced during this release:
+
+- New :ref:`hierarchical_clustering` module by Vincent Michel,
+ `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
+
+- :ref:`kernel_pca` implementation by `Mathieu Blondel`_
+
+- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
+
+- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
+
+- :ref:`NMF` module `Vlad Niculae`_
+
+- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
+ :user:`Virgile Fritsch <VirgileFritsch>` in the :ref:`covariance` module.
+
+
+Some other modules benefited from significant improvements or cleanups.
+
+
+- Initial support for Python 3: builds and imports cleanly,
+ some modules are usable while others have failing tests by `Fabian Pedregosa`_.
+
+- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
+
+- Guide :ref:`performance-howto` by `Olivier Grisel`_.
+
+- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.
+
+- bug and style fixing in :ref:`k_means` algorithm by Jan Schlüter.
+
+- Add attribute converged to Gaussian Mixture Models by Vincent Schut.
+
+- Implemented ``transform``, ``predict_log_proba`` in
+ :class:`discriminant_analysis.LinearDiscriminantAnalysis` By `Mathieu Blondel`_.
+
+- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
+ `Gael Varoquaux`_ and Amit Aides.
+
+- Refactored SGD module (removed code duplication, better variable naming),
+ added interface for sample weight by `Peter Prettenhofer`_.
+
+- Wrapped BallTree with Cython by Thouis (Ray) Jones.
+
+- Added function :func:`svm.l1_min_c` by Paolo Losi.
+
+- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
+ `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
+ `Fabian Pedregosa`_.
+
+
+People
+-------
+
+People that made this release possible preceded by number of commits:
+
+
+- 159 `Olivier Grisel`_
+- 96 `Gael Varoquaux`_
+- 96 `Vlad Niculae`_
+- 94 `Fabian Pedregosa`_
+- 36 `Alexandre Gramfort`_
+- 32 Paolo Losi
+- 31 `Edouard Duchesnay`_
+- 30 `Mathieu Blondel`_
+- 25 `Peter Prettenhofer`_
+- 22 `Nicolas Pinto`_
+- 11 :user:`Virgile Fritsch <VirgileFritsch>`
+- 7 Lars Buitinck
+- 6 Vincent Michel
+- 5 `Bertrand Thirion`_
+- 4 Thouis (Ray) Jones
+- 4 Vincent Schut
+- 3 Jan Schlüter
+- 2 Julien Miotte
+- 2 `Matthieu Perrot`_
+- 2 Yann Malet
+- 2 `Yaroslav Halchenko`_
+- 1 Amit Aides
+- 1 `Andreas Müller`_
+- 1 Feth Arezki
+- 1 Meng Xinfan
+
+
+.. _changes_0_7:
+
+Version 0.7
+===========
+
+**March 2, 2011**
+
+scikit-learn 0.7 was released in March 2011, roughly three months
+after the 0.6 release. This release is marked by the speed
+improvements in existing algorithms like k-Nearest Neighbors and
+K-Means algorithm and by the inclusion of an efficient algorithm for
+computing the Ridge Generalized Cross Validation solution. Unlike the
+preceding release, no new modules were added to this release.
+
+Changelog
+---------
+
+- Performance improvements for Gaussian Mixture Model sampling [Jan
+ Schlüter].
+
+- Implementation of efficient leave-one-out cross-validated Ridge in
+ :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]
+
+- Better handling of collinearity and early stopping in
+ :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
+ Pedregosa`_].
+
+- Fixes for liblinear ordering of labels and sign of coefficients
+ [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
+
+- Performance improvements for Nearest Neighbors algorithm in
+ high-dimensional spaces [`Fabian Pedregosa`_].
+
+- Performance improvements for :class:`cluster.KMeans` [`Gael
+ Varoquaux`_ and `James Bergstra`_].
+
+- Sanity checks for SVM-based classes [`Mathieu Blondel`_].
+
+- Refactoring of :class:`neighbors.NeighborsClassifier` and
+ :func:`neighbors.kneighbors_graph`: added different algorithms for
+ the k-Nearest Neighbor Search and implemented a more stable
+ algorithm for finding barycenter weights. Also added some
+ developer documentation for this module, see
+ `notes_neighbors
+ `_ for more information [`Fabian Pedregosa`_].
+
+- Documentation improvements: Added :class:`pca.RandomizedPCA` and
+ :class:`linear_model.LogisticRegression` to the class
+ reference. Also added references of matrices used for clustering
+ and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
+ Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
+ Gouillart]
+
+- Bound decision_function in classes that make use of liblinear_,
+ dense and sparse variants, like :class:`svm.LinearSVC` or
+ :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
+
+- Performance and API improvements to
+ :func:`metrics.euclidean_distances` and to
+ :class:`pca.RandomizedPCA` [`James Bergstra`_].
+
+- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]
+
+- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
+ [`Ron Weiss`_].
+
+- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]
+
+
+People
+------
+
+People that made this release possible preceded by number of commits:
+
+- 85 `Fabian Pedregosa`_
+- 67 `Mathieu Blondel`_
+- 20 `Alexandre Gramfort`_
+- 19 `James Bergstra`_
+- 14 Dan Yamins
+- 13 `Olivier Grisel`_
+- 12 `Gael Varoquaux`_
+- 4 `Edouard Duchesnay`_
+- 4 `Ron Weiss`_
+- 2 Satrajit Ghosh
+- 2 Vincent Dubourg
+- 1 Emmanuelle Gouillart
+- 1 Kamel Ibn Hassen Derouiche
+- 1 Paolo Losi
+- 1 VirgileFritsch
+- 1 `Yaroslav Halchenko`_
+- 1 Xinfan Meng
+
+
+.. _changes_0_6:
+
+Version 0.6
+===========
+
+**December 21, 2010**
+
+scikit-learn 0.6 was released on December 2010. It is marked by the
+inclusion of several new modules and a general renaming of old
+ones. It is also marked by the inclusion of new example, including
+applications to real-world datasets.
+
+
+Changelog
+---------
+
+- New `stochastic gradient
+ `_ descent
+ module by Peter Prettenhofer. The module comes with complete
+ documentation and examples.
+
+- Improved svm module: memory consumption has been reduced by 50%,
+ heuristic to automatically set class weights, possibility to
+ assign weights to samples (see
+ :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
+
+- New :ref:`gaussian_process` module by Vincent Dubourg. This module
+ also has great documentation and some very neat examples. See
+ example_gaussian_process_plot_gp_regression.py or
+ example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
+ for a taste of what can be done.
+
+- It is now possible to use liblinear’s Multi-class SVC (option
+ multi_class in :class:`svm.LinearSVC`)
+
+- New features and performance improvements of text feature
+ extraction.
+
+- Improved sparse matrix support, both in main classes
+ (:class:`grid_search.GridSearchCV`) as in modules
+ sklearn.svm.sparse and sklearn.linear_model.sparse.
+
+- Lots of cool new examples and a new section that uses real-world
+ datasets was created. These include:
+ :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
+ :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
+ :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
+ :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
+ others.
+
+- Faster :ref:`least_angle_regression` algorithm. It is now 2x
+ faster than the R version on worst case and up to 10x times faster
+ on some cases.
+
+- Faster coordinate descent algorithm. In particular, the full path
+ version of lasso (:func:`linear_model.lasso_path`) is more than
+ 200x times faster than before.
+
+- It is now possible to get probability estimates from a
+ :class:`linear_model.LogisticRegression` model.
+
+- module renaming: the glm module has been renamed to linear_model,
+ the gmm module has been included into the more general mixture
+ model and the sgd module has been included in linear_model.
+
+- Lots of bug fixes and documentation improvements.
+
+
+People
+------
+
+People that made this release possible preceded by number of commits:
+
+ * 207 `Olivier Grisel`_
+
+ * 167 `Fabian Pedregosa`_
+
+ * 97 `Peter Prettenhofer`_
+
+ * 68 `Alexandre Gramfort`_
+
+ * 59 `Mathieu Blondel`_
+
+ * 55 `Gael Varoquaux`_
+
+ * 33 Vincent Dubourg
+
+ * 21 `Ron Weiss`_
+
+ * 9 Bertrand Thirion
+
+ * 3 `Alexandre Passos`_
+
+ * 3 Anne-Laure Fouque
+
+ * 2 Ronan Amicel
+
+ * 1 `Christian Osendorfer`_
+
+
+
+.. _changes_0_5:
+
+
+Version 0.5
+===========
+
+**October 11, 2010**
+
+Changelog
+---------
+
+New classes
+-----------
+
+- Support for sparse matrices in some classifiers of modules
+ ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
+ :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
+ :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`)
+
+- New :class:`pipeline.Pipeline` object to compose different estimators.
+
+- Recursive Feature Elimination routines in module
+ :ref:`feature_selection`.
+
+- Addition of various classes capable of cross validation in the
+ linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`,
+ etc.).
+
+- New, more efficient LARS algorithm implementation. The Lasso
+ variant of the algorithm is also implemented. See
+ :class:`linear_model.lars_path`, :class:`linear_model.Lars` and
+ :class:`linear_model.LassoLars`.
+
+- New Hidden Markov Models module (see classes
+ :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
+ :class:`hmm.GMMHMM`)
+
+- New module feature_extraction (see :ref:`class reference
+ `)
+
+- New FastICA algorithm in module sklearn.fastica
+
+
+Documentation
+-------------
+
+- Improved documentation for many modules, now separating
+ narrative documentation from the class reference. As an example,
+ see `documentation for the SVM module
+ `_ and the
+ complete `class reference
+ `_.
+
+Fixes
+-----
+
+- API changes: adhere variable names to PEP-8, give more
+ meaningful names.
+
+- Fixes for svm module to run on a shared memory context
+ (multiprocessing).
+
+- It is again possible to generate latex (and thus PDF) from the
+ sphinx docs.
+
+Examples
+--------
+
+- new examples using some of the mlcomp datasets:
+ ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and
+ :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`
+
+- Many more examples. `See here
+ `_
+ the full list of examples.
+
+
+External dependencies
+---------------------
+
+- Joblib is now a dependency of this package, although it is
+ shipped with (sklearn.externals.joblib).
+
+Removed modules
+---------------
+
+- Module ann (Artificial Neural Networks) has been removed from
+ the distribution. Users wanting this sort of algorithms should
+ take a look into pybrain.
+
+Misc
+----
+
+- New sphinx theme for the web page.
+
+
+Authors
+-------
+
+The following is a list of authors for this release, preceded by
+number of commits:
+
+ * 262 Fabian Pedregosa
+ * 240 Gael Varoquaux
+ * 149 Alexandre Gramfort
+ * 116 Olivier Grisel
+ * 40 Vincent Michel
+ * 38 Ron Weiss
+ * 23 Matthieu Perrot
+ * 10 Bertrand Thirion
+ * 7 Yaroslav Halchenko
+ * 9 VirgileFritsch
+ * 6 Edouard Duchesnay
+ * 4 Mathieu Blondel
+ * 1 Ariel Rokem
+ * 1 Matthieu Brucher
+
+Version 0.4
+===========
+
+**August 26, 2010**
+
+Changelog
+---------
+
+Major changes in this release include:
+
+- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring &
+ speed improvements (roughly 100x times faster).
+
+- Coordinate Descent Refactoring (and bug fixing) for consistency
+ with R's package GLMNET.
+
+- New metrics module.
+
+- New GMM module contributed by Ron Weiss.
+
+- Implementation of the LARS algorithm (without Lasso variant for now).
+
+- feature_selection module redesign.
+
+- Migration to GIT as version control system.
+
+- Removal of obsolete attrselect module.
+
+- Rename of private compiled extensions (added underscore).
+
+- Removal of legacy unmaintained code.
+
+- Documentation improvements (both docstring and rst).
+
+- Improvement of the build system to (optionally) link with MKL.
+ Also, provide a lite BLAS implementation in case no system-wide BLAS is
+ found.
+
+- Lots of new examples.
+
+- Many, many bug fixes ...
+
+
+Authors
+-------
+
+The committer list for this release is the following (preceded by number
+of commits):
+
+ * 143 Fabian Pedregosa
+ * 35 Alexandre Gramfort
+ * 34 Olivier Grisel
+ * 11 Gael Varoquaux
+ * 5 Yaroslav Halchenko
+ * 2 Vincent Michel
+ * 1 Chris Filo Gorgolewski
+
+
+Earlier versions
+================
+
+Earlier versions included contributions by Fred Mailhot, David Cooke,
+David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
+
diff --git a/doc/whats_new/v0.13.rst b/doc/whats_new/v0.13.rst
new file mode 100644
index 0000000000000..c234cd6eb2a37
--- /dev/null
+++ b/doc/whats_new/v0.13.rst
@@ -0,0 +1,391 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_13_1:
+
+Version 0.13.1
+==============
+
+**February 23, 2013**
+
+The 0.13.1 release only fixes some bugs and does not add any new functionality.
+
+Changelog
+---------
+
+- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being
+ interpreted as a test by `Yaroslav Halchenko`_.
+
+- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans`
+ by `Gael Varoquaux`_.
+
+- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_.
+
+- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_.
+
+- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_.
+
+- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_.
+
+- Other small improvements to tests and documentation.
+
+People
+------
+List of contributors for release 0.13.1 by number of commits.
+ * 16 `Lars Buitinck`_
+ * 12 `Andreas Müller`_
+ * 8 `Gael Varoquaux`_
+ * 5 Robert Marchman
+ * 3 `Peter Prettenhofer`_
+ * 2 Hrishikesh Huilgolkar
+ * 1 Bastiaan van den Berg
+ * 1 Diego Molla
+ * 1 `Gilles Louppe`_
+ * 1 `Mathieu Blondel`_
+ * 1 `Nelle Varoquaux`_
+ * 1 Rafael Cunha de Almeida
+ * 1 Rolando Espinoza La fuente
+ * 1 `Vlad Niculae`_
+ * 1 `Yaroslav Halchenko`_
+
+
+.. _changes_0_13:
+
+Version 0.13
+============
+
+**January 21, 2013**
+
+New Estimator Classes
+---------------------
+
+- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two
+ data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check
+ your estimators. See :ref:`dummy_estimators` in the user guide.
+ Multioutput support added by `Arnaud Joly`_.
+
+- :class:`decomposition.FactorAnalysis`, a transformer implementing the
+ classical factor analysis, by `Christian Osendorfer`_ and `Alexandre
+ Gramfort`_. See :ref:`FA` in the user guide.
+
+- :class:`feature_extraction.FeatureHasher`, a transformer implementing the
+ "hashing trick" for fast, low-memory feature extraction from string fields
+ by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer`
+ for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and
+ :ref:`hashing_vectorizer` for the documentation and sample usage.
+
+- :class:`pipeline.FeatureUnion`, a transformer that concatenates
+ results of several other transformers by `Andreas Müller`_. See
+ :ref:`feature_union` in the user guide.
+
+- :class:`random_projection.GaussianRandomProjection`,
+ :class:`random_projection.SparseRandomProjection` and the function
+ :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are
+ transformers implementing Gaussian and sparse random projection matrix
+ by `Olivier Grisel`_ and `Arnaud Joly`_.
+ See :ref:`random_projection` in the user guide.
+
+- :class:`kernel_approximation.Nystroem`, a transformer for approximating
+ arbitrary kernels by `Andreas Müller`_. See
+ :ref:`nystroem_kernel_approx` in the user guide.
+
+- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary
+ encodings of categorical features by `Andreas Müller`_. See
+ :ref:`preprocessing_categorical_features` in the user guide.
+
+- :class:`linear_model.PassiveAggressiveClassifier` and
+ :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing
+ an efficient stochastic optimization for linear models by `Rob Zinkov`_ and
+ `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user
+ guide.
+
+- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional
+ sparse representations using ensembles of totally random trees by `Andreas Müller`_.
+ See :ref:`random_trees_embedding` in the user guide.
+
+- :class:`manifold.SpectralEmbedding` and function
+ :func:`manifold.spectral_embedding`, implementing the "laplacian
+ eigenmaps" transformation for non-linear dimensionality reduction by Wei
+ Li. See :ref:`spectral_embedding` in the user guide.
+
+- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_
+ and `Nelle Varoquaux`_,
+
+
+Changelog
+---------
+
+- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has
+ option for normalized output that reports the fraction of
+ misclassifications, rather than the raw number of misclassifications. By
+ Kyle Beauchamp.
+
+- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now
+ support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_.
+
+- Speedup improvement when using bootstrap samples in forests of randomized
+ trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_.
+
+- Partial dependence plots for :ref:`gradient_boosting` in
+ :func:`ensemble.partial_dependence.partial_dependence` by `Peter
+ Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an
+ example.
+
+- The table of contents on the website has now been made expandable by
+ `Jaques Grobler`_.
+
+- :class:`feature_selection.SelectPercentile` now breaks ties
+ deterministically instead of returning all equally ranked features.
+
+- :class:`feature_selection.SelectKBest` and
+ :class:`feature_selection.SelectPercentile` are more numerically stable
+ since they use scores, rather than p-values, to rank results. This means
+ that they might sometimes select different features than they did
+ previously.
+
+- Ridge regression and ridge classification fitting with ``sparse_cg`` solver
+ no longer has quadratic memory complexity, by `Lars Buitinck`_ and
+ `Fabian Pedregosa`_.
+
+- Ridge regression and ridge classification now support a new fast solver
+ called ``lsqr``, by `Mathieu Blondel`_.
+
+- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee.
+
+- Added support for reading/writing svmlight files with pairwise
+ preference attribute (qid in svmlight file format) in
+ :func:`datasets.dump_svmlight_file` and
+ :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_.
+
+- Faster and more robust :func:`metrics.confusion_matrix` and
+ :ref:`clustering_evaluation` by Wei Li.
+
+- :func:`cross_validation.cross_val_score` now works with precomputed kernels
+ and affinity matrices, by `Andreas Müller`_.
+
+- LARS algorithm made more numerically stable with heuristics to drop
+ regressors too correlated as well as to stop the path when
+ numerical noise becomes predominant, by `Gael Varoquaux`_.
+
+- Faster implementation of :func:`metrics.precision_recall_curve` by
+ Conrad Lee.
+
+- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
+ in computer vision applications.
+
+- Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by
+ Shaun Jackman.
+
+- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
+ by Andrew Winterman.
+
+- Improve consistency in gradient boosting: estimators
+ :class:`ensemble.GradientBoostingRegressor` and
+ :class:`ensemble.GradientBoostingClassifier` use the estimator
+ :class:`tree.DecisionTreeRegressor` instead of the
+ :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.
+
+- Fixed a floating point exception in the :ref:`decision trees `
+ module, by Seberg.
+
+- Fix :func:`metrics.roc_curve` fails when y_true has only one class
+ by Wei Li.
+
+- Add the :func:`metrics.mean_absolute_error` function which computes the
+ mean absolute error. The :func:`metrics.mean_squared_error`,
+ :func:`metrics.mean_absolute_error` and
+ :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.
+
+- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
+ :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
+ of ``class_weight`` was reversed as erroneously higher weight meant less
+ positives of a given class in earlier releases.
+
+- Improve narrative documentation and consistency in
+ :mod:`sklearn.metrics` for regression and classification metrics
+ by `Arnaud Joly`_.
+
+- Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
+ unsorted indices by Xinfan Meng and `Andreas Müller`_.
+
+- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
+ with little observations attached to them, by `Gael Varoquaux`_.
+
+
+API changes summary
+-------------------
+- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
+ This applies to :class:`decomposition.DictionaryLearning`,
+ :class:`decomposition.MiniBatchDictionaryLearning`,
+ :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
+
+- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
+ This applies to :class:`semi_supervised.LabelPropagation` and
+ :class:`semi_supervised.label_propagation.LabelSpreading`.
+
+- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
+ consistency in :class:`ensemble.BaseGradientBoosting` and
+ :class:`ensemble.GradientBoostingRegressor`.
+
+- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
+ was already integrated into the "regular" linear models.
+
+- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
+ accumulated error, was removed. Use ``mean_squared_error`` instead.
+
+- Passing ``class_weight`` parameters to ``fit`` methods is no longer
+ supported. Pass them to estimator constructors instead.
+
+- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``,
+ ``predict`` or ``sample`` methods instead.
+
+- The ``solver`` fit option in Ridge regression and classification is now
+ deprecated and will be removed in v0.14. Use the constructor option
+ instead.
+
+- :class:`feature_extraction.text.DictVectorizer` now returns sparse
+ matrices in the CSR format, instead of COO.
+
+- Renamed ``k`` in :class:`cross_validation.KFold` and
+ :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed
+ ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``.
+
+- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency.
+ This applies to :class:`cross_validation.ShuffleSplit`,
+ :class:`cross_validation.StratifiedShuffleSplit`,
+ :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`.
+
+- Replaced ``rho`` in :class:`linear_model.ElasticNet` and
+ :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter
+ had different meanings; ``l1_ratio`` was introduced to avoid confusion.
+ It has the same meaning as previously ``rho`` in
+ :class:`linear_model.ElasticNet` and ``(1-rho)`` in
+ :class:`linear_model.SGDClassifier`.
+
+- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now
+ store a list of paths in the case of multiple targets, rather than
+ an array of paths.
+
+- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_``
+ to adhere more strictly with the API.
+
+- :func:`cluster.spectral_embedding` was moved to
+ :func:`manifold.spectral_embedding`.
+
+- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`,
+ :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode``
+ to ``eigen_solver``.
+
+- Renamed ``mode`` in :func:`manifold.spectral_embedding` and
+ :class:`cluster.SpectralClustering` to ``eigen_solver``.
+
+- ``classes_`` and ``n_classes_`` attributes of
+ :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
+ now flat in case of single output problems and nested in case of
+ multi-output problems.
+
+- The ``estimators_`` attribute of
+ :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
+ :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
+ array of :class:`tree.DecisionTreeRegressor`.
+
+- Renamed ``chunk_size`` to ``batch_size`` in
+ :class:`decomposition.MiniBatchDictionaryLearning` and
+ :class:`decomposition.MiniBatchSparsePCA` for consistency.
+
+- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
+ attribute and support arbitrary dtypes for labels ``y``.
+ Also, the dtype returned by ``predict`` now reflects the dtype of
+ ``y`` during ``fit`` (used to be ``np.float``).
+
+- Changed default test_size in :func:`cross_validation.train_test_split`
+ to None, added possibility to infer ``test_size`` from ``train_size`` in
+ :class:`cross_validation.ShuffleSplit` and
+ :class:`cross_validation.StratifiedShuffleSplit`.
+
+- Renamed function :func:`sklearn.metrics.zero_one` to
+ :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
+ in :func:`sklearn.metrics.zero_one_loss` is different from
+ :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
+ ``normalize=True``.
+
+- Renamed function :func:`metrics.zero_one_score` to
+ :func:`metrics.accuracy_score`.
+
+- :func:`datasets.make_circles` now has the same number of inner and outer points.
+
+- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved
+ from ``fit`` to ``__init__``.
+
+People
+------
+List of contributors for release 0.13 by number of commits.
+
+ * 364 `Andreas Müller`_
+ * 143 `Arnaud Joly`_
+ * 137 `Peter Prettenhofer`_
+ * 131 `Gael Varoquaux`_
+ * 117 `Mathieu Blondel`_
+ * 108 `Lars Buitinck`_
+ * 106 Wei Li
+ * 101 `Olivier Grisel`_
+ * 65 `Vlad Niculae`_
+ * 54 `Gilles Louppe`_
+ * 40 `Jaques Grobler`_
+ * 38 `Alexandre Gramfort`_
+ * 30 `Rob Zinkov`_
+ * 19 Aymeric Masurelle
+ * 18 Andrew Winterman
+ * 17 `Fabian Pedregosa`_
+ * 17 Nelle Varoquaux
+ * 16 `Christian Osendorfer`_
+ * 14 `Daniel Nouri`_
+ * 13 :user:`Virgile Fritsch <VirgileFritsch>`
+ * 13 syhw
+ * 12 `Satrajit Ghosh`_
+ * 10 Corey Lynch
+ * 10 Kyle Beauchamp
+ * 9 Brian Cheung
+ * 9 Immanuel Bayer
+ * 9 mr.Shu
+ * 8 Conrad Lee
+ * 8 `James Bergstra`_
+ * 7 Tadej Janež
+ * 6 Brian Cajes
+ * 6 `Jake Vanderplas`_
+ * 6 Michael
+ * 6 Noel Dawe
+ * 6 Tiago Nunes
+ * 6 cow
+ * 5 Anze
+ * 5 Shiqiao Du
+ * 4 Christian Jauvin
+ * 4 Jacques Kvam
+ * 4 Richard T. Guy
+ * 4 `Robert Layton`_
+ * 3 Alexandre Abraham
+ * 3 Doug Coleman
+ * 3 Scott Dickerson
+ * 2 ApproximateIdentity
+ * 2 John Benediktsson
+ * 2 Mark Veronda
+ * 2 Matti Lyra
+ * 2 Mikhail Korobov
+ * 2 Xinfan Meng
+ * 1 Alejandro Weinstein
+ * 1 `Alexandre Passos`_
+ * 1 Christoph Deil
+ * 1 Eugene Nizhibitsky
+ * 1 Kenneth C. Arnold
+ * 1 Luis Pedro Coelho
+ * 1 Miroslav Batchkarov
+ * 1 Pavel
+ * 1 Sebastian Berg
+ * 1 Shaun Jackman
+ * 1 Subhodeep Moitra
+ * 1 bob
+ * 1 dengemann
+ * 1 emanuele
+ * 1 x006
+
diff --git a/doc/whats_new/v0.14.rst b/doc/whats_new/v0.14.rst
new file mode 100644
index 0000000000000..2b0456593e613
--- /dev/null
+++ b/doc/whats_new/v0.14.rst
@@ -0,0 +1,389 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_14:
+
+Version 0.14
+===============
+
+**August 7, 2013**
+
+Changelog
+---------
+
+- Missing values with sparse and dense matrices can be imputed with the
+ transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_.
+
+- The core implementation of decisions trees has been rewritten from
+ scratch, allowing for faster tree induction and lower memory
+ consumption in all tree-based estimators. By `Gilles Louppe`_.
+
+- Added :class:`ensemble.AdaBoostClassifier` and
+ :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and
+ `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user
+ guide for details and examples.
+
+- Added :class:`grid_search.RandomizedSearchCV` and
+ :class:`grid_search.ParameterSampler` for randomized hyperparameter
+ optimization. By `Andreas Müller`_.
+
+- Added :ref:`biclustering ` algorithms
+ (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and
+ :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data
+ generation methods (:func:`sklearn.datasets.make_biclusters` and
+ :func:`sklearn.datasets.make_checkerboard`), and scoring metrics
+ (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_.
+
+- Added :ref:`Restricted Boltzmann Machines`
+ (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_.
+
+- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_,
+ :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under
+ Python 3.3.
+
+- Ability to pass one penalty (alpha value) per target in
+ :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_.
+
+- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization
+ issue (minor practical significance).
+ By :user:`Norbert Crombach ` and `Mathieu Blondel`_ .
+
+- Added an interactive version of `Andreas Müller`_'s
+ `Machine Learning Cheat Sheet (for scikit-learn)
+ `_
+ to the documentation. See :ref:`Choosing the right estimator `.
+ By `Jaques Grobler`_.
+
+- :class:`grid_search.GridSearchCV` and
+ :func:`cross_validation.cross_val_score` now support the use of advanced
+ scoring function such as area under the ROC curve and f-beta scores.
+ See :ref:`scoring_parameter` for details. By `Andreas Müller`_
+ and `Lars Buitinck`_.
+ Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
+ deprecated.
+
+- Multi-label classification output is now supported by
+ :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`,
+ :func:`metrics.f1_score`, :func:`metrics.fbeta_score`,
+ :func:`metrics.classification_report`,
+ :func:`metrics.precision_score` and :func:`metrics.recall_score`
+ by `Arnaud Joly`_.
+
+- Two new metrics :func:`metrics.hamming_loss` and
+ :func:`metrics.jaccard_similarity_score`
+ are added with multi-label support by `Arnaud Joly`_.
+
+- Speed and memory usage improvements in
+ :class:`feature_extraction.text.CountVectorizer` and
+ :class:`feature_extraction.text.TfidfVectorizer`,
+ by Jochen Wersdörfer and Roman Sinayev.
+
+- The ``min_df`` parameter in
+ :class:`feature_extraction.text.CountVectorizer` and
+ :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2,
+ has been reset to 1 to avoid unpleasant surprises (empty vocabularies)
+ for novice users who try it out on tiny document collections.
+ A value of at least 2 is still recommended for practical use.
+
+- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
+ :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
+ converts their ``coef_`` into a sparse matrix, meaning stored models
+ trained using these estimators can be made much more compact.
+
+- :class:`linear_model.SGDClassifier` now produces multiclass probability
+ estimates when trained under log loss or modified Huber loss.
+
+- Hyperlinks to documentation in example code on the website by
+ :user:`Martin Luessi `.
+
+- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
+ of the features for non-default ``feature_range`` settings. By `Andreas
+ Müller`_.
+
+- ``max_features`` in :class:`tree.DecisionTreeClassifier`,
+ :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+ now supports percentage values. By `Gilles Louppe`_.
+
+- Performance improvements in :class:`isotonic.IsotonicRegression` by
+ `Nelle Varoquaux`_.
+
+- :func:`metrics.accuracy_score` has an option ``normalize`` to return
+  the fraction or the number of correctly classified samples
+  by `Arnaud Joly`_.
+
+- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
+ loss. By Jochen Wersdörfer and `Lars Buitinck`_.
+
+- A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output
+ incorrect probabilities has been fixed.
+
+- Feature selectors now share a mixin providing consistent ``transform``,
+ ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
+- A fitted :class:`grid_search.GridSearchCV` or
+ :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
+ By `Joel Nothman`_.
+
+- Refactored and vectorized implementation of :func:`metrics.roc_curve`
+ and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
+
+- The new estimator :class:`sklearn.decomposition.TruncatedSVD`
+ performs dimensionality reduction using SVD on sparse matrices,
+ and can be used for latent semantic analysis (LSA).
+ By `Lars Buitinck`_.
+
+- Added self-contained example of out-of-core learning on text data
+ :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`.
+ By :user:`Eustache Diemert `.
+
+- The default number of components for
+ :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented
+ to be ``n_features``. This was the default behavior, so programs using it
+ will continue to work as they did.
+
+- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude
+ faster on sparse data (the speedup depends on the sparsity). By
+ `Lars Buitinck`_.
+
+- Reduce memory footprint of FastICA by `Denis Engemann`_ and
+ `Alexandre Gramfort`_.
+
+- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses
+ a column format and prints progress in decreasing frequency.
+ It also shows the remaining time. By `Peter Prettenhofer`_.
+
+- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement
+ :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_`
+ rather than the OOB score for model selection. An example that shows
+ how to use OOB estimates to select the number of trees was added.
+ By `Peter Prettenhofer`_.
+
+- Most metrics now support string labels for multiclass classification
+ by `Arnaud Joly`_ and `Lars Buitinck`_.
+
+- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_
+ and `Vlad Niculae`_.
+
+- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the
+ 'alphas' parameter now works as expected when given a list of
+ values. By Philippe Gervais.
+
+- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
+ that prevented all folds provided by a CV object to be used (only
+ the first 3 were used). When providing a CV object, execution
+ time may thus increase significantly compared to the previous
+  version (but results are correct now). By Philippe Gervais.
+
+- :class:`cross_validation.cross_val_score` and the :mod:`grid_search`
+ module is now tested with multi-output data by `Arnaud Joly`_.
+
+- :func:`datasets.make_multilabel_classification` can now return
+ the output in label indicator multilabel format by `Arnaud Joly`_.
+
+- K-nearest neighbors, :class:`neighbors.KNeighborsRegressor`
+ and :class:`neighbors.RadiusNeighborsRegressor`,
+ and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and
+ :class:`neighbors.RadiusNeighborsClassifier` support multioutput data
+ by `Arnaud Joly`_.
+
+- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`,
+ :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
+ controlled. This is useful to ensure consistency in the probability
+ estimates for the classifiers trained with ``probability=True``. By
+ `Vlad Niculae`_.
+
+- Out-of-core learning support for discrete naive Bayes classifiers
+ :class:`sklearn.naive_bayes.MultinomialNB` and
+ :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
+ method by `Olivier Grisel`_.
+
+- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
+ Vincent Michel and `Andreas Müller`_.
+
+- Improved documentation on :ref:`multi-class, multi-label and multi-output
+ classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_.
+
+- Better input and error handling in the :mod:`metrics` module by
+ `Arnaud Joly`_ and `Joel Nothman`_.
+
+- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov `
+
+- Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
+ by `cleverless `_
+
+
+API changes summary
+-------------------
+
+- The :func:`auc_score` was renamed :func:`roc_auc_score`.
+
+- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
+ ``nosetests sklearn`` from the command line.
+
+- Feature importances in :class:`tree.DecisionTreeClassifier`,
+ :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+ are now computed on the fly when accessing the ``feature_importances_``
+ attribute. Setting ``compute_importances=True`` is no longer required.
+ By `Gilles Louppe`_.
+
+- :class:`linear_model.lasso_path` and
+ :class:`linear_model.enet_path` can return its results in the same
+ format as that of :class:`linear_model.lars_path`. This is done by
+ setting the ``return_models`` parameter to ``False``. By
+ `Jaques Grobler`_ and `Alexandre Gramfort`_
+
+- :class:`grid_search.IterGrid` was renamed to
+ :class:`grid_search.ParameterGrid`.
+
+- Fixed bug in :class:`KFold` causing imperfect class balance in some
+ cases. By `Alexandre Gramfort`_ and Tadej Janež.
+
+- :class:`sklearn.neighbors.BallTree` has been refactored, and a
+ :class:`sklearn.neighbors.KDTree` has been
+ added which shares the same interface. The Ball Tree now works with
+ a wide variety of distance metrics. Both classes have many new
+ methods, including single-tree and dual-tree queries, breadth-first
+ and depth-first searching, and more advanced queries such as
+ kernel density estimation and 2-point correlation functions.
+ By `Jake Vanderplas`_
+
+- Support for scipy.spatial.cKDTree within neighbors queries has been
+ removed, and the functionality replaced with the new :class:`KDTree`
+ class.
+
+- :class:`sklearn.neighbors.KernelDensity` has been added, which performs
+ efficient kernel density estimation with a variety of kernels.
+
+- :class:`sklearn.decomposition.KernelPCA` now always returns output with
+ ``n_components`` components, unless the new parameter ``remove_zero_eig``
+ is set to ``True``. This new behavior is consistent with the way
+ kernel PCA was always documented; previously, the removal of components
+ with zero eigenvalues was tacitly performed on all data.
+
+- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified
+ sparse matrix in :class:`sklearn.linear_model.RidgeCV`.
+
+- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA`
+ is now deprecated in favor of the new ``TruncatedSVD``.
+
+- :class:`cross_validation.KFold` and
+ :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2`
+ otherwise a ``ValueError`` is raised. By `Olivier Grisel`_.
+
+- :func:`datasets.load_files`'s ``charset`` and ``charset_errors``
+ parameters were renamed ``encoding`` and ``decode_errors``.
+
+- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor`
+ and :class:`sklearn.ensemble.GradientBoostingClassifier`
+ is deprecated and has been replaced by ``oob_improvement_`` .
+
+- Attributes in OrthogonalMatchingPursuit have been deprecated
+ (copy_X, Gram, ...) and precompute_gram renamed precompute
+ for consistency. See #2224.
+
+- :class:`sklearn.preprocessing.StandardScaler` now converts integer input
+ to float, and raises a warning. Previously it rounded for dense integer
+ input.
+
+- :class:`sklearn.multiclass.OneVsRestClassifier` now has a
+ ``decision_function`` method. This will return the distance of each
+ sample from the decision boundary for each class, as long as the
+ underlying estimators implement the ``decision_function`` method.
+ By `Kyle Kastner`_.
+
+- Better input validation, warning on unexpected shapes for y.
+
+People
+------
+List of contributors for release 0.14 by number of commits.
+
+ * 277 Gilles Louppe
+ * 245 Lars Buitinck
+ * 187 Andreas Mueller
+ * 124 Arnaud Joly
+ * 112 Jaques Grobler
+ * 109 Gael Varoquaux
+ * 107 Olivier Grisel
+ * 102 Noel Dawe
+ * 99 Kemal Eren
+ * 79 Joel Nothman
+ * 75 Jake VanderPlas
+ * 73 Nelle Varoquaux
+ * 71 Vlad Niculae
+ * 65 Peter Prettenhofer
+ * 64 Alexandre Gramfort
+ * 54 Mathieu Blondel
+ * 38 Nicolas Trésegnie
+ * 35 eustache
+ * 27 Denis Engemann
+ * 25 Yann N. Dauphin
+ * 19 Justin Vincent
+ * 17 Robert Layton
+ * 15 Doug Coleman
+ * 14 Michael Eickenberg
+ * 13 Robert Marchman
+ * 11 Fabian Pedregosa
+ * 11 Philippe Gervais
+ * 10 Jim Holmström
+ * 10 Tadej Janež
+ * 10 syhw
+ * 9 Mikhail Korobov
+ * 9 Steven De Gryze
+ * 8 sergeyf
+ * 7 Ben Root
+ * 7 Hrishikesh Huilgolkar
+ * 6 Kyle Kastner
+ * 6 Martin Luessi
+ * 6 Rob Speer
+ * 5 Federico Vaggi
+ * 5 Raul Garreta
+ * 5 Rob Zinkov
+ * 4 Ken Geis
+ * 3 A. Flaxman
+ * 3 Denton Cockburn
+ * 3 Dougal Sutherland
+ * 3 Ian Ozsvald
+ * 3 Johannes Schönberger
+ * 3 Robert McGibbon
+ * 3 Roman Sinayev
+ * 3 Szabo Roland
+ * 2 Diego Molla
+ * 2 Imran Haque
+ * 2 Jochen Wersdörfer
+ * 2 Sergey Karayev
+ * 2 Yannick Schwartz
+ * 2 jamestwebber
+ * 1 Abhijeet Kolhe
+ * 1 Alexander Fabisch
+ * 1 Bastiaan van den Berg
+ * 1 Benjamin Peterson
+ * 1 Daniel Velkov
+ * 1 Fazlul Shahriar
+ * 1 Felix Brockherde
+ * 1 Félix-Antoine Fortin
+ * 1 Harikrishnan S
+ * 1 Jack Hale
+ * 1 JakeMick
+ * 1 James McDermott
+ * 1 John Benediktsson
+ * 1 John Zwinck
+ * 1 Joshua Vredevoogd
+ * 1 Justin Pati
+ * 1 Kevin Hughes
+ * 1 Kyle Kelley
+ * 1 Matthias Ekman
+ * 1 Miroslav Shubernetskiy
+ * 1 Naoki Orii
+ * 1 Norbert Crombach
+ * 1 Rafael Cunha de Almeida
+ * 1 Rolando Espinoza La fuente
+ * 1 Seamus Abshere
+ * 1 Sergey Feldman
+ * 1 Sergio Medina
+ * 1 Stefano Lattarini
+ * 1 Steve Koch
+ * 1 Sturla Molden
+ * 1 Thomas Jarosch
+ * 1 Yaroslav Halchenko
+
diff --git a/doc/whats_new/v0.15.rst b/doc/whats_new/v0.15.rst
new file mode 100644
index 0000000000000..a2eafc63b0617
--- /dev/null
+++ b/doc/whats_new/v0.15.rst
@@ -0,0 +1,623 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_15_2:
+
+Version 0.15.2
+==============
+
+**September 4, 2014**
+
+Bug fixes
+---------
+
+- Fixed handling of the ``p`` parameter of the Minkowski distance that was
+ previously ignored in nearest neighbors models. By :user:`Nikolay
+ Mayorov `.
+
+- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
+ stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.
+
+- Fixed the build under Windows when scikit-learn is built with MSVC while
+ NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
+ Vaggi `.
+
+- Fixed an array index overflow bug in the coordinate descent solver. By
+ `Gael Varoquaux`_.
+
+- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.
+
+- Removed unnecessary data copy in :class:`cluster.KMeans`.
+ By `Gael Varoquaux`_.
+
+- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
+ By Calvin Giles.
+
+- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+ now projects the input on the most discriminant directions. By Martin Billinger.
+
+- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.
+
+- Performance optimization in :class:`isotonic.IsotonicRegression`.
+ By Robert Bradshaw.
+
+- ``nose`` is no longer a runtime dependency to import ``sklearn``, only for
+ running the tests. By `Joel Nothman`_.
+
+- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_
+ :user:`Matt Pico `, and others.
+
+.. _changes_0_15_1:
+
+Version 0.15.1
+==============
+
+**August 1, 2014**
+
+Bug fixes
+---------
+
+- Made :func:`cross_validation.cross_val_score` use
+ :class:`cross_validation.KFold` instead of
+ :class:`cross_validation.StratifiedKFold` on multi-output classification
+ problems. By :user:`Nikolay Mayorov `.
+
+- Support unseen labels :class:`preprocessing.LabelBinarizer` to restore
+ the default behavior of 0.14.1 for backward compatibility. By
+ :user:`Hamzeh Alsalhi `.
+
+- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
+ convergence detection. By Edward Raff and `Gael Varoquaux`_.
+
+- Fixed the behavior of :class:`multiclass.OneVsOneClassifier`
+  in case of ties at the per-class vote level by computing the correct
+ per-class sum of prediction scores. By `Andreas Müller`_.
+
+- Made :func:`cross_validation.cross_val_score` and
+ :class:`grid_search.GridSearchCV` accept Python lists as input data.
+ This is especially useful for cross-validation and model selection of
+ text processing pipelines. By `Andreas Müller`_.
+
+- Fixed data input checks of most estimators to accept input data that
+  implements the NumPy ``__array__`` protocol. This is the case
+  for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
+ pandas. By `Gael Varoquaux`_.
+
+- Fixed a regression for :class:`linear_model.SGDClassifier` with
+ ``class_weight="auto"`` on data with non-contiguous labels. By
+ `Olivier Grisel`_.
+
+
+.. _changes_0_15:
+
+Version 0.15
+============
+
+**July 15, 2014**
+
+Highlights
+-----------
+
+- Many speed and memory improvements all across the code
+
+- Huge speed and memory improvements to random forests (and extra
+ trees) that also benefit better from parallel computing.
+
+- Incremental fit to :class:`BernoulliRBM `
+
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+ agglomerative clustering with average linkage, complete linkage and
+ ward strategies.
+
+- Added :class:`linear_model.RANSACRegressor` for robust regression
+ models.
+
+- Added dimensionality reduction with :class:`manifold.TSNE` which can be
+ used to visualize high-dimensional data.
+
+
+Changelog
+---------
+
+New features
+............
+
+- Added :class:`ensemble.BaggingClassifier` and
+ :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
+ any kind of base estimator. See the :ref:`Bagging ` section of
+ the user guide for details and examples. By `Gilles Louppe`_.
+
+- New unsupervised feature selection algorithm
+ :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.
+
+- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
+ fitting of regression models. By :user:`Johannes Schönberger `.
+
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+ agglomerative clustering with average linkage, complete linkage and
+ ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_.
+
+- Shorthand constructors :func:`pipeline.make_pipeline` and
+ :func:`pipeline.make_union` were added by `Lars Buitinck`_.
+
+- Shuffle option for :class:`cross_validation.StratifiedKFold`.
+ By :user:`Jeffrey Blackburne `.
+
+- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
+ Imran Haque.
+
+- Added ``partial_fit`` to :class:`BernoulliRBM
+ `
+ By :user:`Danny Sullivan `.
+
+- Added :func:`learning_curve ` utility to
+ chart performance with respect to training size. See
+ :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.
+
+- Add positive option in :class:`LassoCV ` and
+ :class:`ElasticNetCV `.
+ By Brian Wignall and `Alexandre Gramfort`_.
+
+- Added :class:`linear_model.MultiTaskElasticNetCV` and
+ :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.
+
+- Added :class:`manifold.TSNE`. By Alexander Fabisch.
+
+Enhancements
+............
+
+- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
+ :class:`ensemble.AdaBoostRegressor` meta-estimators.
+ By :user:`Hamzeh Alsalhi `.
+
+- Memory improvements of decision trees, by `Arnaud Joly`_.
+
+- Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
+ as the stopping criteria. Refactored the tree code to use either a
+ stack or a priority queue for tree building.
+ By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+
+- Decision trees can now be fitted on fortran- and c-style arrays, and
+ non-continuous arrays without the need to make a copy.
+ If the input array has a different dtype than ``np.float32``, a fortran-
+ style copy will be made since fortran-style memory layout has speed
+ advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+
+- Speed improvement of regression trees by optimizing the
+  computation of the mean square error criterion. This led
+  to speed improvement of the tree, forest and gradient boosting tree
+  modules. By `Arnaud Joly`_
+
+- The ``img_to_graph`` and ``grid_to_graph`` functions in
+ :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
+ instead of ``np.matrix`` when ``return_as=np.ndarray``. See the
+ Notes section for more information on compatibility.
+
+- Changed the internal storage of decision trees to use a struct array.
+ This fixed some small bugs, while improving code and providing a small
+ speed gain. By `Joel Nothman`_.
+
+- Reduce memory usage and overhead when fitting and predicting with forests
+ of randomized trees in parallel with ``n_jobs != 1`` by leveraging new
+ threading backend of joblib 0.8 and releasing the GIL in the tree fitting
+ Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_.
+
+- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
+ By `Gilles Louppe`_ and `Peter Prettenhofer`_.
+
+- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting`
+ module: a ``warm_start`` argument to fit additional trees,
+ a ``max_leaf_nodes`` argument to fit GBM style trees,
+ a ``monitor`` fit argument to inspect the estimator during training, and
+ refactoring of the verbose code. By `Peter Prettenhofer`_.
+
+- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
+ By `Arnaud Joly`_.
+
+- Faster depth-based tree building algorithm such as decision tree,
+ random forest, extra trees or gradient tree boosting (with depth based
+ growing strategy) by avoiding trying to split on found constant features
+ in the sample subset. By `Arnaud Joly`_.
+
+- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
+ methods: the minimum weighted fraction of the input samples required to be
+ at a leaf node. By `Noel Dawe`_.
+
+- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
+
+- Added predict method to :class:`cluster.AffinityPropagation` and
+ :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
+
+- Vector and matrix multiplications have been optimised throughout the
+ library by `Denis Engemann`_, and `Alexandre Gramfort`_.
+ In particular, they should take less memory with older NumPy versions
+ (prior to 1.7.2).
+
+- Precision-recall and ROC examples now use train_test_split, and have more
+ explanation of why these metrics are useful. By `Kyle Kastner`_
+
+- The training algorithm for :class:`decomposition.NMF` is faster for
+ sparse matrices and has much lower memory complexity, meaning it will
+ scale up gracefully to large datasets. By `Lars Buitinck`_.
+
+- Added an ``svd_method`` option, with default value ``"randomized"``, to
+  :class:`decomposition.FactorAnalysis` to save memory and
+ significantly speedup computation by `Denis Engemann`_, and
+ `Alexandre Gramfort`_.
+
+- Changed :class:`cross_validation.StratifiedKFold` to try and
+ preserve as much of the original ordering of samples as possible so as
+ not to hide overfitting on datasets with a non-negligible level of
+ samples dependency.
+ By `Daniel Nouri`_ and `Olivier Grisel`_.
+
+- Add multi-output support to :class:`gaussian_process.GaussianProcess`
+ by John Novak.
+
+- Support for precomputed distance matrices in nearest neighbor estimators
+ by `Robert Layton`_ and `Joel Nothman`_.
+
+- Norm computations optimized for NumPy 1.6 and later versions by
+ `Lars Buitinck`_. In particular, the k-means algorithm no longer
+ needs a temporary data structure the size of its input.
+
+- :class:`dummy.DummyClassifier` can now be used to predict a constant
+ output value. By `Manoj Kumar`_.
+
+- :class:`dummy.DummyRegressor` has now a strategy parameter which allows
+ to predict the mean, the median of the training set or a constant
+ output value. By :user:`Maheshakya Wijewardena `.
+
+- Multi-label classification output in multilabel indicator format
+ is now supported by :func:`metrics.roc_auc_score` and
+ :func:`metrics.average_precision_score` by `Arnaud Joly`_.
+
+- Significant performance improvements (more than 100x speedup for
+ large problems) in :class:`isotonic.IsotonicRegression` by
+ `Andrew Tulloch`_.
+
+- Speed and memory usage improvements to the SGD algorithm for linear
+ models: it now uses threads, not separate processes, when ``n_jobs>1``.
+ By `Lars Buitinck`_.
+
+- Grid search and cross validation allow NaNs in the input arrays so that
+ preprocessors such as :class:`preprocessing.Imputer
+ ` can be trained within the cross validation loop,
+ avoiding potentially skewed results.
+
+- Ridge regression can now deal with sample weights in feature space
+ (only sample space until then). By :user:`Michael Eickenberg `.
+ Both solutions are provided by the Cholesky solver.
+
+- Several classification and regression metrics now support weighted
+ samples with the new ``sample_weight`` argument:
+ :func:`metrics.accuracy_score`,
+ :func:`metrics.zero_one_loss`,
+ :func:`metrics.precision_score`,
+ :func:`metrics.average_precision_score`,
+ :func:`metrics.f1_score`,
+ :func:`metrics.fbeta_score`,
+ :func:`metrics.recall_score`,
+ :func:`metrics.roc_auc_score`,
+ :func:`metrics.explained_variance_score`,
+ :func:`metrics.mean_squared_error`,
+ :func:`metrics.mean_absolute_error`,
+ :func:`metrics.r2_score`.
+ By `Noel Dawe`_.
+
+- Speed up of the sample generator
+ :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_.
+
+Documentation improvements
+...........................
+
+- The :ref:`Working With Text Data ` tutorial
+  has now been worked into the main documentation's tutorial section.
+ Includes exercises and skeletons for tutorial presentation.
+ Original tutorial created by several authors including
+ `Olivier Grisel`_, Lars Buitinck and many others.
+ Tutorial integration into the scikit-learn documentation
+ by `Jaques Grobler`_
+
+- Added :ref:`Computational Performance `
+ documentation. Discussion and examples of prediction latency / throughput
+ and different factors that have influence over speed. Additional tips for
+ building faster models and choosing a relevant compromise between speed
+ and predictive power.
+ By :user:`Eustache Diemert `.
+
+Bug fixes
+.........
+
+- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` :
+ ``partial_fit`` was not working properly.
+
+- Fixed bug in :class:`linear_model.stochastic_gradient` :
+ ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` .
+
+- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
+ labels
+
+- Fixed a bug in :class:`LassoCV ` and
+ :class:`ElasticNetCV `: they would not
+ pre-compute the Gram matrix with ``precompute=True`` or
+ ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.
+
+- Fixed incorrect estimation of the degrees of freedom in
+ :func:`feature_selection.f_regression` when variates are not centered.
+ By :user:`Virgile Fritsch `.
+
+- Fixed a race condition in parallel processing with
+ ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
+ By `Olivier Grisel`_.
+
+- Raise error in :class:`cluster.FeatureAgglomeration` and
+ :class:`cluster.WardAgglomeration` when no samples are given,
+ rather than returning meaningless clustering.
+
+- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
+ ``loss='huber'``: ``gamma`` might have not been initialized.
+
+- Fixed feature importances as computed with a forest of randomized trees
+ when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
+ By `Gilles Louppe`_.
+
+API changes summary
+-------------------
+
+- :mod:`sklearn.hmm` is deprecated. Its removal is planned
+ for the 0.17 release.
+
+- Use of :class:`covariance.EllipticEnvelop` has now been removed after
+ deprecation.
+ Please use :class:`covariance.EllipticEnvelope` instead.
+
+- :class:`cluster.Ward` is deprecated. Use
+ :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cluster.WardClustering` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cross_validation.Bootstrap` is deprecated.
+ :class:`cross_validation.KFold` or
+ :class:`cross_validation.ShuffleSplit` are recommended instead.
+
+- Direct support for the sequence of sequences (or list of lists) multilabel
+ format is deprecated. To convert to and from the supported binary
+ indicator matrix format, use
+ :class:`MultiLabelBinarizer `.
+ By `Joel Nothman`_.
+
+- Add score method to :class:`PCA ` following the model of
+ probabilistic PCA and deprecate
+ :class:`ProbabilisticPCA ` model whose
+ score implementation is not correct. The computation now also exploits the
+ matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.
+
+- The score method of :class:`FactorAnalysis `
+ now returns the average log-likelihood of the samples. Use score_samples
+ to get log-likelihood of each sample. By `Alexandre Gramfort`_.
+
+- Generating boolean masks (the setting ``indices=False``)
+ from cross-validation generators is deprecated.
+ Support for masks will be removed in 0.17.
+ The generators have produced arrays of indices by default since 0.10.
+ By `Joel Nothman`_.
+
+- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
+ are now considered valid classification targets. This fixes a regression
+ from version 0.13 in some classifiers. By `Joel Nothman`_.
+
+- Fix wrong ``explained_variance_ratio_`` attribute in
+ :class:`RandomizedPCA `.
+ By `Alexandre Gramfort`_.
+
+- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
+ :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
+ This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
+ ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like
+ object of length greater than one.
+ By `Manoj Kumar`_.
+
+- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
+ when fitting intercept and input data is sparse. The automatic grid
+ of alphas was not computed correctly and the scaling with normalize
+ was wrong. By `Manoj Kumar`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+ for decision trees, random forests and gradient tree boosting.
+  Previously, the count for the number of drawn features started only after
+  one non-constant feature in the split. This bug fix will affect
+ computational and generalization performance of those algorithms in the
+ presence of constant features. To get back previous generalization
+ performance, you should modify the value of ``max_features``.
+ By `Arnaud Joly`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+ for :class:`ensemble.ExtraTreesClassifier` and
+  :class:`ensemble.ExtraTreesRegressor`. Previously, only non-constant
+  features in the split were counted as drawn. Now constant features are
+ counted as drawn. Furthermore at least one feature must be non constant
+ in order to make a valid split. This bug fix will affect
+ computational and generalization performance of extra trees in the
+ presence of constant features. To get back previous generalization
+ performance, you should modify the value of ``max_features``.
+ By `Arnaud Joly`_.
+
+- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
+ Previously it was broken for input of non-integer ``dtype`` and the
+ weighted array that was returned was wrong. By `Manoj Kumar`_.
+
+- Fix :class:`cross_validation.Bootstrap` to raise ``ValueError``
+ when ``n_train + n_test > n``. By :user:`Ronald Phlypo `.
+
+
+People
+------
+
+List of contributors for release 0.15 by number of commits.
+
+* 312 Olivier Grisel
+* 275 Lars Buitinck
+* 221 Gael Varoquaux
+* 148 Arnaud Joly
+* 134 Johannes Schönberger
+* 119 Gilles Louppe
+* 113 Joel Nothman
+* 111 Alexandre Gramfort
+* 95 Jaques Grobler
+* 89 Denis Engemann
+* 83 Peter Prettenhofer
+* 83 Alexander Fabisch
+* 62 Mathieu Blondel
+* 60 Eustache Diemert
+* 60 Nelle Varoquaux
+* 49 Michael Bommarito
+* 45 Manoj-Kumar-S
+* 28 Kyle Kastner
+* 26 Andreas Mueller
+* 22 Noel Dawe
+* 21 Maheshakya Wijewardena
+* 21 Brooke Osborn
+* 21 Hamzeh Alsalhi
+* 21 Jake VanderPlas
+* 21 Philippe Gervais
+* 19 Bala Subrahmanyam Varanasi
+* 12 Ronald Phlypo
+* 10 Mikhail Korobov
+* 8 Thomas Unterthiner
+* 8 Jeffrey Blackburne
+* 8 eltermann
+* 8 bwignall
+* 7 Ankit Agrawal
+* 7 CJ Carey
+* 6 Daniel Nouri
+* 6 Chen Liu
+* 6 Michael Eickenberg
+* 6 ugurthemaster
+* 5 Aaron Schumacher
+* 5 Baptiste Lagarde
+* 5 Rajat Khanduja
+* 5 Robert McGibbon
+* 5 Sergio Pascual
+* 4 Alexis Metaireau
+* 4 Ignacio Rossi
+* 4 Virgile Fritsch
+* 4 Sebastian Säger
+* 4 Ilambharathi Kanniah
+* 4 sdenton4
+* 4 Robert Layton
+* 4 Alyssa
+* 4 Amos Waterland
+* 3 Andrew Tulloch
+* 3 murad
+* 3 Steven Maude
+* 3 Karol Pysniak
+* 3 Jacques Kvam
+* 3 cgohlke
+* 3 cjlin
+* 3 Michael Becker
+* 3 hamzeh
+* 3 Eric Jacobsen
+* 3 john collins
+* 3 kaushik94
+* 3 Erwin Marsi
+* 2 csytracy
+* 2 LK
+* 2 Vlad Niculae
+* 2 Laurent Direr
+* 2 Erik Shilts
+* 2 Raul Garreta
+* 2 Yoshiki Vázquez Baeza
+* 2 Yung Siang Liau
+* 2 abhishek thakur
+* 2 James Yu
+* 2 Rohit Sivaprasad
+* 2 Roland Szabo
+* 2 amormachine
+* 2 Alexis Mignon
+* 2 Oscar Carlsson
+* 2 Nantas Nardelli
+* 2 jess010
+* 2 kowalski87
+* 2 Andrew Clegg
+* 2 Federico Vaggi
+* 2 Simon Frid
+* 2 Félix-Antoine Fortin
+* 1 Ralf Gommers
+* 1 t-aft
+* 1 Ronan Amicel
+* 1 Rupesh Kumar Srivastava
+* 1 Ryan Wang
+* 1 Samuel Charron
+* 1 Samuel St-Jean
+* 1 Fabian Pedregosa
+* 1 Skipper Seabold
+* 1 Stefan Walk
+* 1 Stefan van der Walt
+* 1 Stephan Hoyer
+* 1 Allen Riddell
+* 1 Valentin Haenel
+* 1 Vijay Ramesh
+* 1 Will Myers
+* 1 Yaroslav Halchenko
+* 1 Yoni Ben-Meshulam
+* 1 Yury V. Zaytsev
+* 1 adrinjalali
+* 1 ai8rahim
+* 1 alemagnani
+* 1 alex
+* 1 benjamin wilson
+* 1 chalmerlowe
+* 1 dzikie drożdże
+* 1 jamestwebber
+* 1 matrixorz
+* 1 popo
+* 1 samuela
+* 1 François Boulogne
+* 1 Alexander Measure
+* 1 Ethan White
+* 1 Guilherme Trein
+* 1 Hendrik Heuer
+* 1 IvicaJovic
+* 1 Jan Hendrik Metzen
+* 1 Jean Michel Rouly
+* 1 Eduardo Ariño de la Rubia
+* 1 Jelle Zijlstra
+* 1 Eddy L O Jansson
+* 1 Denis
+* 1 John
+* 1 John Schmidt
+* 1 Jorge Cañardo Alastuey
+* 1 Joseph Perla
+* 1 Joshua Vredevoogd
+* 1 José Ricardo
+* 1 Julien Miotte
+* 1 Kemal Eren
+* 1 Kenta Sato
+* 1 David Cournapeau
+* 1 Kyle Kelley
+* 1 Daniele Medri
+* 1 Laurent Luce
+* 1 Laurent Pierron
+* 1 Luis Pedro Coelho
+* 1 DanielWeitzenfeld
+* 1 Craig Thompson
+* 1 Chyi-Kwei Yau
+* 1 Matthew Brett
+* 1 Matthias Feurer
+* 1 Max Linke
+* 1 Chris Filo Gorgolewski
+* 1 Charles Earl
+* 1 Michael Hanke
+* 1 Michele Orrù
+* 1 Bryan Lunt
+* 1 Brian Kearns
+* 1 Paul Butler
+* 1 Paweł Mandera
+* 1 Peter
+* 1 Andrew Ash
+* 1 Pietro Zambelli
+* 1 staubda
+
diff --git a/doc/whats_new/v0.16.rst b/doc/whats_new/v0.16.rst
new file mode 100644
index 0000000000000..33d8cc47e939a
--- /dev/null
+++ b/doc/whats_new/v0.16.rst
@@ -0,0 +1,541 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_16_1:
+
+Version 0.16.1
+===============
+
+**April 14, 2015**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Allow input data larger than ``block_size`` in
+ :class:`covariance.LedoitWolf` by `Andreas Müller`_.
+
+- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
+ caused unstable result in :class:`calibration.CalibratedClassifierCV` by
+ `Jan Hendrik Metzen`_.
+
+- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman.
+
+- Fix several stability and convergence issues in
+ :class:`cross_decomposition.CCA` and
+ :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_
+
+- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
+ on fortran-ordered data.
+
+- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
+ and ``predict_proba`` by `Andreas Müller`_.
+
+- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_
+
+.. _changes_0_16:
+
+Version 0.16
+============
+
+**March 26, 2015**
+
+Highlights
+-----------
+
+- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
+ requirements, bug-fixes and better default settings.
+
+- Multinomial Logistic regression and a path algorithm in
+ :class:`linear_model.LogisticRegressionCV`.
+
+- Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`.
+
+- Probability calibration of classifiers using
+ :class:`calibration.CalibratedClassifierCV`.
+
+- :class:`cluster.Birch` clustering method for large-scale datasets.
+
+- Scalable approximate nearest neighbors search with Locality-sensitive
+ hashing forests in :class:`neighbors.LSHForest`.
+
+- Improved error messages and better validation when using malformed input data.
+
+- More robust integration with pandas dataframes.
+
+Changelog
+---------
+
+New features
+............
+
+- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
+ for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`.
+
+- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
+ of Support Vector Regression which is much faster for large
+ sample sizes than :class:`svm.SVR` with linear kernel. By
+ `Fabian Pedregosa`_ and Qiang Luo.
+
+- Incremental fit for :class:`GaussianNB `.
+
+- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
+ :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.
+
+- Added the :func:`metrics.label_ranking_average_precision_score` metrics.
+ By `Arnaud Joly`_.
+
+- Add the :func:`metrics.coverage_error` metrics. By `Arnaud Joly`_.
+
+- Added :class:`linear_model.LogisticRegressionCV`. By
+ `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
+ and `Alexandre Gramfort`_.
+
+- Added ``warm_start`` constructor parameter to make it possible for any
+ trained forest model to grow additional trees incrementally. By
+ :user:`Laurent Direr`.
+
+- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
+ :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.
+
+- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
+ algorithm that supports out-of-core learning with a ``partial_fit``
+ method. By `Kyle Kastner`_.
+
+- Averaged SGD for :class:`SGDClassifier `
+ and :class:`SGDRegressor ` By
+ :user:`Danny Sullivan `.
+
+- Added :func:`cross_val_predict `
+ function which computes cross-validated estimates. By `Luis Pedro Coelho`_
+
+- Added :class:`linear_model.TheilSenRegressor`, a robust
+ generalized-median-based estimator. By :user:`Florian Wilhelm `.
+
+- Added :func:`metrics.median_absolute_error`, a robust metric.
+ By `Gael Varoquaux`_ and :user:`Florian Wilhelm `.
+
+- Add :class:`cluster.Birch`, an online clustering algorithm. By
+ `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.
+
+- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+ using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_.
+
+- Added :class:`kernel_ridge.KernelRidge`, an implementation of
+ kernelized ridge regression.
+ By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.
+
+- All solvers in :class:`linear_model.Ridge` now support `sample_weight`.
+ By `Mathieu Blondel`_.
+
+- Added :class:`cross_validation.PredefinedSplit` cross-validation
+ for fixed user-provided cross-validation folds.
+ By :user:`Thomas Unterthiner `.
+
+- Added :class:`calibration.CalibratedClassifierCV`, an approach for
+ calibrating the predicted probabilities of a classifier.
+ By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
+ and :user:`Balazs Kegl `.
+
+
+Enhancements
+............
+
+- Add option ``return_distance`` in :func:`hierarchical.ward_tree`
+ to return distances between nodes for both structured and unstructured
+ versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
+ The same option was added in :func:`hierarchical.linkage_tree`.
+ By `Manoj Kumar`_
+
+- Add support for sample weights in scorer objects. Metrics with sample
+ weight support will automatically benefit from it. By `Noel Dawe`_ and
+ `Vlad Niculae`_.
+
+- Added ``newton-cg`` and `lbfgs` solver support in
+ :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_.
+
+- Add ``selection="random"`` parameter to implement stochastic coordinate
+ descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
+ and related. By `Manoj Kumar`_.
+
+- Add ``sample_weight`` parameter to
+ :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`.
+ By :user:`Jatin Shah `.
+
+- Support sparse multilabel indicator representation in
+ :class:`preprocessing.LabelBinarizer` and
+ :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks
+ to Rohit Sivaprasad), as well as evaluation metrics (by
+ `Joel Nothman`_).
+
+- Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`.
+ By `Jatin Shah`.
+
+- Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None``
+ as optional parameter. By `Saurabh Jha`.
+
+- Add ``sample_weight`` parameter to `metrics.hinge_loss`.
+ By `Saurabh Jha`.
+
+- Add ``multi_class="multinomial"`` option in
+ :class:`linear_model.LogisticRegression` to implement a Logistic
+ Regression solver that minimizes the cross-entropy or multinomial loss
+ instead of the default One-vs-Rest setting. Supports `lbfgs` and
+ `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option
+ `newton-cg` by Simon Wu.
+
+- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a
+ single pass, when giving the option ``sort=False``. By :user:`Dan
+ Blanchard `.
+
+- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
+ configured to work with estimators that may fail and raise errors on
+ individual folds. This option is controlled by the `error_score`
+ parameter. This does not affect errors raised on re-fit. By
+ :user:`Michal Romaniuk `.
+
+- Add ``digits`` parameter to `metrics.classification_report` to allow
+ report to show different precision of floating point numbers. By
+ :user:`Ian Gilmore `.
+
+- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
+ By :user:`Aaron Staple `.
+
+- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
+ handle unknown categorical features more gracefully during transform.
+ By `Manoj Kumar`_.
+
+- Added support for sparse input data to decision trees and their ensembles.
+ By `Fares Hedyati`_ and `Arnaud Joly`_.
+
+- Optimized :class:`cluster.AffinityPropagation` by reducing the number of
+ memory allocations of large temporary data-structures. By `Antony Lee`_.
+
+- Parallelization of the computation of feature importances in random forest.
+ By `Olivier Grisel`_ and `Arnaud Joly`_.
+
+- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute
+ in their constructor. By `Manoj Kumar`_.
+
+- Added decision function for :class:`multiclass.OneVsOneClassifier`
+ By `Raghav RV`_ and :user:`Kyle Beauchamp `.
+
+- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
+ support non-Euclidean metrics. By `Manoj Kumar`_
+
+- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
+ and family now accept callables that return a connectivity matrix.
+ By `Manoj Kumar`_.
+
+- Sparse support for :func:`paired_distances`. By `Joel Nothman`_.
+
+- :class:`cluster.DBSCAN` now supports sparse input and sample weights and
+ has been optimized: the inner loop has been rewritten in Cython and
+ radius neighbors queries are now computed in batch. By `Joel Nothman`_
+ and `Lars Buitinck`_.
+
+- Add ``class_weight`` parameter to automatically weight samples by class
+ frequency for :class:`ensemble.RandomForestClassifier`,
+ :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier`
+ and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_.
+
+- :class:`grid_search.RandomizedSearchCV` now does sampling without
+ replacement if all parameters are given as lists. By `Andreas Müller`_.
+
+- Parallelized calculation of :func:`pairwise_distances` is now supported
+ for scipy metrics and custom callables. By `Joel Nothman`_.
+
+- Allow the fitting and scoring of all clustering algorithms in
+ :class:`pipeline.Pipeline`. By `Andreas Müller`_.
+
+- More robust seeding and improved error messages in :class:`cluster.MeanShift`
+ by `Andreas Müller`_.
+
+- Make the stopping criterion for :class:`mixture.GMM`,
+ :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the
+ number of samples by thresholding the average log-likelihood change
+ instead of its sum over all samples. By `Hervé Bredin`_.
+
+- The outcome of :func:`manifold.spectral_embedding` was made deterministic
+ by flipping the sign of eigenvectors. By :user:`Hasil Sharma `.
+
+- Significant performance and memory usage improvements in
+ :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_.
+
+- Numerical stability improvements for :class:`preprocessing.StandardScaler`
+ and :func:`preprocessing.scale`. By `Nicolas Goix`_
+
+- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``.
+ By `Rob Zinkov`_ and `Andreas Müller`_.
+
+- :func:`cross_validation.train_test_split` now preserves the input type,
+ instead of converting to numpy arrays.
+
+
+Documentation improvements
+..........................
+
+- Added example of using :class:`FeatureUnion` for heterogeneous input.
+ By :user:`Matt Terry `
+
+- Documentation on scorers was improved, to highlight the handling of loss
+ functions. By :user:`Matt Pico `.
+
+- A discrepancy between liblinear output and scikit-learn's wrappers
+ is now noted. By `Manoj Kumar`_.
+
+- Improved documentation generation: examples referring to a class or
+ function are now shown in a gallery on the class/function's API reference
+ page. By `Joel Nothman`_.
+
+- More explicit documentation of sample generators and of data
+ transformation. By `Joel Nothman`_.
+
+- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree`
+ used to point to empty pages stating that they are aliases of BinaryTree.
+ This has been fixed to show the correct class docs. By `Manoj Kumar`_.
+
+- Added silhouette plots for analysis of KMeans clustering using
+ :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
+ See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`
+
+Bug fixes
+.........
+- Metaestimators now support ducktyping for the presence of ``decision_function``,
+ ``predict_proba`` and other methods. This fixes behavior of
+ :class:`grid_search.GridSearchCV`,
+ :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
+ :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
+ By `Joel Nothman`_
+
+- The ``scoring`` attribute of grid-search and cross-validation methods is no longer
+ ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or
+ the base estimator doesn't have predict.
+
+- The function :func:`hierarchical.ward_tree` now returns the children in
+ the same order for both the structured and unstructured versions. By
+ `Matteo Visconti di Oleggio Castello`_.
+
+- :class:`feature_selection.RFECV` now correctly handles cases when
+ ``step`` is not equal to 1. By :user:`Nikolay Mayorov `
+
+- The :class:`decomposition.PCA` now undoes whitening in its
+ ``inverse_transform``. Also, its ``components_`` now always have unit
+ length. By :user:`Michael Eickenberg `.
+
+- Fix incomplete download of the dataset when
+ :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.
+
+- Various fixes to the Gaussian processes subpackage by Vincent Dubourg
+ and Jan Hendrik Metzen.
+
+- Calling ``partial_fit`` with ``class_weight=='auto'`` throws an
+ appropriate error message and suggests a work around.
+ By :user:`Danny Sullivan `.
+
+- :class:`RBFSampler ` with ``gamma=g``
+ formerly approximated :func:`rbf_kernel `
+ with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
+ which may substantially change your results if you use a fixed value.
+ (If you cross-validated over ``gamma``, it probably doesn't matter
+ too much.) By :user:`Dougal Sutherland `.
+
+- Pipeline object delegate the ``classes_`` attribute to the underlying
+ estimator. It allows, for instance, to make bagging of a pipeline object.
+ By `Arnaud Joly`_
+
+- :class:`neighbors.NearestCentroid` now uses the median as the centroid
+ when metric is set to ``manhattan``. It was using the mean before.
+ By `Manoj Kumar`_
+
+- Fix numerical stability issues in :class:`linear_model.SGDClassifier`
+ and :class:`linear_model.SGDRegressor` by clipping large gradients and
+ ensuring that weight decay rescaling is always positive (for large
+ l2 regularization and large learning rate values).
+ By `Olivier Grisel`_
+
+- When `compute_full_tree` is set to "auto", the full tree is
+ built when n_clusters is high and is early stopped when n_clusters is
+ low, while the behavior should be vice-versa in
+ :class:`cluster.AgglomerativeClustering` (and friends).
+ This has been fixed By `Manoj Kumar`_
+
+- Fix lazy centering of data in :func:`linear_model.enet_path` and
+ :func:`linear_model.lasso_path`. It was centered around one. It has
+ been changed to be centered around the origin. By `Manoj Kumar`_
+
+- Fix handling of precomputed affinity matrices in
+ :class:`cluster.AgglomerativeClustering` when using connectivity
+ constraints. By :user:`Cathy Deng `
+
+- Correct ``partial_fit`` handling of ``class_prior`` for
+ :class:`sklearn.naive_bayes.MultinomialNB` and
+ :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.
+
+- Fixed a crash in :func:`metrics.precision_recall_fscore_support`
+ when using unsorted ``labels`` in the multi-label setting.
+ By `Andreas Müller`_.
+
+- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``,
+ ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+ :class:`sklearn.neighbors.NearestNeighbors` and family, when the query
+ data is not the same as fit data. By `Manoj Kumar`_.
+
+- Fix log-density calculation in the :class:`mixture.GMM` with
+ tied covariance. By `Will Dawson`_
+
+- Fixed a scaling error in :class:`feature_selection.SelectFdr`
+ where a factor ``n_features`` was missing. By `Andrew Tulloch`_
+
+- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
+ classes when using distance weighting and having identical data points.
+  By `Garrett-R `_.
+
+- Fixed round off errors with non positive-definite covariance matrices
+ in GMM. By :user:`Alexis Mignon `.
+
+- Fixed an error in the computation of conditional probabilities in
+ :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.
+
+- Make the method ``radius_neighbors`` of
+ :class:`neighbors.NearestNeighbors` return the samples lying on the
+ boundary for ``algorithm='brute'``. By `Yan Yi`_.
+
+- Flip sign of ``dual_coef_`` of :class:`svm.SVC`
+ to make it consistent with the documentation and
+ ``decision_function``. By Artem Sobolev.
+
+- Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
+ We now use the weighted average of targets (secondary method). By
+ `Andreas Müller`_ and `Michael Bommarito `_.
+
+API changes summary
+-------------------
+
+- :class:`GridSearchCV ` and
+ :func:`cross_val_score ` and other
+ meta-estimators don't convert pandas DataFrames into arrays any more,
+ allowing DataFrame specific operations in custom estimators.
+
+- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
+ :func:`predict_proba_ovr`,
+ :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
+ :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
+ are deprecated. Use the underlying estimators instead.
+
+- Nearest neighbors estimators used to take arbitrary keyword arguments
+ and pass these to their distance metric. This will no longer be supported
+ in scikit-learn 0.18; use the ``metric_params`` argument instead.
+
+- `n_jobs` parameter of the fit method shifted to the constructor of the
+ LinearRegression class.
+
+- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
+ now returns two probabilities per sample in the multiclass case; this
+ is consistent with other estimators and with the method's documentation,
+ but previous versions accidentally returned only the positive
+ probability. Fixed by Will Lamond and `Lars Buitinck`_.
+
+- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso`
+ to False. Setting precompute to "auto" was found to be slower when
+ n_samples > n_features since the computation of the Gram matrix is
+ computationally expensive and outweighs the benefit of fitting the Gram
+ for just one alpha.
+ ``precompute="auto"`` is now deprecated and will be removed in 0.18
+ By `Manoj Kumar`_.
+
+- Expose ``positive`` option in :func:`linear_model.enet_path` and
+ :func:`linear_model.enet_path` which constrains coefficients to be
+ positive. By `Manoj Kumar`_.
+
+- Users should now supply an explicit ``average`` parameter to
+ :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
+ :func:`sklearn.metrics.recall_score` and
+ :func:`sklearn.metrics.precision_score` when performing multiclass
+ or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
+
+- `scoring` parameter for cross validation now accepts `'f1_micro'`,
+ `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification
+ only. Similar changes apply to `'precision'` and `'recall'`.
+ By `Joel Nothman`_.
+
+- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
+ :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
+ been removed. They were deprecated since 0.14
+
+- From now onwards, all estimators will uniformly raise ``NotFittedError``
+ (:class:`utils.validation.NotFittedError`), when any of the ``predict``
+ like methods are called before the model is fit. By `Raghav RV`_.
+
+- Input data validation was refactored for more consistent input
+ validation. The ``check_arrays`` function was replaced by ``check_array``
+ and ``check_X_y``. By `Andreas Müller`_.
+
+- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
+ ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+ :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None,
+ then for every sample this avoids setting the sample itself as the
+ first nearest neighbor. By `Manoj Kumar`_.
+
+- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
+ and :func:`neighbors.radius_neighbors_graph` which has to be explicitly
+ set by the user. If set to True, then the sample itself is considered
+ as the first nearest neighbor.
+
+- `thresh` parameter is deprecated in favor of new `tol` parameter in
+ :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements`
+ section for details. By `Hervé Bredin`_.
+
+- Estimators will treat input with dtype object as numeric when possible.
+ By `Andreas Müller`_
+
+- Estimators now raise `ValueError` consistently when fitted on empty
+ data (less than 1 sample or less than 1 feature for 2D input).
+ By `Olivier Grisel`_.
+
+
+- The ``shuffle`` option of :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
+  :class:`linear_model.PassiveAggressiveClassifier` and
+  :class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.
+
+- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
+ `random_state` parameter is deprecated. By :user:`Erich Schubert `.
+
+Code Contributors
+-----------------
+A. Flaxman, Aaron Schumacher, Aaron Staple, abhishek thakur, Akshay, akshayah3,
+Aldrian Obaja, Alexander Fabisch, Alexandre Gramfort, Alexis Mignon, Anders
+Aagaard, Andreas Mueller, Andreas van Cranenburgh, Andrew Tulloch, Andrew
+Walker, Antony Lee, Arnaud Joly, banilo, Barmaley.exe, Ben Davies, Benedikt
+Koehler, bhsu, Boris Feld, Borja Ayerdi, Boyuan Deng, Brent Pedersen, Brian
+Wignall, Brooke Osborn, Calvin Giles, Cathy Deng, Celeo, cgohlke, chebee7i,
+Christian Stade-Schuldt, Christof Angermueller, Chyi-Kwei Yau, CJ Carey,
+Clemens Brunner, Daiki Aminaka, Dan Blanchard, danfrankj, Danny Sullivan, David
+Fletcher, Dmitrijs Milajevs, Dougal J. Sutherland, Erich Schubert, Fabian
+Pedregosa, Florian Wilhelm, floydsoft, Félix-Antoine Fortin, Gael Varoquaux,
+Garrett-R, Gilles Louppe, gpassino, gwulfs, Hampus Bengtsson, Hamzeh Alsalhi,
+Hanna Wallach, Harry Mavroforakis, Hasil Sharma, Helder, Herve Bredin,
+Hsiang-Fu Yu, Hugues SALAMIN, Ian Gilmore, Ilambharathi Kanniah, Imran Haque,
+isms, Jake VanderPlas, Jan Dlabal, Jan Hendrik Metzen, Jatin Shah, Javier López
+Peña, jdcaballero, Jean Kossaifi, Jeff Hammerbacher, Joel Nothman, Jonathan
+Helmus, Joseph, Kaicheng Zhang, Kevin Markham, Kyle Beauchamp, Kyle Kastner,
+Lagacherie Matthieu, Lars Buitinck, Laurent Direr, leepei, Loic Esteve, Luis
+Pedro Coelho, Lukas Michelbacher, maheshakya, Manoj Kumar, Manuel, Mario
+Michael Krell, Martin, Martin Billinger, Martin Ku, Mateusz Susik, Mathieu
+Blondel, Matt Pico, Matt Terry, Matteo Visconti dOC, Matti Lyra, Max Linke,
+Mehdi Cherti, Michael Bommarito, Michael Eickenberg, Michal Romaniuk, MLG,
+mr.Shu, Nelle Varoquaux, Nicola Montecchio, Nicolas, Nikolay Mayorov, Noel
+Dawe, Okal Billy, Olivier Grisel, Óscar Nájera, Paolo Puggioni, Peter
+Prettenhofer, Pratap Vardhan, pvnguyen, queqichao, Rafael Carrascosa, Raghav R
+V, Rahiel Kasim, Randall Mason, Rob Zinkov, Robert Bradshaw, Saket Choudhary,
+Sam Nicholls, Samuel Charron, Saurabh Jha, sethdandridge, sinhrks, snuderl,
+Stefan Otte, Stefan van der Walt, Steve Tjoa, swu, Sylvain Zimmer, tejesh95,
+terrycojones, Thomas Delteil, Thomas Unterthiner, Tomas Kazmar, trevorstephens,
+tttthomasssss, Tzu-Ming Kuo, ugurcaliskan, ugurthemaster, Vinayak Mehta,
+Vincent Dubourg, Vjacheslav Murashkin, Vlad Niculae, wadawson, Wei Xue, Will
+Lamond, Wu Jiang, x0l, Xinfan Meng, Yan Yi, Yu-Chin
+
diff --git a/doc/whats_new/v0.17.rst b/doc/whats_new/v0.17.rst
new file mode 100644
index 0000000000000..35e895e5d4188
--- /dev/null
+++ b/doc/whats_new/v0.17.rst
@@ -0,0 +1,511 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_17_1:
+
+Version 0.17.1
+==============
+
+**February 18, 2016**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+
+- Upgrade vendored joblib to version 0.9.4 that fixes an important bug in
+  ``joblib.Parallel`` that can silently yield wrong results when working
+ on datasets larger than 1MB:
+ https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst
+
+- Fixed reading of Bunch pickles generated with scikit-learn
+ version <= 0.16. This can affect users who have already
+ downloaded a dataset with scikit-learn 0.16 and are loading it
+ with scikit-learn 0.17. See :issue:`6196` for
+ how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
+ Esteve`_.
+
+- Fixed a bug that prevented using ROC AUC score to perform grid search on
+ several CPU / cores on large arrays. See :issue:`6147`
+ By `Olivier Grisel`_.
+
+- Fixed a bug that prevented to properly set the ``presort`` parameter
+ in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`
+ By Andrew McCulloh.
+
+- Fixed a joblib error when evaluating the perplexity of a
+ :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`
+ By Chyi-Kwei Yau.
+
+
+.. _changes_0_17:
+
+Version 0.17
+============
+
+**November 5, 2015**
+
+Changelog
+---------
+
+New features
+............
+
+- All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by
+ calling `partial_fit`. By :user:`Giorgio Patrini `.
+
+- The new class :class:`ensemble.VotingClassifier` implements a
+ "majority rule" / "soft voting" ensemble classifier to combine
+ estimators for classification. By `Sebastian Raschka`_.
+
+- The new class :class:`preprocessing.RobustScaler` provides an
+ alternative to :class:`preprocessing.StandardScaler` for feature-wise
+ centering and range normalization that is robust to outliers.
+ By :user:`Thomas Unterthiner `.
+
+- The new class :class:`preprocessing.MaxAbsScaler` provides an
+ alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
+ range normalization when the data is already centered or sparse.
+ By :user:`Thomas Unterthiner `.
+
+- The new class :class:`preprocessing.FunctionTransformer` turns a Python
+ function into a ``Pipeline``-compatible transformer object.
+ By Joe Jevnik.
+
+- The new classes :class:`cross_validation.LabelKFold` and
+ :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
+ respectively similar to :class:`cross_validation.KFold` and
+ :class:`cross_validation.ShuffleSplit`, except that the folds are
+ conditioned on a label array. By `Brian McFee`_, :user:`Jean
+ Kossaifi ` and `Gilles Louppe`_.
+
+- :class:`decomposition.LatentDirichletAllocation` implements the Latent
+ Dirichlet Allocation topic model with online variational
+ inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation
+ by Matt Hoffman. (:issue:`3659`)
+
+- The new solver ``sag`` implements a Stochastic Average Gradient descent
+ and is available in both :class:`linear_model.LogisticRegression` and
+ :class:`linear_model.Ridge`. This solver is very efficient for large
+ datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_.
+ (:issue:`4738`)
+
+- The new solver ``cd`` implements a Coordinate Descent in
+ :class:`decomposition.NMF`. Previous solver based on Projected Gradient is
+ still available setting new parameter ``solver`` to ``pg``, but is
+ deprecated and will be removed in 0.19, along with
+ :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``,
+ ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and
+ ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a
+ shuffling step in the ``cd`` solver.
+ By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.
+
+Enhancements
+............
+- :class:`manifold.TSNE` now supports approximate optimization via the
+ Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody.
+ (:issue:`4025`)
+
+- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution,
+ as implemented in the ``mean_shift`` function. By :user:`Martino
+ Sorbaro `.
+
+- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``.
+ By `Jan Hendrik Metzen`_.
+
+- :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
+ By `Arnaud Joly`_.
+
+- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses.
+ By :user:`Cory Lorenz `.
+
+- Added the :func:`metrics.label_ranking_loss` metric.
+ By `Arnaud Joly`_.
+
+- Added the :func:`metrics.cohen_kappa_score` metric.
+
+- Added a ``warm_start`` constructor parameter to the bagging ensemble
+ models to increase the size of the ensemble. By :user:`Tim Head `.
+
+- Added option to use multi-output regression metrics without averaging.
+ By Konstantin Shmelkov and :user:`Michael Eickenberg`.
+
+- Added ``stratify`` option to :func:`cross_validation.train_test_split`
+ for stratified splitting. By Miroslav Batchkarov.
+
+- The :func:`tree.export_graphviz` function now supports aesthetic
+ improvements for :class:`tree.DecisionTreeClassifier` and
+ :class:`tree.DecisionTreeRegressor`, including options for coloring nodes
+ by their majority class or impurity, showing variable names, and using
+ node proportions instead of raw sample counts. By `Trevor Stephens`_.
+
+- Improved speed of ``newton-cg`` solver in
+ :class:`linear_model.LogisticRegression`, by avoiding loss computation.
+ By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.
+
+- The ``class_weight="auto"`` heuristic in classifiers supporting
+ ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
+ option, which has a simpler formula and interpretation.
+ By `Hanna Wallach`_ and `Andreas Müller`_.
+
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
+  `Trevor Stephens`_.
+
+- Added backlinks from the API reference pages to the user guide. By
+ `Andreas Müller`_.
+
+- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
+ :func:`sklearn.metrics.fbeta_score`,
+ :func:`sklearn.metrics.recall_score` and
+ :func:`sklearn.metrics.precision_score` has been extended.
+ It is now possible to ignore one or more labels, such as where
+ a multiclass problem has a majority class to ignore. By `Joel Nothman`_.
+
+- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
+ By `Trevor Stephens`_.
+
+- Provide an option for sparse output from
+ :func:`sklearn.metrics.pairwise.cosine_similarity`. By
+ :user:`Jaidev Deshpande `.
+
+- Add :func:`minmax_scale` to provide a function interface for
+ :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `.
+
+- ``dump_svmlight_file`` now handles multi-label datasets.
+ By Chih-Wei Chang.
+
+- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
+ By `Tom Dupre la Tour`_.
+
+- The "Wisconsin Breast Cancer" classical two-class classification dataset
+ is now included in scikit-learn, available with
+  :func:`sklearn.datasets.load_breast_cancer`.
+
+- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
+ short tasks. This makes it possible for scikit-learn to benefit from
+ parallelism when many very short tasks are executed in parallel, for
+ instance by the :class:`grid_search.GridSearchCV` meta-estimator
+ with ``n_jobs > 1`` used with a large grid of parameters on a small
+ dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
+
+- For more details about changes in joblib 0.9.3 see the release notes:
+ https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093
+
+- Improved speed (3 times per iteration) of
+  :class:`decomposition.DictionaryLearning` with coordinate descent method
+ from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `.
+
+- Parallel processing (threaded) for queries of nearest neighbors
+ (using the ball-tree) by Nikolay Mayorov.
+
+- Allow :func:`datasets.make_multilabel_classification` to output
+ a sparse ``y``. By Kashif Rasul.
+
+- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed
+ distances, allowing memory-efficient distance precomputation. By
+ `Joel Nothman`_.
+
+- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method
+ for retrieving the leaf indices samples are predicted as. By
+ :user:`Daniel Galvez ` and `Gilles Louppe`_.
+
+- Speed up decision tree regressors, random forest regressors, extra trees
+ regressors and gradient boosting estimators by computing a proxy
+ of the impurity improvement during the tree growth. The proxy quantity is
+ such that the split that maximizes this value also maximizes the impurity
+ improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber `
+ and `Gilles Louppe`_.
+
+- Speed up tree based methods by reducing the number of computations needed
+ when computing the impurity measure taking into account linear
+ relationship of the computed statistics. The effect is particularly
+ visible with extra trees and on datasets with categorical or sparse
+ features. By `Arnaud Joly`_.
+
+- :class:`ensemble.GradientBoostingRegressor` and
+ :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
+ method for retrieving the leaf indices each sample ends up in under
+  each tree. By :user:`Jacob Schreiber `.
+
+- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
+  By Sonny Hu. (:issue:`4881`)
+
+- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
+ the stopping criterion. By Santi Villalba. (:issue:`5186`)
+
+- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`,
+  to set the seed of the pseudo random generator used in ``sag`` solver. By `Tom Dupre la Tour`_.
+
+- Added optional parameter ``warm_start`` in
+ :class:`linear_model.LogisticRegression`. If set to True, the solvers
+ ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
+ coefficients computed in the previous fit. By `Tom Dupre la Tour`_.
+
+- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
+ the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
+ Support added to the ``liblinear`` solver. By `Manoj Kumar`_.
+
+- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
+ and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
+ the same. This allows gradient boosters to turn off presorting when building
+ deep trees or using sparse data. By :user:`Jacob Schreiber `.
+
+- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
+ default. By :user:`Graham Clenaghan `.
+
+- Added :class:`feature_selection.SelectFromModel` meta-transformer which can
+ be used along with estimators that have `coef_` or `feature_importances_`
+ attribute to select important features of the input data. By
+ :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_.
+
+- Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_.
+
+- :class:`covariance.GraphLasso` allows separate control of the convergence criterion
+ for the Elastic-Net subproblem via the ``enet_tol`` parameter.
+
+- Improved verbosity in :class:`decomposition.DictionaryLearning`.
+
+- :class:`ensemble.RandomForestClassifier` and
+ :class:`ensemble.RandomForestRegressor` no longer explicitly store the
+ samples used in bagging, resulting in a much reduced memory footprint for
+ storing random forest models.
+
+- Added ``positive`` option to :class:`linear_model.Lars` and
+ :func:`linear_model.lars_path` to force coefficients to be positive.
+ (:issue:`5131`)
+
+- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances`
+ to provide precomputed squared norms for ``X``.
+
+- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`.
+
+- Added the :func:`preprocessing.minmax_scale` function.
+
+Bug fixes
+.........
+
+- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
+ multi-label output. By `Andreas Müller`_.
+
+- Fixed the output shape of :class:`linear_model.RANSACRegressor` to
+ ``(n_samples, )``. By `Andreas Müller`_.
+
+- Fixed bug in :class:`decomposition.DictionaryLearning` when ``n_jobs < 0``. By
+ `Andreas Müller`_.
+
+- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a
+ lot of memory for large discrete grids. By `Joel Nothman`_.
+
+- Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored
+ in the final fit. By `Manoj Kumar`_.
+
+- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
+ oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `.
+
+- All regressors now consistently handle and warn when given ``y`` that is of
+ shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin.
+ (:issue:`5431`)
+
+- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by
+ `Lars Buitinck`_.
+
+- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance
+ matrices when using shrinkage. By `Martin Billinger`_.
+
+- Fixed :func:`cross_validation.cross_val_predict` for estimators with
+ sparse predictions. By Buddha Prakash.
+
+- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression`
+ to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_.
+ (:issue:`5182`)
+
+- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier`
+ when called with ``average=True``. By :user:`Andrew Lamb `.
+ (:issue:`5282`)
+
+- Dataset fetchers use different filenames under Python 2 and Python 3 to
+ avoid pickling compatibility issues. By `Olivier Grisel`_.
+ (:issue:`5355`)
+
+- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
+ results to depend on scale. By `Jake Vanderplas`_.
+
+- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect
+ when fitting the intercept in the case of sparse data. The fix
+ automatically changes the solver to 'sag' in this case.
+ :issue:`5360` by `Tom Dupre la Tour`_.
+
+- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data
+ with a large number of features and fewer samples. (:issue:`4478`)
+ By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `.
+
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
+ platform dependent output, and failed on `fit_transform`.
+ By :user:`Arthur Mensch `.
+
+- Fixes to the ``Bunch`` class used to store datasets.
+
+- Fixed :func:`ensemble.plot_partial_dependence` ignoring the
+ ``percentiles`` parameter.
+
+- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
+ leads to inconsistent results when pickling.
+
+- Fixed the conditions on when a precomputed Gram matrix needs to
+ be recomputed in :class:`linear_model.LinearRegression`,
+ :class:`linear_model.OrthogonalMatchingPursuit`,
+ :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.
+
+- Fixed inconsistent memory layout in the coordinate descent solver
+  that affected :class:`decomposition.DictionaryLearning` and
+ :class:`covariance.GraphLasso`. (:issue:`5337`)
+ By `Olivier Grisel`_.
+
+- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
+ parameter.
+
+- Nearest Neighbor estimators with custom distance metrics can now be pickled.
+ (:issue:`4362`)
+
+- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
+ were not properly handled when performing grid-searches.
+
+- Fixed a bug in :class:`linear_model.LogisticRegression` and
+ :class:`linear_model.LogisticRegressionCV` when using
+  ``class_weight='balanced'`` or ``class_weight='auto'``.
+ By `Tom Dupre la Tour`_.
+
+- Fixed bug :issue:`5495` when
+ doing OVR(SVC(decision_function_shape="ovr")). Fixed by
+ :user:`Elvis Dohmatob `.
+
+
+API changes summary
+-------------------
+- Attribute `data_min`, `data_max` and `data_range` in
+ :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
+ from 0.19. Instead, the class now exposes `data_min_`, `data_max_`
+ and `data_range_`. By :user:`Giorgio Patrini `.
+
+- All Scaler classes now have a `scale_` attribute, the feature-wise
+ rescaling applied by their `transform` methods. The old attribute `std_`
+ in :class:`preprocessing.StandardScaler` is deprecated and superseded
+ by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `.
+
+- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape``
+ parameter to make their decision function of shape ``(n_samples, n_classes)``
+ by setting ``decision_function_shape='ovr'``. This will be the default behavior
+ starting in 0.19. By `Andreas Müller`_.
+
+- Passing 1D data arrays as input to estimators is now deprecated as it
+ caused confusion in how the array elements should be interpreted
+ as features or as samples. All data arrays are now expected
+ to be explicitly shaped ``(n_samples, n_features)``.
+ By :user:`Vighnesh Birodkar `.
+
+- :class:`lda.LDA` and :class:`qda.QDA` have been moved to
+ :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
+ :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+
+- The ``store_covariance`` and ``tol`` parameters have been moved from
+ the fit method to the constructor in
+ :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
+ ``store_covariances`` and ``tol`` parameters have been moved from the
+ fit method to the constructor in
+ :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+
+- Models inheriting from ``_LearntSelectorMixin`` will no longer support the
+ transform methods. (i.e, RandomForests, GradientBoosting, LogisticRegression,
+ DecisionTrees, SVMs and SGD related models). Wrap these models around the
+ metatransfomer :class:`feature_selection.SelectFromModel` to remove
+  features (according to `coef_` or `feature_importances_`)
+ which are below a certain threshold value instead.
+
+- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
+ to ensure consistency of ``predict(X)`` and ``labels_``. By
+ :user:`Vighnesh Birodkar `.
+
+- Classifier and Regressor models are now tagged as such using the
+ ``_estimator_type`` attribute.
+
+- Cross-validation iterators always provide indices into training and test set,
+ not boolean masks.
+
+- The ``decision_function`` on all regressors was deprecated and will be
+ removed in 0.19. Use ``predict`` instead.
+
+- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
+ Use :func:`datasets.fetch_lfw_pairs` instead.
+
+- The deprecated ``hmm`` module was removed.
+
+- The deprecated ``Bootstrap`` cross-validation iterator was removed.
+
+- The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed.
+ Use :class:`clustering.AgglomerativeClustering` instead.
+
+- :func:`cross_validation.check_cv` is now a public function.
+
+- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
+ and will be removed in 0.19.
+
+- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
+ to the constructor.
+
+- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
+ method. Use the construction parameter instead.
+
+- The deprecated support for the sequence of sequences (or list of lists) multilabel
+ format was removed. To convert to and from the supported binary
+ indicator matrix format, use
+  :class:`preprocessing.MultiLabelBinarizer`.
+
+- The behavior of calling the ``inverse_transform`` method of ``pipeline.Pipeline`` will
+ change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.
+
+- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
+ :class:`preprocessing.LabelBinarizer` were removed.
+
+- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
+ gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
+ Use ``gamma="auto"`` instead.
+
+Code Contributors
+-----------------
+Aaron Schumacher, Adithya Ganesh, akitty, Alexandre Gramfort, Alexey Grigorev,
+Ali Baharev, Allen Riddell, Ando Saabas, Andreas Mueller, Andrew Lamb, Anish
+Shah, Ankur Ankan, Anthony Erlinger, Ari Rouvinen, Arnaud Joly, Arnaud Rachez,
+Arthur Mensch, banilo, Barmaley.exe, benjaminirving, Boyuan Deng, Brett Naul,
+Brian McFee, Buddha Prakash, Chi Zhang, Chih-Wei Chang, Christof Angermueller,
+Christoph Gohlke, Christophe Bourguignat, Christopher Erick Moody, Chyi-Kwei
+Yau, Cindy Sridharan, CJ Carey, Clyde-fare, Cory Lorenz, Dan Blanchard, Daniel
+Galvez, Daniel Kronovet, Danny Sullivan, Data1010, David, David D Lowe, David
+Dotson, djipey, Dmitry Spikhalskiy, Donne Martin, Dougal J. Sutherland, Dougal
+Sutherland, edson duarte, Eduardo Caro, Eric Larson, Eric Martin, Erich
+Schubert, Fernando Carrillo, Frank C. Eckert, Frank Zalkow, Gael Varoquaux,
+Ganiev Ibraim, Gilles Louppe, Giorgio Patrini, giorgiop, Graham Clenaghan,
+Gryllos Prokopis, gwulfs, Henry Lin, Hsuan-Tien Lin, Immanuel Bayer, Ishank
+Gulati, Jack Martin, Jacob Schreiber, Jaidev Deshpande, Jake Vanderplas, Jan
+Hendrik Metzen, Jean Kossaifi, Jeffrey04, Jeremy, jfraj, Jiali Mei,
+Joe Jevnik, Joel Nothman, John Kirkham, John Wittenauer, Joseph, Joshua Loyal,
+Jungkook Park, KamalakerDadi, Kashif Rasul, Keith Goodman, Kian Ho, Konstantin
+Shmelkov, Kyler Brown, Lars Buitinck, Lilian Besson, Loic Esteve, Louis Tiao,
+maheshakya, Maheshakya Wijewardena, Manoj Kumar, MarkTab marktab.net, Martin
+Ku, Martin Spacek, MartinBpr, martinosorb, MaryanMorel, Masafumi Oyamada,
+Mathieu Blondel, Matt Krump, Matti Lyra, Maxim Kolganov, mbillinger, mhg,
+Michael Heilman, Michael Patterson, Miroslav Batchkarov, Nelle Varoquaux,
+Nicolas, Nikolay Mayorov, Olivier Grisel, Omer Katz, Óscar Nájera, Pauli
+Virtanen, Peter Fischer, Peter Prettenhofer, Phil Roth, pianomania, Preston
+Parry, Raghav RV, Rob Zinkov, Robert Layton, Rohan Ramanath, Saket Choudhary,
+Sam Zhang, santi, saurabh.bansod, scls19fr, Sebastian Raschka, Sebastian
+Saeger, Shivan Sornarajah, SimonPL, sinhrks, Skipper Seabold, Sonny Hu, sseg,
+Stephen Hoover, Steven De Gryze, Steven Seguin, Theodore Vasiloudis, Thomas
+Unterthiner, Tiago Freitas Pereira, Tian Wang, Tim Head, Timothy Hopper,
+tokoroten, Tom Dupré la Tour, Trevor Stephens, Valentin Stolbunov, Vighnesh
+Birodkar, Vinayak Mehta, Vincent, Vincent Michel, vstolbunov, wangz10, Wei Xue,
+Yucheng Low, Yury Zhauniarovich, Zac Stewart, zhai_pro, Zichen Wang
+
diff --git a/doc/whats_new/v0.18.rst b/doc/whats_new/v0.18.rst
new file mode 100644
index 0000000000000..ad240d5782793
--- /dev/null
+++ b/doc/whats_new/v0.18.rst
@@ -0,0 +1,816 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_18_2:
+
+Version 0.18.2
+==============
+
+**June 20, 2017**
+
+.. topic:: Last release with Python 2.6 support
+
+ Scikit-learn 0.18 is the last major release of scikit-learn to support Python 2.6.
+ Later versions of scikit-learn will require Python 2.7 or above.
+
+
+Changelog
+---------
+
+- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by
+ `Loic Esteve`_.
+
+- Minor compatibility changes in the examples :issue:`9010` :issue:`8040`
+ :issue:`9149`.
+
+Code Contributors
+-----------------
+Aman Dalmia, Loic Esteve, Nate Guerin, Sergei Lebedev
+
+
+.. _changes_0_18_1:
+
+Version 0.18.1
+==============
+
+**November 11, 2016**
+
+Changelog
+---------
+
+Enhancements
+............
+
+- Improved ``sample_without_replacement`` speed by utilizing
+ numpy.random.permutation for most cases. As a result,
+ samples may differ in this release for a fixed random state.
+ Affected estimators:
+
+ - :class:`ensemble.BaggingClassifier`
+ - :class:`ensemble.BaggingRegressor`
+ - :class:`linear_model.RANSACRegressor`
+ - :class:`model_selection.RandomizedSearchCV`
+ - :class:`random_projection.SparseRandomProjection`
+
+ This also affects the :meth:`datasets.make_classification`
+ method.
+
+Bug fixes
+.........
+
+- Fix issue where ``min_grad_norm`` and ``n_iter_without_progress``
+ parameters were not being utilised by :class:`manifold.TSNE`.
+ :issue:`6497` by :user:`Sebastian Säger `
+
+- Fix bug for svm's decision values when ``decision_function_shape``
+ is ``ovr`` in :class:`svm.SVC`.
+ :class:`svm.SVC`'s decision_function was incorrect from versions
+ 0.17.0 through 0.18.0.
+ :issue:`7724` by `Bing Tian Dai`_
+
+- Attribute ``explained_variance_ratio`` of
+ :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
+ with SVD and Eigen solver are now of the same length. :issue:`7632`
+ by :user:`JPFrancoia `
+
+- Fixes issue in :ref:`univariate_feature_selection` where score
+ functions were not accepting multi-label targets. :issue:`7676`
+ by :user:`Mohammed Affan `
+
+- Fixed setting parameters when calling ``fit`` multiple times on
+ :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_
+
+- Fixes issue in ``partial_fit`` method of
+ :class:`multiclass.OneVsRestClassifier` when number of classes used in
+ ``partial_fit`` was less than the total number of classes in the
+ data. :issue:`7786` by `Srivatsan Ramesh`_
+
+- Fixes issue in :class:`calibration.CalibratedClassifierCV` where
+ the sum of probabilities of each class for a data was not 1, and
+ ``CalibratedClassifierCV`` now handles the case where the training set
+ has less number of classes than the total data. :issue:`7799` by
+ `Srivatsan Ramesh`_
+
+- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
+ exactly implement Benjamini-Hochberg procedure. It formerly may have
+ selected fewer features than it should.
+ :issue:`7490` by :user:`Peng Meng `.
+
+- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
+ integer inputs. :issue:`6282` by `Jake Vanderplas`_.
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+ regressors now assumes uniform sample weights by default if the
+ ``sample_weight`` argument is not passed to the ``fit`` function.
+ Previously, the parameter was silently ignored. :issue:`7301`
+ by :user:`Nelson Liu `.
+
+- Numerical issue with :class:`linear_model.RidgeCV` on centered data when
+ `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe
+ :issue:`7680` by :user:`Ibraim Ganiev `.
+
+- Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_``
+ attribute in `transform()`. :issue:`7553` by :user:`Ekaterina
+ Krivich `.
+
+- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
+ string labels. :issue:`5874` by `Raghav RV`_.
+
+- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
+ an error when ``stratify`` is a list of string labels. :issue:`7593` by
+ `Raghav RV`_.
+
+- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
+ :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
+ because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
+ `Raghav RV`_.
+
+- All cross-validation utilities in :mod:`sklearn.model_selection` now
+ permit one time cross-validation splitters for the ``cv`` parameter. Also
+ non-deterministic cross-validation splitters (where multiple calls to
+ ``split`` produce dissimilar splits) can be used as ``cv`` parameter.
+ The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
+ parameter setting on the split produced by the first ``split`` call
+ to the cross-validation splitter. :issue:`7660` by `Raghav RV`_.
+
+- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
+ returned an invalid CSR matrix.
+ :issue:`7750` by :user:`CJ Carey `.
+
+- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
+ small negative distance. :issue:`7732` by :user:`Artsion `.
+
+API changes summary
+-------------------
+
+Trees and forests
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+ regressors now assumes uniform sample weights by default if the
+ ``sample_weight`` argument is not passed to the ``fit`` function.
+ Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
+ Liu `.
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+ :issue:`7680` by :user:`Ibraim Ganiev `.
+
+
+Linear, kernelized and related models
+
+- Length of ``explained_variance_ratio`` of
+ :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+ changed for both Eigen and SVD solvers. The attribute has now a length
+ of min(n_components, n_classes - 1). :issue:`7632`
+ by :user:`JPFrancoia `
+
+- Numerical issue with :class:`linear_model.RidgeCV` on centered data when
+ ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
+
+.. _changes_0_18:
+
+Version 0.18
+============
+
+**September 28, 2016**
+
+.. topic:: Last release with Python 2.6 support
+
+ Scikit-learn 0.18 will be the last version of scikit-learn to support Python 2.6.
+ Later versions of scikit-learn will require Python 2.7 or above.
+
+.. _model_selection_changes:
+
+Model Selection Enhancements and API Changes
+--------------------------------------------
+
+- **The model_selection module**
+
+ The new module :mod:`sklearn.model_selection`, which groups together the
+ functionalities of formerly :mod:`sklearn.cross_validation`,
+ :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new
+ possibilities such as nested cross-validation and better manipulation of
+ parameter searches with Pandas.
+
+ Many things will stay the same but there are some key differences. Read
+ below to know more about the changes.
+
+- **Data-independent CV splitters enabling nested cross-validation**
+
+ The new cross-validation splitters, defined in the
+ :mod:`sklearn.model_selection`, are no longer initialized with any
+ data-dependent parameters such as ``y``. Instead they expose a
+ :func:`split` method that takes in the data and yields a generator for the
+ different splits.
+
+ This change makes it possible to use the cross-validation splitters to
+ perform nested cross-validation, facilitated by
+ :class:`model_selection.GridSearchCV` and
+ :class:`model_selection.RandomizedSearchCV` utilities.
+
+- **The enhanced cv_results_ attribute**
+
+ The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
+ and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
+ ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
+ array corresponding to the parameter settings (i.e. search candidates).
+
+ The ``cv_results_`` dict can be easily imported into ``pandas`` as a
+ ``DataFrame`` for exploring the search results.
+
+ The ``cv_results_`` arrays include scores for each cross-validation split
+ (with keys such as ``'split0_test_score'``), as well as their mean
+ (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
+
+ The ranks for the search candidates (based on their mean
+ cross-validation score) is available at ``cv_results_['rank_test_score']``.
+
+ The parameter values for each parameter is stored separately as numpy
+ masked object arrays. The value, for that search candidate, is masked if
+ the corresponding parameter is not applicable. Additionally a list of all
+ the parameter dicts are stored at ``cv_results_['params']``.
+
+- **Parameters n_folds and n_iter renamed to n_splits**
+
+ Some parameter names have changed:
+ The ``n_folds`` parameter in new :class:`model_selection.KFold`,
+ :class:`model_selection.GroupKFold` (see below for the name change),
+ and :class:`model_selection.StratifiedKFold` is now renamed to
+ ``n_splits``. The ``n_iter`` parameter in
+ :class:`model_selection.ShuffleSplit`, the new class
+ :class:`model_selection.GroupShuffleSplit` and
+ :class:`model_selection.StratifiedShuffleSplit` is now renamed to
+ ``n_splits``.
+
+- **Rename of splitter classes which accept group labels along with data**
+
+ The cross-validation splitters ``LabelKFold``,
+ ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
+ been renamed to :class:`model_selection.GroupKFold`,
+ :class:`model_selection.GroupShuffleSplit`,
+ :class:`model_selection.LeaveOneGroupOut` and
+ :class:`model_selection.LeavePGroupsOut` respectively.
+
+ Note the change from singular to plural form in
+ :class:`model_selection.LeavePGroupsOut`.
+
+- **Fit parameter labels renamed to groups**
+
+ The ``labels`` parameter in the :func:`split` method of the newly renamed
+ splitters :class:`model_selection.GroupKFold`,
+ :class:`model_selection.LeaveOneGroupOut`,
+ :class:`model_selection.LeavePGroupsOut`,
+ :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
+ following the new nomenclature of their class names.
+
+- **Parameter n_labels renamed to n_groups**
+
+ The parameter ``n_labels`` in the newly renamed
+ :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
+
+- Training scores and Timing information
+
+ ``cv_results_`` also includes the training scores for each
+ cross-validation split (with keys such as ``'split0_train_score'``), as
+ well as their mean (``'mean_train_score'``) and standard deviation
+ (``'std_train_score'``). To avoid the cost of evaluating training score,
+ set ``return_train_score=False``.
+
+ Additionally the mean and standard deviation of the times taken to split,
+ train and score the model across all the cross-validation splits are
+ available at the keys ``'mean_time'`` and ``'std_time'`` respectively.
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and Regressors
+
+- The Gaussian Process module has been reimplemented and now offers classification
+ and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
+ and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
+ implementation supports kernel engineering, gradient-based hyperparameter optimization or
+ sampling of functions from GP prior and GP posterior. Extensive documentation and
+ examples are provided. By `Jan Hendrik Metzen`_.
+
+- Added new supervised learning algorithm: :ref:`Multi-layer Perceptron `
+ :issue:`3204` by :user:`Issam H. Laradji `
+
+- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
+ :issue:`5291` by `Manoj Kumar`_.
+
+- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
+ converts single output regressors to multi-output regressors by fitting
+ one regressor per output. By :user:`Tim Head `.
+
+Other estimators
+
+- New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
+ replace former mixture models, employing faster inference
+ for sounder results. :issue:`7295` by :user:`Wei Xue ` and
+ :user:`Thierry Guillemot `.
+
+- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
+ and it is available by calling with parameter ``svd_solver='randomized'``.
+ The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
+ behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
+ calls ``arpack`` and performs truncated (non-randomized) SVD. By default,
+ the best solver is selected depending on the size of the input and the
+ number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Added two functions for mutual information estimation:
+ :func:`feature_selection.mutual_info_classif` and
+ :func:`feature_selection.mutual_info_regression`. These functions can be
+ used in :class:`feature_selection.SelectKBest` and
+ :class:`feature_selection.SelectPercentile` as score functions.
+ By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.
+
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+ random forests. By `Nicolas Goix`_.
+
+- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
+ Elkan's fast K-Means algorithm. By `Andreas Müller`_.
+
+Model selection and evaluation
+
+- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows
+ Index which measures the similarity of two clusterings of a set of points.
+ By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+ and Harabaz score to evaluate the resulting clustering of a set of points.
+ By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added new cross-validation splitter
+ :class:`model_selection.TimeSeriesSplit` to handle time series data.
+ :issue:`6586` by :user:`YenChen Lin `
+
+- The cross-validation iterators are replaced by cross-validation splitters
+ available from :mod:`sklearn.model_selection`, allowing for nested
+ cross-validation. See :ref:`model_selection_changes` for more information.
+ :issue:`4294` by `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
+ the mean absolute error. This criterion can also be used in
+ :class:`ensemble.ExtraTreesRegressor`,
+ :class:`ensemble.RandomForestRegressor`, and the gradient boosting
+ estimators. :issue:`6667` by :user:`Nelson Liu `.
+
+- Added weighted impurity-based early stopping criterion for decision tree
+ growth. :issue:`6954` by :user:`Nelson Liu `
+
+- The random forest, extra tree and decision tree estimators now have a
+ method ``decision_path`` which returns the decision path of samples in
+ the tree. By `Arnaud Joly`_.
+
+- A new example has been added unveiling the decision tree structure.
+ By `Arnaud Joly`_.
+
+- Random forest, extra trees, decision trees and gradient boosting estimator
+ accept the parameter ``min_samples_split`` and ``min_samples_leaf``
+ provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.
+
+- Gradient boosting estimators accept the parameter ``criterion`` to specify
+ the splitting criterion used when building decision trees.
+ :issue:`6667` by :user:`Nelson Liu `.
+
+- The memory footprint is reduced (sometimes greatly) for
+ :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
+ i.e, :class:`ensemble.BaggingClassifier`,
+ :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
+ by dynamically generating attribute ``estimators_samples_`` only when it is
+ needed. By :user:`David Staub `.
+
+- Added ``n_jobs`` and ``sample_weight`` parameters for
+ :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
+ :issue:`5805` by :user:`Ibraim Ganiev `.
+
+Linear, kernelized and related models
+
+- In :class:`linear_model.LogisticRegression`, the SAG solver is now
+ available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
+
+- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
+ :class:`svm.LinearSVR` now support ``sample_weight``.
+ By :user:`Imaculate `.
+
+- Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
+ error on the samples for every trial. By `Manoj Kumar`_.
+
+- Prediction of out-of-sample events with Isotonic Regression
+ (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
+ data). By :user:`Jonathan Arfa `.
+
+- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
+ `O(n^2)` behavior in pathological cases, and is also generally faster
+ (:issue:`6691`). By `Antony Lee`_.
+
+- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
+ through the parameter ``priors``. By :user:`Guillaume Lemaitre `.
+
+- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+ now works with ``np.float32`` input data without converting it
+ into ``np.float64``. This allows to reduce the memory
+ consumption. :issue:`6913` by :user:`YenChen Lin `.
+
+- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
+ now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
+ :issue:`5762` by :user:`Utkarsh Upadhyay `.
+
+Decomposition, manifold learning and clustering
+
+- Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute
+ data matrix of original shape. By :user:`Anish Shah `.
+
+- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now works
+ with ``np.float32`` and ``np.float64`` input data without converting it.
+ This allows to reduce the memory consumption by using ``np.float32``.
+ :issue:`6846` by :user:`Sebastian Säger ` and
+ :user:`YenChen Lin `.
+
+Preprocessing and feature selection
+
+- :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter.
+ :issue:`5929` by :user:`Konstantin Podshumok `.
+
+- :class:`feature_extraction.FeatureHasher` now accepts string values.
+ :issue:`6173` by :user:`Ryad Zenine ` and
+ :user:`Devashish Deshpande `.
+
+- Keyword arguments can now be supplied to ``func`` in
+ :class:`preprocessing.FunctionTransformer` by means of the ``kw_args``
+ parameter. By `Brian McFee`_.
+
+- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile`
+ now accept score functions that take X, y as input and return only the scores.
+ By :user:`Nikolay Mayorov `.
+
+Model evaluation and meta-estimators
+
+- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier`
+ now support ``partial_fit``. By :user:`Asish Panda ` and
+ :user:`Philipp Dowling `.
+
+- Added support for substituting or disabling :class:`pipeline.Pipeline`
+ and :class:`pipeline.FeatureUnion` components using the ``set_params``
+ interface that powers :mod:`sklearn.grid_search`.
+ See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`
+ By `Joel Nothman`_ and :user:`Robert McGibbon `.
+
+- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
+ (and :class:`model_selection.RandomizedSearchCV`) can be easily imported
+ into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for
+ more information. :issue:`6697` by `Raghav RV`_.
+
+- Generalization of :func:`model_selection.cross_val_predict`.
+ One can pass method names such as `predict_proba` to be used in the cross
+ validation framework instead of the default `predict`.
+ By :user:`Ori Ziv ` and :user:`Sears Merritt `.
+
+- The training scores and time taken for training followed by scoring for
+ each search candidate are now available at the ``cv_results_`` dict.
+ See :ref:`model_selection_changes` for more information.
+ :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_.
+
+Metrics
+
+- Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide
+ the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+ :issue:`7239` by :user:`Hong Guangguo ` with help from
+ :user:`Mads Jensen ` and :user:`Nelson Liu `.
+
+- Support sparse contingency matrices in cluster evaluation
+ (:mod:`metrics.cluster.supervised`) to scale to a large number of
+ clusters.
+ :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_.
+
+- Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`.
+ By :user:`Jatin Shah ` and `Raghav RV`_.
+
+- Speed up :func:`metrics.silhouette_score` by using vectorized operations.
+ By `Manoj Kumar`_.
+
+- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`.
+ By :user:`Bernardo Stein `.
+
+Miscellaneous
+
+- Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute
+ the score on the test folds in parallel. By `Manoj Kumar`_
+
+- Codebase does not contain C/C++ cython generated files: they are
+ generated during build. Distribution packages will still contain generated
+ C/C++ files. By :user:`Arthur Mensch `.
+
+- Reduce the memory usage for 32-bit float input arrays of
+ :func:`utils.sparse_func.mean_variance_axis` and
+ :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython
+ fused types. By :user:`YenChen Lin `.
+
+- The :func:`ignore_warnings` now accept a category argument to ignore only
+ the warnings of a specified type. By :user:`Thierry Guillemot `.
+
+- Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to
+ :func:`load_iris` dataset
+ :issue:`7049`,
+ :func:`load_breast_cancer` dataset
+ :issue:`7152`,
+ :func:`load_digits` dataset,
+ :func:`load_diabetes` dataset,
+ :func:`load_linnerud` dataset,
+ :func:`load_boston` dataset
+ :issue:`7154` by
+ :user:`Manvendra Singh`.
+
+- Simplification of the ``clone`` function, deprecate support for estimators
+ that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.
+
+- When unpickling a scikit-learn estimator in a different version than the one
+ the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
+ on model persistence ` for more details. (:issue:`7248`)
+ By `Andreas Müller`_.
+
+Bug fixes
+.........
+
+Trees and ensembles
+
+- Random forest, extra trees, decision trees and gradient boosting
+ won't accept anymore ``min_samples_split=1`` as at least 2 samples
+ are required to split a decision tree node. By `Arnaud Joly`_
+
+- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
+ ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
+ by `Sebastian Raschka`_.
+
+- Fix bug where :class:`ensemble.AdaBoostClassifier` and
+ :class:`ensemble.AdaBoostRegressor` would perform poorly if the
+ ``random_state`` was fixed
+ (:issue:`7411`). By `Joel Nothman`_.
+
+- Fix bug in ensembles with randomization where the ensemble would not
+ set ``random_state`` on base estimators in a pipeline or similar nesting.
+ (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`
+ :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
+ and :class:`ensemble.AdaBoostRegressor` will now differ from previous
+ versions. By `Joel Nothman`_.
+
+Linear, kernelized and related models
+
+- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
+ :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
+ (:issue:`6764`). By :user:`Wenhua Yang `.
+
+- Fix bug in :class:`linear_model.LogisticRegressionCV` where
+ ``solver='liblinear'`` did not accept ``class_weights='balanced'``.
+ (:issue:`6817`). By `Tom Dupre la Tour`_.
+
+- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
+ occurred when there were outliers being labelled and a weight function
+ specified (:issue:`6902`). By
+ `LeonieBorne `_.
+
+- Fix :class:`linear_model.ElasticNet` sparse decision function to match
+ output with dense in the multioutput case.
+
+Decomposition, manifold learning and clustering
+
+- :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
+ :issue:`5141` by :user:`Giorgio Patrini `.
+
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0.
+ In practice this is enough for obtaining a good approximation of the
+ true eigenvalues/vectors in the presence of noise. When `n_components` is
+ small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies
+ a higher number. This improves precision with few components.
+ :issue:`5299` by :user:`Giorgio Patrini`.
+
+- Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
+ and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
+ New features) is fixed. `components_` are stored with no whitening.
+ :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
+ Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `.
+
+- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
+ occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
+ :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
+ and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
+ :user:`Peter Fischer `.
+
+- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
+ of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
+ correct results. By :user:`JPFrancoia `
+
+Preprocessing and feature selection
+
+- :func:`preprocessing.data._transform_selected` now always passes a copy
+ of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio
+ Oliveira `_.
+
+Model evaluation and meta-estimators
+
+- :class:`model_selection.StratifiedKFold` now raises an error if the number
+ of labels for any individual class is less than n_folds.
+ :issue:`6182` by :user:`Devashish Deshpande `.
+
+- Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
+ where train and test sample could overlap in some edge cases,
+ see :issue:`6121` for
+ more details. By `Loic Esteve`_.
+
+- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
+ return splits of size ``train_size`` and ``test_size`` in all cases
+ (:issue:`6472`). By `Andreas Müller`_.
+
+- Cross-validation of :class:`OneVsOneClassifier` and
+ :class:`OneVsRestClassifier` now works with precomputed kernels.
+ :issue:`7350` by :user:`Russell Smith `.
+
+- Fix incomplete ``predict_proba`` method delegation from
+ :class:`model_selection.GridSearchCV` to
+ :class:`linear_model.SGDClassifier` (:issue:`7159`)
+ by `Yichuan Liu `_.
+
+Metrics
+
+- Fix bug in :func:`metrics.silhouette_score` in which clusters of
+ size 1 were incorrectly scored. They should get a score of 0.
+ By `Joel Nothman`_.
+
+- Fix bug in :func:`metrics.silhouette_samples` so that it now works with
+ arbitrary labels, not just those ranging from 0 to n_clusters - 1.
+
+- Fix bug where expected and adjusted mutual information were incorrect if
+ cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
+
+- :func:`metrics.pairwise.pairwise_distances` now converts arrays to
+ boolean arrays when required in ``scipy.spatial.distance``.
+ :issue:`5460` by `Tom Dupre la Tour`_.
+
+- Fix sparse input support in :func:`metrics.silhouette_score` as well as
+ example examples/text/document_clustering.py. By :user:`YenChen Lin `.
+
+- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
+ longer round ``y_score`` values when creating ROC curves; this was causing
+ problems for users with very small differences in scores (:issue:`7353`).
+
+Miscellaneous
+
+- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
+ that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange
+ (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.
+
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
+ power iterations are requested, since it applies LU normalization by default.
+ If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied.
+ Other normalization options are available: ``'none', 'LU'`` and ``'QR'``.
+ :issue:`5141` by :user:`Giorgio Patrini `.
+
+- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
+ with them as parameters, could not be passed to :func:`base.clone`.
+ By `Loic Esteve`_.
+
+- :func:`datasets.load_svmlight_file` now is able to read long int QID values.
+ :issue:`7101` by :user:`Ibraim Ganiev `.
+
+
+API changes summary
+-------------------
+
+Linear, kernelized and related models
+
+- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
+ Use ``loss`` instead. By `Manoj Kumar`_.
+
+- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
+ :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.
+
+Decomposition, manifold learning and clustering
+
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new
+ :class:`mixture.BayesianGaussianMixture` (with the parameter
+ ``weight_concentration_prior_type='dirichlet_process'``).
+ The new class solves the computational
+ problems of the old class and computes the Gaussian mixture with a
+ Dirichlet process prior faster than before.
+ :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.VBGMM` is deprecated in favor of the new
+ :class:`mixture.BayesianGaussianMixture` (with the parameter
+ ``weight_concentration_prior_type='dirichlet_distribution'``).
+ The new class solves the computational
+ problems of the old class and computes the Variational Bayesian Gaussian
+ mixture faster than before.
+ :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.GMM` is deprecated in favor of the new
+ :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
+ faster than before and some of computational problems have been solved.
+ :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+Model evaluation and meta-estimators
+
+- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
+ :mod:`sklearn.learning_curve` have been deprecated and the classes and
+ functions have been reorganized into the :mod:`sklearn.model_selection`
+ module. Ref :ref:`model_selection_changes` for more information.
+ :issue:`4294` by `Raghav RV`_.
+
+- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
+ and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
+ the attribute ``cv_results_``.
+ Ref :ref:`model_selection_changes` for more information.
+ :issue:`6697` by `Raghav RV`_.
+
+- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
+ by the new parameter ``n_splits`` since it can provide a consistent
+ and unambiguous interface to represent the number of train-test splits.
+ :issue:`7187` by :user:`YenChen Lin `.
+
+- ``classes`` parameter was renamed to ``labels`` in
+ :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `.
+
+- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
+ ``LeaveOneLabelOut`` and ``LeavePLabelOut`` are renamed to
+ :class:`model_selection.GroupKFold`,
+ :class:`model_selection.GroupShuffleSplit`,
+ :class:`model_selection.LeaveOneGroupOut`
+ and :class:`model_selection.LeavePGroupsOut` respectively.
+ Also the parameter ``labels`` in the :func:`split` method of the newly
+ renamed splitters :class:`model_selection.LeaveOneGroupOut` and
+ :class:`model_selection.LeavePGroupsOut` is renamed to
+ ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`,
+ the parameter ``n_labels`` is renamed to ``n_groups``.
+ :issue:`6660` by `Raghav RV`_.
+
+- Error and loss names for ``scoring`` parameters are now prefixed by
+ ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions
+ are deprecated and will be removed in version 0.20.
+ :issue:`7261` by :user:`Tim Head `.
+
+Code Contributors
+-----------------
+Aditya Joshi, Alejandro, Alexander Fabisch, Alexander Loginov, Alexander
+Minyushkin, Alexander Rudy, Alexandre Abadie, Alexandre Abraham, Alexandre
+Gramfort, Alexandre Saint, alexfields, Alvaro Ulloa, alyssaq, Amlan Kar,
+Andreas Mueller, andrew giessel, Andrew Jackson, Andrew McCulloh, Andrew
+Murray, Anish Shah, Arafat, Archit Sharma, Ariel Rokem, Arnaud Joly, Arnaud
+Rachez, Arthur Mensch, Ash Hoover, asnt, b0noI, Behzad Tabibian, Bernardo,
+Bernhard Kratzwald, Bhargav Mangipudi, blakeflei, Boyuan Deng, Brandon Carter,
+Brett Naul, Brian McFee, Caio Oliveira, Camilo Lamus, Carol Willing, Cass,
+CeShine Lee, Charles Truong, Chyi-Kwei Yau, CJ Carey, codevig, Colin Ni, Dan
+Shiebler, Daniel, Daniel Hnyk, David Ellis, David Nicholson, David Staub, David
+Thaler, David Warshaw, Davide Lasagna, Deborah, definitelyuncertain, Didi
+Bar-Zev, djipey, dsquareindia, edwinENSAE, Elias Kuthe, Elvis DOHMATOB, Ethan
+White, Fabian Pedregosa, Fabio Ticconi, fisache, Florian Wilhelm, Francis,
+Francis O'Donovan, Gael Varoquaux, Ganiev Ibraim, ghg, Gilles Louppe, Giorgio
+Patrini, Giovanni Cherubin, Giovanni Lanzani, Glenn Qian, Gordon
+Mohr, govin-vatsan, Graham Clenaghan, Greg Reda, Greg Stupp, Guillaume
+Lemaitre, Gustav Mörtberg, halwai, Harizo Rajaona, Harry Mavroforakis,
+hashcode55, hdmetor, Henry Lin, Hobson Lane, Hugo Bowne-Anderson,
+Igor Andriushchenko, Imaculate, Inki Hwang, Isaac Sijaranamual,
+Ishank Gulati, Issam Laradji, Iver Jordal, jackmartin, Jacob Schreiber, Jake
+Vanderplas, James Fiedler, James Routley, Jan Zikes, Janna Brettingen, jarfa, Jason
+Laska, jblackburne, jeff levesque, Jeffrey Blackburne, Jeffrey04, Jeremy Hintz,
+jeremynixon, Jeroen, Jessica Yung, Jill-Jênn Vie, Jimmy Jia, Jiyuan Qian, Joel
+Nothman, johannah, John, John Boersma, John Kirkham, John Moeller,
+jonathan.striebel, joncrall, Jordi, Joseph Munoz, Joshua Cook, JPFrancoia,
+jrfiedler, JulianKahnert, juliathebrave, kaichogami, KamalakerDadi, Kenneth
+Lyons, Kevin Wang, kingjr, kjell, Konstantin Podshumok, Kornel Kielczewski,
+Krishna Kalyan, krishnakalyan3, Kvle Putnam, Kyle Jackson, Lars Buitinck,
+ldavid, LeiG, LeightonZhang, Leland McInnes, Liang-Chi Hsieh, Lilian Besson,
+lizsz, Loic Esteve, Louis Tiao, Léonie Borne, Mads Jensen, Maniteja Nandana,
+Manoj Kumar, Manvendra Singh, Marco, Mario Krell, Mark Bao, Mark Szepieniec,
+Martin Madsen, MartinBpr, MaryanMorel, Massil, Matheus, Mathieu Blondel,
+Mathieu Dubois, Matteo, Matthias Ekman, Max Moroz, Michael Scherer, michiaki
+ariga, Mikhail Korobov, Moussa Taifi, mrandrewandrade, Mridul Seth, nadya-p,
+Naoya Kanai, Nate George, Nelle Varoquaux, Nelson Liu, Nick James,
+NickleDave, Nico, Nicolas Goix, Nikolay Mayorov, ningchi, nlathia,
+okbalefthanded, Okhlopkov, Olivier Grisel, Panos Louridas, Paul Strickland,
+Perrine Letellier, pestrickland, Peter Fischer, Pieter, Ping-Yao, Chang,
+practicalswift, Preston Parry, Qimu Zheng, Rachit Kansal, Raghav RV,
+Ralf Gommers, Ramana.S, Rammig, Randy Olson, Rob Alexander, Robert Lutz,
+Robin Schucker, Rohan Jain, Ruifeng Zheng, Ryan Yu, Rémy Léone, saihttam,
+Saiwing Yeung, Sam Shleifer, Samuel St-Jean, Sartaj Singh, Sasank Chilamkurthy,
+saurabh.bansod, Scott Andrews, Scott Lowe, seales, Sebastian Raschka, Sebastian
+Saeger, Sebastián Vanrell, Sergei Lebedev, shagun Sodhani, shanmuga cv,
+Shashank Shekhar, shawpan, shengxiduan, Shota, shuckle16, Skipper Seabold,
+sklearn-ci, SmedbergM, srvanrell, Sébastien Lerique, Taranjeet, themrmax,
+Thierry, Thierry Guillemot, Thomas, Thomas Hallock, Thomas Moreau, Tim Head,
+tKammy, toastedcornflakes, Tom, TomDLT, Toshihiro Kamishima, tracer0tong, Trent
+Hauck, trevorstephens, Tue Vo, Varun, Varun Jewalikar, Viacheslav, Vighnesh
+Birodkar, Vikram, Villu Ruusmann, Vinayak Mehta, walter, waterponey, Wenhua
+Yang, Wenjian Huang, Will Welch, wyseguy7, xyguo, yanlend, Yaroslav Halchenko,
+yelite, Yen, YenChenLin, Yichuan Liu, Yoav Ram, Yoshiki, Zheng RuiFeng, zivori, Óscar Nájera
+
diff --git a/doc/whats_new/v0.19.rst b/doc/whats_new/v0.19.rst
new file mode 100644
index 0000000000000..eb29ab1599b31
--- /dev/null
+++ b/doc/whats_new/v0.19.rst
@@ -0,0 +1,923 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_19:
+
+Version 0.19
+============
+
+**Release Candidate (0.19b2) July 17, 2017**
+
+Highlights
+----------
+
+We are excited to release a number of great new features including
+:class:`neighbors.LocalOutlierFactor` for anomaly detection,
+:class:`preprocessing.QuantileTransformer` for robust feature transformation,
+and the :class:`multioutput.ClassifierChain` meta-estimator to simply account
+for dependencies between classes in multilabel problems. We have some new
+algorithms in existing estimators, such as multiplicative update in
+:class:`decomposition.NMF` and multinomial
+:class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``).
+
+Cross validation is now able to return the results from multiple metric
+evaluations. The new :func:`model_selection.cross_validate` can return many
+scores on the test data as well as training set performance and timings, and we
+have extended the ``scoring`` and ``refit`` parameters for grid/randomized
+search :ref:`to handle multiple metrics <multimetric_grid_search>`.
+
+You can also learn faster. For instance, the :ref:`new option to cache
+transformations <pipeline_cache>` in :class:`pipeline.Pipeline` makes grid
+search over pipelines including slow transformations much more efficient. And
+you can predict faster: if you're sure you know what you're doing, you can turn
+off validating that the input is finite using :func:`config_context`.
+
+We've made some important fixes too. We've fixed a longstanding implementation
+error in :func:`metrics.average_precision_score`, so please be cautious with
+prior results reported from that function. A number of errors in the
+:class:`manifold.TSNE` implementation have been fixed, particularly in the
+default Barnes-Hut approximation. :class:`semi_supervised.LabelSpreading` and
+:class:`semi_supervised.LabelPropagation` have had substantial fixes.
+LabelPropagation was previously broken. LabelSpreading should now correctly
+respect its alpha parameter.
+
+Changed models
+--------------
+
+The following estimators and functions, when fit with the same data and
+parameters, may produce different models from the previous version. This often
+occurs due to changes in the modelling logic (bug fixes or enhancements), or in
+random sampling procedures.
+
+- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
+- :class:`cross_decomposition.PLSRegression`
+ with ``scale=True`` (bug fix)
+- :class:`ensemble.GradientBoostingClassifier` and
+ :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
+- gradient boosting ``loss='quantile'`` (bug fix)
+- :class:`ensemble.IsolationForest` (bug fix)
+- :class:`feature_selection.SelectFdr` (bug fix)
+- :class:`linear_model.RANSACRegressor` (bug fix)
+- :class:`linear_model.LassoLars` (bug fix)
+- :class:`linear_model.LassoLarsIC` (bug fix)
+- :class:`manifold.TSNE` (bug fix)
+- :class:`neighbors.NearestCentroid` (bug fix)
+- :class:`semi_supervised.LabelSpreading` (bug fix)
+- :class:`semi_supervised.LabelPropagation` (bug fix)
+- tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
+
+Details are listed in the changelog below.
+
+(While we are trying to better inform users by providing this information, we
+cannot assure that this list is complete.)
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and regressors
+
+- Added :class:`multioutput.ClassifierChain` for multi-label
+ classification. By `Adam Kleczewski <adamklec>`_.
+
+- Added solver ``'saga'`` that implements the improved version of Stochastic
+ Average Gradient, in :class:`linear_model.LogisticRegression` and
+ :class:`linear_model.Ridge`. It allows the use of L1 penalty with
+ multinomial logistic loss, and behaves marginally better than 'sag'
+ during the first epochs of ridge and logistic regression.
+ :issue:`8446` by `Arthur Mensch`_.
+
+Other estimators
+
+- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
+ detection based on nearest neighbors.
+ :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
+
+- Added :class:`preprocessing.QuantileTransformer` class and
+ :func:`preprocessing.quantile_transform` function for features
+ normalization based on quantiles.
+ :issue:`8363` by :user:`Denis Engemann <dengemann>`,
+ :user:`Guillaume Lemaitre <glemaitre>`, `Olivier Grisel`_, `Raghav RV`_,
+ :user:`Thierry Guillemot <tguillemot>`, and `Gael Varoquaux`_.
+
+The new solver ``'mu'`` implements a Multiplicative Update in
+ :class:`decomposition.NMF`, allowing the optimization of all
+ beta-divergences, including the Frobenius norm, the generalized
+ Kullback-Leibler divergence and the Itakura-Saito divergence.
+ :issue:`5295` by `Tom Dupre la Tour`_.
+
+Model selection and evaluation
+
+- :class:`model_selection.GridSearchCV` and
+ :class:`model_selection.RandomizedSearchCV` now support simultaneous
+ evaluation of multiple metrics. Refer to the
+ :ref:`multimetric_grid_search` section of the user guide for more
+ information. :issue:`7388` by `Raghav RV`_
+
+- Added the :func:`model_selection.cross_validate` which allows evaluation
+ of multiple metrics. This function returns a dict with more useful
+ information from cross-validation such as the train scores, fit times and
+ score times.
+ Refer to the :ref:`multimetric_cross_validation` section of the user guide
+ for more information. :issue:`7388` by `Raghav RV`_
+
+- Added :func:`metrics.mean_squared_log_error`, which computes
+ the mean square error of the logarithmic transformation of targets,
+ particularly useful for targets with an exponential trend.
+ :issue:`7655` by :user:`Karan Desai <karandesai-96>`.
+
+- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
+ compute Discounted cumulative gain (DCG) and Normalized discounted
+ cumulative gain (NDCG).
+ :issue:`7739` by :user:`David Gasquez <davidgasquez>`.
+
+- Added the :class:`model_selection.RepeatedKFold` and
+ :class:`model_selection.RepeatedStratifiedKFold`.
+ :issue:`8120` by `Neeraj Gangwar`_.
+
+Miscellaneous
+
+- Validation that input data contains no NaN or inf can now be suppressed
+ using :func:`config_context`, at your own risk. This will save on runtime,
+ and may be particularly useful for prediction time. :issue:`7548` by
+ `Joel Nothman`_.
+
+- Added a test to ensure parameter listing in docstrings match the
+ function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
+ `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- The ``min_weight_fraction_leaf`` constraint in tree construction is now
+ more efficient, taking a fast path to declare a node a leaf if its weight
+ is less than 2 * the minimum. Note that the constructed tree will be
+ different from previous versions where ``min_weight_fraction_leaf`` is
+ used. :issue:`7441` by :user:`Nelson Liu <nelson-liu>`.
+
+- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
+ now support sparse input for prediction.
+ :issue:`6101` by :user:`Ibraim Ganiev <olologin>`.
+
+- :class:`ensemble.VotingClassifier` now allows changing estimators by using
+ :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
+ removed by setting it to ``None``.
+ :issue:`7674` by :user:`Yichuan Liu <yl565>`.
+
+- :func:`tree.export_graphviz` now shows configurable number of decimal
+ places. :issue:`8698` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
+ to change output shape of ``transform`` method to 2 dimensional.
+ :issue:`7794` by :user:`Ibraim Ganiev <olologin>` and
+ :user:`Herilalaina Rakotoarison <herilalaina>`.
+
+Linear, kernelized and related models
+
+- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
+ :class:`linear_model.PassiveAggressiveClassifier`,
+ :class:`linear_model.PassiveAggressiveRegressor` and
+ :class:`linear_model.Perceptron` now expose ``max_iter`` and
+ ``tol`` parameters, to handle convergence more precisely.
+ ``n_iter`` parameter is deprecated, and the fitted estimator exposes
+ a ``n_iter_`` attribute, with actual number of iterations before
+ convergence. :issue:`5036` by `Tom Dupre la Tour`_.
+
+- Added ``average`` parameter to perform weight averaging in
+ :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
+ by :user:`Andrea Esuli <aesuli>`.
+
+- :class:`linear_model.RANSACRegressor` no longer throws an error
+ when calling ``fit`` if no inliers are found in its first iteration.
+ Furthermore, causes of skipped iterations are tracked in newly added
+ attributes, ``n_skips_*``.
+ :issue:`7914` by :user:`Michael Horrell <mthorrell>`.
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
+ is a lot faster with ``return_std=True``. :issue:`8591` by
+ :user:`Hadrien Bertrand