diff --git a/.travis.yml b/.travis.yml index 2563b54dc6741..d79723c969458 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ matrix: # This environment tests the newest supported Anaconda release (4.4.0) # It also runs tests requiring Pandas. - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" - NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1" + NUMPY_VERSION="1.13" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.2" CYTHON_VERSION="0.25.2" COVERAGE=true # This environment use pytest to run the tests. It uses the newest # supported Anaconda release (4.4.0). It also runs tests requiring Pandas. @@ -49,7 +49,7 @@ matrix: # flake8 linting on diff wrt common ancestor with upstream/master - env: RUN_FLAKE8="true" SKIP_TESTS="true" DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true" - NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5" + NUMPY_VERSION="1.13" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5" # This environment tests scikit-learn against numpy and scipy master # installed from their CI wheels in a virtualenv with the Python # interpreter provided by travis. diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 63c8da5aafeac..b3f785254c2ae 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -109,7 +109,7 @@ conda update --yes --quiet conda conda create -n $CONDA_ENV_NAME --yes --quiet python numpy scipy \ cython nose coverage matplotlib sphinx=1.6.2 pillow source activate testenv -pip install numpydoc +pip install sphinx-gallery numpydoc # Build and install scikit-learn in dev mode python setup.py develop diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 8cd774d649338..1b0832b19ab9c 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -39,22 +39,30 @@ if [[ "$DISTRIB" == "conda" ]]; then # Configure the conda environment and put it in the path using the # provided versions + if [[ "$USE_PYTEST" == "true" ]]; then + TEST_RUNNER_PACKAGE=pytest + else + TEST_RUNNER_PACKAGE=nose + fi + if [[ "$INSTALL_MKL" == "true" ]]; then - conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ - numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + conda create -n testenv --yes python=$PYTHON_VERSION pip \ + $TEST_RUNNER_PACKAGE numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ mkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} else - conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ - numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + conda create -n testenv --yes python=$PYTHON_VERSION pip \ + $TEST_RUNNER_PACKAGE numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ nomkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} fi source activate testenv - # Install nose-timer via pip - pip install nose-timer + if [[ $USE_PYTEST != "true" ]]; then + # Install nose-timer via pip + pip install nose-timer + fi elif [[ "$DISTRIB" == "ubuntu" ]]; then # At the time of writing numpy 1.9.1 is included in the travis diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index cdcfbe01b3b8b..f7d3ab2a32e0e 100755 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -43,10 +43,13 @@ run_tests() { fi $TEST_CMD sklearn - # Test doc (only with nose until we switch completely to pytest) - if [[ "$USE_PYTEST" != "true" ]]; then - # Going back to git checkout folder needed for make test-doc - cd $OLDPWD + # Going back to git checkout folder 
needed to test documentation + cd $OLDPWD + + if [[ "$USE_PYTEST" == "true" ]]; then + pytest $(find doc -name '*.rst' | sort) + else + # Makefile is using nose make test-doc fi } diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/doc/README.md b/doc/README.md index 141db3d7a8da5..82240fb701aa3 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,8 +1,13 @@ # Documentation for scikit-learn This section contains the full manual and web page as displayed in -http://scikit-learn.org. To generate the full web page, including -the example gallery (this might take a while): +http://scikit-learn.org. +Building the website requires the sphinx and sphinx-gallery packages: + + pip install sphinx sphinx-gallery + +To generate the full web page, including the example gallery (this might take a +while): make html @@ -16,7 +21,6 @@ To build the PDF manual, run make latexpdf - The website is hosted at github and can be updated manually (for releases) by pushing to the https://github.com/scikit-learn/scikit-learn.github.io repository. diff --git a/doc/datasets/conftest.py b/doc/datasets/conftest.py new file mode 100644 index 0000000000000..0ccc0bced9ee7 --- /dev/null +++ b/doc/datasets/conftest.py @@ -0,0 +1,75 @@ +from os.path import exists +from os.path import join + +import numpy as np + +from sklearn.utils.testing import SkipTest +from sklearn.utils.testing import check_skip_network +from sklearn.datasets import get_data_home +from sklearn.utils.testing import install_mldata_mock +from sklearn.utils.testing import uninstall_mldata_mock + + +def setup_labeled_faces(): + data_home = get_data_home() + if not exists(join(data_home, 'lfw_home')): + raise SkipTest("Skipping dataset loading doctests") + + +def setup_mldata(): + # setup mock urllib2 module to avoid downloading from mldata.org + install_mldata_mock({ + 'mnist-original': { + 'data': np.empty((70000, 784)), + 'label': np.repeat(np.arange(10, dtype='d'), 7000), + }, + 'iris': { + 'data': np.empty((150, 4)), + }, + 'datasets-uci-iris': { + 'double0': np.empty((150, 4)), + 'class': np.empty((150,)), + }, + }) + + +def teardown_mldata(): + uninstall_mldata_mock() + + +def setup_rcv1(): + check_skip_network() + # skip the test in rcv1.rst if the dataset is not already loaded + rcv1_dir = join(get_data_home(), "RCV1") + if not exists(rcv1_dir): + raise SkipTest("Download RCV1 dataset to run this test.") + + +def setup_twenty_newsgroups(): + data_home = get_data_home() + if not exists(join(data_home, '20news_home')): + raise SkipTest("Skipping dataset loading doctests") + + +def setup_working_with_text_data(): + check_skip_network() + + +def pytest_runtest_setup(item): + fname = item.fspath.strpath + if fname.endswith('datasets/labeled_faces.rst'): + setup_labeled_faces() + elif fname.endswith('datasets/mldata.rst'): + setup_mldata() + elif fname.endswith('datasets/rcv1.rst'): + setup_rcv1() + elif fname.endswith('datasets/twenty_newsgroups.rst'): + setup_twenty_newsgroups() + elif fname.endswith('datasets/working_with_text_data.rst'): + setup_working_with_text_data() + + +def pytest_runtest_teardown(item): + fname = item.fspath.strpath + if fname.endswith('datasets/mldata.rst'): + teardown_mldata() diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst index 5083317cffc53..b94dfd7620a24 100644 --- a/doc/datasets/mldata.rst +++ b/doc/datasets/mldata.rst @@ -3,6 +3,11 @@ >>> import numpy as np >>> import os + >>> import tempfile + >>> # Create a temporary folder for the data fetcher + >>> 
custom_data_home = tempfile.mkdtemp() + >>> os.makedirs(os.path.join(custom_data_home, 'mldata')) + .. _mldata: @@ -70,3 +75,8 @@ defaults to individual datasets: ... data_home=custom_data_home) >>> iris3 = fetch_mldata('datasets-UCI iris', target_name='class', ... data_name='double0', data_home=custom_data_home) + + +.. + >>> import shutil + >>> shutil.rmtree(custom_data_home) diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py index 37d9f9af05dc3..0ee5cccaa0f5e 100644 --- a/doc/datasets/mldata_fixture.py +++ b/doc/datasets/mldata_fixture.py @@ -3,26 +3,12 @@ Mock urllib2 access to mldata.org and create a temporary data folder. """ -from os import makedirs -from os.path import join import numpy as np -import tempfile -import shutil -from sklearn import datasets from sklearn.utils.testing import install_mldata_mock from sklearn.utils.testing import uninstall_mldata_mock -def globs(globs): - # Create a temporary folder for the data fetcher - global custom_data_home - custom_data_home = tempfile.mkdtemp() - makedirs(join(custom_data_home, 'mldata')) - globs['custom_data_home'] = custom_data_home - return globs - - def setup_module(): # setup mock urllib2 module to avoid downloading from mldata.org install_mldata_mock({ @@ -42,4 +28,3 @@ def setup_module(): def teardown_module(): uninstall_mldata_mock() - shutil.rmtree(custom_data_home) diff --git a/doc/index.rst b/doc/index.rst index e835de46a660e..ecea32e3229b9 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -207,13 +207,13 @@
  [Hunk body rendered from HTML: the front-page news list in doc/index.rst. Each removed/added line pair differs only in HTML markup that was lost when the page was extracted to text, so the pairs read identically here. Entries touched: "On-going development: What's new (Changelog)"; "September 2016. scikit-learn 0.18.0 is available for download (Changelog)."; "November 2015. scikit-learn 0.17.0 is available for download (Changelog)."; "March 2015. scikit-learn 0.16.0 is available for download (Changelog)."; "July 2014. scikit-learn 0.15.0 is available for download (Changelog)."; the "July 14-20th, 2014: international sprint" note, kept as context in a second hunk (@@ -227,7 +227,7 @@, ending "Inria, and tinyclues."); and "August 2013. scikit-learn 0.14 is available for download (Changelog)."]
  • diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index b47726979351f..c68bb7ef275b0 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -273,7 +273,7 @@ validation strategies. .. _iid_cv: Cross-validation iterators for i.i.d. data -========================================== +------------------------------------------ Assuming that some data is Independent and Identically Distributed (i.i.d.) is making the assumption that all samples stem from the same generative process @@ -294,7 +294,7 @@ devices) it safer to use :ref:`group-wise cross-validation `. K-fold ------- +^^^^^^ :class:`KFold` divides all the samples in :math:`k` groups of samples, called folds (if :math:`k = n`, this is equivalent to the *Leave One @@ -323,7 +323,7 @@ Thus, one can create the training/test sets using numpy indexing:: Repeated K-Fold ---------------- +^^^^^^^^^^^^^^^ :class:`RepeatedKFold` repeats K-Fold n times. It can be used when one requires to run :class:`KFold` n times, producing different splits in @@ -350,7 +350,7 @@ with different randomization in each repetition. Leave One Out (LOO) -------------------- +^^^^^^^^^^^^^^^^^^^ :class:`LeaveOneOut` (or LOO) is a simple cross-validation. Each learning set is created by taking all the samples except one, the test set being @@ -408,7 +408,7 @@ fold cross validation should be preferred to LOO. Leave P Out (LPO) ------------------ +^^^^^^^^^^^^^^^^^ :class:`LeavePOut` is very similar to :class:`LeaveOneOut` as it creates all the possible training/test sets by removing :math:`p` samples from the complete @@ -435,7 +435,7 @@ Example of Leave-2-Out on a dataset with 4 samples:: .. _ShuffleSplit: Random permutations cross-validation a.k.a. Shuffle & Split ------------------------------------------------------------ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :class:`ShuffleSplit` @@ -465,7 +465,7 @@ validation that allows a finer control on the number of iterations and the proportion of samples on each side of the train / test split. Cross-validation iterators with stratification based on class labels. -===================================================================== +--------------------------------------------------------------------- Some classification problems can exhibit a large imbalance in the distribution of the target classes: for instance there could be several times more negative @@ -475,7 +475,7 @@ stratified sampling as implemented in :class:`StratifiedKFold` and approximately preserved in each train and validation fold. Stratified k-fold ------------------ +^^^^^^^^^^^^^^^^^ :class:`StratifiedKFold` is a variation of *k-fold* which returns *stratified* folds: each set contains approximately the same percentage of samples of each @@ -500,7 +500,7 @@ with different randomization in each repetition. Stratified Shuffle Split ------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^ :class:`StratifiedShuffleSplit` is a variation of *ShuffleSplit*, which returns stratified splits, *i.e* which creates splits by preserving the same @@ -509,7 +509,7 @@ percentage for each target class as in the complete set. .. _group_cv: Cross-validation iterators for grouped data. -============================================ +-------------------------------------------- The i.i.d. assumption is broken if the underlying generative process yield groups of dependent samples. @@ -530,7 +530,7 @@ parameter. 
Group k-fold ------------- +^^^^^^^^^^^^ :class:`GroupKFold` is a variation of k-fold which ensures that the same group is not represented in both testing and training sets. For example if the data is @@ -560,7 +560,7 @@ size due to the imbalance in the data. Leave One Group Out -------------------- +^^^^^^^^^^^^^^^^^^^ :class:`LeaveOneGroupOut` is a cross-validation scheme which holds out the samples according to a third-party provided array of integer groups. This @@ -591,7 +591,7 @@ groups could be the year of collection of the samples and thus allow for cross-validation against time-based splits. Leave P Groups Out ------------------- +^^^^^^^^^^^^^^^^^^ :class:`LeavePGroupsOut` is similar as :class:`LeaveOneGroupOut`, but removes samples related to :math:`P` groups for each training/test set. @@ -611,7 +611,7 @@ Example of Leave-2-Group Out:: [0 1] [2 3 4 5] Group Shuffle Split -------------------- +^^^^^^^^^^^^^^^^^^^ The :class:`GroupShuffleSplit` iterator behaves as a combination of :class:`ShuffleSplit` and :class:`LeavePGroupsOut`, and generates a @@ -643,7 +643,7 @@ generated by :class:`LeavePGroupsOut`. Predefined Fold-Splits / Validation-Sets -======================================== +---------------------------------------- For some datasets, a pre-defined split of the data into training- and validation fold or into several cross-validation folds already @@ -656,7 +656,7 @@ samples that are part of the validation set, and to -1 for all other samples. .. _timeseries_cv: Cross validation of time series data -==================================== +------------------------------------ Time series data is characterised by the correlation between observations that are near in time (*autocorrelation*). However, classical @@ -671,7 +671,7 @@ solution is provided by :class:`TimeSeriesSplit`. Time Series Split ------------------ +^^^^^^^^^^^^^^^^^ :class:`TimeSeriesSplit` is a variation of *k-fold* which returns first :math:`k` folds as train set and the :math:`(k+1)` th diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 386865d3d0a8a..62d566fe150ba 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -212,13 +212,12 @@ Then ``dual_coef_`` looks like this: Scores and probabilities ------------------------ -The :class:`SVC` method ``decision_function`` gives per-class scores -for each sample (or a single score per sample in the binary case). -When the constructor option ``probability`` is set to ``True``, -class membership probability estimates -(from the methods ``predict_proba`` and ``predict_log_proba``) are enabled. -In the binary case, the probabilities are calibrated using Platt scaling: -logistic regression on the SVM's scores, +The ``decision_function`` method of :class:`SVC` and :class:`NuSVC` gives +per-class scores for each sample (or a single score per sample in the binary +case). When the constructor option ``probability`` is set to ``True``, +class membership probability estimates (from the methods ``predict_proba`` and +``predict_log_proba``) are enabled. In the binary case, the probabilities are +calibrated using Platt scaling: logistic regression on the SVM's scores, fit by an additional cross-validation on the training data. In the multiclass case, this is extended as per Wu et al. (2004). @@ -245,7 +244,7 @@ and use ``decision_function`` instead of ``predict_proba``. * Platt `"Probabilistic outputs for SVMs and comparisons to regularized likelihood methods" - `. + `_. 
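[Editorial aside, not part of the patch: the rewritten "Scores and probabilities" passage of doc/modules/svm.rst above is easy to sanity-check with a short, self-contained sketch. It uses only the public scikit-learn API named in that passage (SVC, decision_function, probability, predict_proba, random_state); the toy dataset is made up for illustration.]

# Minimal sketch of the behaviour described above (illustrative data only).
import numpy as np
from sklearn.svm import SVC

# Two well-separated blobs, 20 samples per class.
rng = np.random.RandomState(0)
X = np.r_[rng.randn(20, 2) - [2, 2], rng.randn(20, 2) + [2, 2]]
y = np.array([0] * 20 + [1] * 20)

# decision_function is always available: one signed score per sample
# in the binary case.
clf = SVC(kernel='linear').fit(X, y)
print(clf.decision_function(X[:3]))

# probability=True enables predict_proba / predict_log_proba; in the binary
# case the scores are calibrated with Platt scaling, fit by an internal
# cross-validation that shuffles the data, hence random_state matters here.
clf_proba = SVC(kernel='linear', probability=True, random_state=0).fit(X, y)
print(clf_proba.predict_proba(X[:3]))

[As the "Randomness of the underlying implementations" tip added later in this same file notes, random_state affects SVC only in this probability=True configuration.]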
Unbalanced problems -------------------- @@ -399,7 +398,7 @@ Tips on Practical Use function can be configured to be almost the same as the :class:`LinearSVC` model. - * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`nuSVC` and + * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and :class:`NuSVR`, the size of the kernel cache has a strong impact on run times for larger problems. If you have enough RAM available, it is recommended to set ``cache_size`` to a higher value than the default of @@ -423,10 +422,24 @@ Tips on Practical Use positive and few negative), set ``class_weight='balanced'`` and/or try different penalty parameters ``C``. - * The underlying :class:`LinearSVC` implementation uses a random - number generator to select features when fitting the model. It is - thus not uncommon, to have slightly different results for the same - input data. If that happens, try with a smaller tol parameter. + * **Randomness of the underlying implementations**: The underlying + implementations of :class:`SVC` and :class:`NuSVC` use a random number + generator only to shuffle the data for probability estimation (when + ``probability`` is set to ``True``). This randomness can be controlled + with the ``random_state`` parameter. If ``probability`` is set to ``False`` + these estimators are not random and ``random_state`` has no effect on the + results. The underlying :class:`OneClassSVM` implementation is similar to + the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation + is provided for :class:`OneClassSVM`, it is not random. + + The underlying :class:`LinearSVC` implementation uses a random number + generator to select features when fitting the model with a dual coordinate + descent (i.e when ``dual`` is set to ``True``). It is thus not uncommon, + to have slightly different results for the same input data. If that + happens, try with a smaller tol parameter. This randomness can also be + controlled with the ``random_state`` parameter. When ``dual`` is + set to ``False`` the underlying implementation of :class:`LinearSVC` is + not random and ``random_state`` has no effect on the results. * Using L1 penalization as provided by ``LinearSVC(loss='l2', penalty='l1', dual=False)`` yields a sparse solution, i.e. 
only a subset of feature diff --git a/doc/sphinxext/sphinx_gallery/__init__.py b/doc/sphinxext/sphinx_gallery/__init__.py deleted file mode 100644 index e113f97d2a2c7..0000000000000 --- a/doc/sphinxext/sphinx_gallery/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Sphinx Gallery -============== - -""" -import os -__version__ = '0.1.11' - - -def glr_path_static(): - """Returns path to packaged static files""" - return os.path.abspath(os.path.join(os.path.dirname(__file__), '_static')) diff --git a/doc/sphinxext/sphinx_gallery/_static/broken_example.png b/doc/sphinxext/sphinx_gallery/_static/broken_example.png deleted file mode 100644 index 4fea24e7df478..0000000000000 Binary files a/doc/sphinxext/sphinx_gallery/_static/broken_example.png and /dev/null differ diff --git a/doc/sphinxext/sphinx_gallery/_static/gallery.css b/doc/sphinxext/sphinx_gallery/_static/gallery.css deleted file mode 100644 index 37047a9b91175..0000000000000 --- a/doc/sphinxext/sphinx_gallery/_static/gallery.css +++ /dev/null @@ -1,192 +0,0 @@ -/* -Sphinx-Gallery has compatible CSS to fix default sphinx themes -Tested for Sphinx 1.3.1 for all themes: default, alabaster, sphinxdoc, -scrolls, agogo, traditional, nature, haiku, pyramid -Tested for Read the Docs theme 0.1.7 */ -.sphx-glr-thumbcontainer { - background: #fff; - border: solid #fff 1px; - -moz-border-radius: 5px; - -webkit-border-radius: 5px; - border-radius: 5px; - box-shadow: none; - float: left; - margin: 5px; - min-height: 230px; - padding-top: 5px; - position: relative; -} -.sphx-glr-thumbcontainer:hover { - border: solid #b4ddfc 1px; - box-shadow: 0 0 15px rgba(142, 176, 202, 0.5); -} -.sphx-glr-thumbcontainer a.internal { - bottom: 0; - display: block; - left: 0; - padding: 150px 10px 0; - position: absolute; - right: 0; - top: 0; -} -/* Next one is to avoid Sphinx traditional theme to cover all the -thumbnail with its default link Background color */ -.sphx-glr-thumbcontainer a.internal:hover { - background-color: transparent; -} - -.sphx-glr-thumbcontainer p { - margin: 0 0 .1em 0; -} -.sphx-glr-thumbcontainer .figure { - margin: 10px; - width: 160px; -} -.sphx-glr-thumbcontainer img { - display: inline; - max-height: 160px; - width: 160px; -} -.sphx-glr-thumbcontainer[tooltip]:hover:after { - background: rgba(0, 0, 0, 0.8); - -webkit-border-radius: 5px; - -moz-border-radius: 5px; - border-radius: 5px; - color: #fff; - content: attr(tooltip); - left: 95%; - padding: 5px 15px; - position: absolute; - z-index: 98; - width: 220px; - bottom: 52%; -} -.sphx-glr-thumbcontainer[tooltip]:hover:before { - border: solid; - border-color: #333 transparent; - border-width: 18px 0 0 20px; - bottom: 58%; - content: ''; - left: 85%; - position: absolute; - z-index: 99; -} - -.highlight-pytb pre { - background-color: #ffe4e4; - border: 1px solid #f66; - margin-top: 10px; - padding: 7px; -} - -.sphx-glr-script-out { - color: #888; - margin: 0; -} -.sphx-glr-script-out .highlight { - background-color: transparent; - margin-left: 2.5em; - margin-top: -1.4em; -} -.sphx-glr-script-out .highlight pre { - background-color: #fafae2; - border: 0; - max-height: 30em; - overflow: auto; - padding-left: 1ex; - margin: 0px; - word-break: break-word; -} -.sphx-glr-script-out + p { - margin-top: 1.8em; -} -blockquote.sphx-glr-script-out { - margin-left: 0pt; -} - -div.sphx-glr-footer { - text-align: center; -} - -div.sphx-glr-download { - display: inline-block; - margin: 1em auto 1ex 2ex; - vertical-align: middle; -} - -div.sphx-glr-download a { - background-color: #ffc; - 
background-image: linear-gradient(to bottom, #FFC, #d5d57e); - border-radius: 4px; - border: 1px solid #c2c22d; - color: #000; - display: inline-block; - /* Not valid in old browser, hence we keep the line above to override */ - display: table-caption; - font-weight: bold; - padding: 1ex; - text-align: center; -} - -/* The last child of a download button is the file name */ -div.sphx-glr-download a span:last-child { - font-size: smaller; -} - -@media (min-width: 20em) { - div.sphx-glr-download a { - min-width: 10em; - } -} - -@media (min-width: 30em) { - div.sphx-glr-download a { - min-width: 13em; - } -} - -@media (min-width: 40em) { - div.sphx-glr-download a { - min-width: 16em; - } -} - - -div.sphx-glr-download code.download { - display: inline-block; - white-space: normal; - word-break: normal; - overflow-wrap: break-word; - /* border and background are given by the enclosing 'a' */ - border: none; - background: none; -} - -div.sphx-glr-download a:hover { - box-shadow: inset 0 1px 0 rgba(255,255,255,.1), 0 1px 5px rgba(0,0,0,.25); - text-decoration: none; - background-image: none; - background-color: #d5d57e; -} - -ul.sphx-glr-horizontal { - list-style: none; - padding: 0; -} -ul.sphx-glr-horizontal li { - display: inline; -} -ul.sphx-glr-horizontal img { - height: auto !important; -} - -p.sphx-glr-signature a.reference.external { - -moz-border-radius: 5px; - -webkit-border-radius: 5px; - border-radius: 5px; - padding: 3px; - font-size: 75%; - text-align: right; - margin-left: auto; - display: table; -} diff --git a/doc/sphinxext/sphinx_gallery/_static/no_image.png b/doc/sphinxext/sphinx_gallery/_static/no_image.png deleted file mode 100644 index 8c2d48d5d3f00..0000000000000 Binary files a/doc/sphinxext/sphinx_gallery/_static/no_image.png and /dev/null differ diff --git a/doc/sphinxext/sphinx_gallery/backreferences.py b/doc/sphinxext/sphinx_gallery/backreferences.py deleted file mode 100644 index 32e4dd913f901..0000000000000 --- a/doc/sphinxext/sphinx_gallery/backreferences.py +++ /dev/null @@ -1,197 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Óscar Nájera -# License: 3-clause BSD -""" -Backreferences Generator -======================== - -Parses example file code in order to keep track of used functions -""" - -from __future__ import print_function -import ast -import os - - -# Try Python 2 first, otherwise load from Python 3 -try: - import cPickle as pickle -except ImportError: - import pickle - - -class NameFinder(ast.NodeVisitor): - """Finds the longest form of variable names and their imports in code - - Only retains names from imported modules. - """ - - def __init__(self): - super(NameFinder, self).__init__() - self.imported_names = {} - self.accessed_names = set() - - def visit_Import(self, node, prefix=''): - for alias in node.names: - local_name = alias.asname or alias.name - self.imported_names[local_name] = prefix + alias.name - - def visit_ImportFrom(self, node): - self.visit_Import(node, node.module + '.') - - def visit_Name(self, node): - self.accessed_names.add(node.id) - - def visit_Attribute(self, node): - attrs = [] - while isinstance(node, ast.Attribute): - attrs.append(node.attr) - node = node.value - - if isinstance(node, ast.Name): - # This is a.b, not e.g. 
a().b - attrs.append(node.id) - self.accessed_names.add('.'.join(reversed(attrs))) - else: - # need to get a in a().b - self.visit(node) - - def get_mapping(self): - for name in self.accessed_names: - local_name = name.split('.', 1)[0] - remainder = name[len(local_name):] - if local_name in self.imported_names: - # Join import path to relative path - full_name = self.imported_names[local_name] + remainder - yield name, full_name - - -def get_short_module_name(module_name, obj_name): - """ Get the shortest possible module name """ - parts = module_name.split('.') - short_name = module_name - for i in range(len(parts) - 1, 0, -1): - short_name = '.'.join(parts[:i]) - try: - exec('from %s import %s' % (short_name, obj_name)) - except Exception: # libraries can throw all sorts of exceptions... - # get the last working module name - short_name = '.'.join(parts[:(i + 1)]) - break - return short_name - - -def identify_names(code): - """Builds a codeobj summary by identifying and resolving used names - - >>> code = ''' - ... from a.b import c - ... import d as e - ... print(c) - ... e.HelloWorld().f.g - ... ''' - >>> for name, o in sorted(identify_names(code).items()): - ... print(name, o['name'], o['module'], o['module_short']) - c c a.b a.b - e.HelloWorld HelloWorld d d - """ - finder = NameFinder() - try: - finder.visit(ast.parse(code)) - except SyntaxError: - return {} - - example_code_obj = {} - for name, full_name in finder.get_mapping(): - # name is as written in file (e.g. np.asarray) - # full_name includes resolved import path (e.g. numpy.asarray) - splitted = full_name.rsplit('.', 1) - if len(splitted) == 1: - # module without attribute. This is not useful for - # backreferences - continue - - module, attribute = splitted - # get shortened module name - module_short = get_short_module_name(module, attribute) - cobj = {'name': attribute, 'module': module, - 'module_short': module_short} - example_code_obj[name] = cobj - return example_code_obj - - -def scan_used_functions(example_file, gallery_conf): - """save variables so we can later add links to the documentation""" - example_code_obj = identify_names(open(example_file).read()) - if example_code_obj: - codeobj_fname = example_file[:-3] + '_codeobj.pickle' - with open(codeobj_fname, 'wb') as fid: - pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL) - - backrefs = set('{module_short}.{name}'.format(**entry) - for entry in example_code_obj.values() - if entry['module'].startswith(gallery_conf['doc_module'])) - - return backrefs - - -THUMBNAIL_TEMPLATE = """ -.. raw:: html - -
-    <div class="sphx-glr-thumbcontainer" tooltip="{snippet}"> - -.. only:: html - - .. figure:: /{thumbnail} - - :ref:`sphx_glr_{ref_name}` - -.. raw:: html - -    </div>
    -""" - -BACKREF_THUMBNAIL_TEMPLATE = THUMBNAIL_TEMPLATE + """ -.. only:: not html - - * :ref:`sphx_glr_{ref_name}` -""" - - -def _thumbnail_div(full_dir, fname, snippet, is_backref=False): - """Generates RST to place a thumbnail in a gallery""" - thumb = os.path.join(full_dir, 'images', 'thumb', - 'sphx_glr_%s_thumb.png' % fname[:-3]) - - # Inside rst files forward slash defines paths - thumb = thumb.replace(os.sep, "/") - - ref_name = os.path.join(full_dir, fname).replace(os.path.sep, '_') - - template = BACKREF_THUMBNAIL_TEMPLATE if is_backref else THUMBNAIL_TEMPLATE - return template.format(snippet=snippet, thumbnail=thumb, ref_name=ref_name) - - -def write_backreferences(seen_backrefs, gallery_conf, - target_dir, fname, snippet): - """Writes down back reference files, which include a thumbnail list - of examples using a certain module""" - if gallery_conf['backreferences_dir'] is None: - return - - example_file = os.path.join(target_dir, fname) - build_target_dir = os.path.relpath(target_dir, gallery_conf['src_dir']) - backrefs = scan_used_functions(example_file, gallery_conf) - for backref in backrefs: - include_path = os.path.join(gallery_conf['src_dir'], - gallery_conf['backreferences_dir'], - '%s.examples' % backref) - seen = backref in seen_backrefs - with open(include_path, 'a' if seen else 'w') as ex_file: - if not seen: - heading = '\n\nExamples using ``%s``' % backref - ex_file.write(heading + '\n') - ex_file.write('^' * len(heading) + '\n') - ex_file.write(_thumbnail_div(build_target_dir, fname, snippet, - is_backref=True)) - seen_backrefs.add(backref) diff --git a/doc/sphinxext/sphinx_gallery/docs_resolv.py b/doc/sphinxext/sphinx_gallery/docs_resolv.py deleted file mode 100644 index 0f9943b683d1c..0000000000000 --- a/doc/sphinxext/sphinx_gallery/docs_resolv.py +++ /dev/null @@ -1,463 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Óscar Nájera -# License: 3-clause BSD -""" -Link resolver objects -===================== -""" -from __future__ import print_function -import gzip -import os -import posixpath -import re -import shelve -import sys - -from sphinx.util.console import fuchsia - -# Try Python 2 first, otherwise load from Python 3 -try: - import cPickle as pickle - import urllib2 as urllib - from urllib2 import HTTPError, URLError -except ImportError: - import pickle - import urllib.request - import urllib.error - import urllib.parse - from urllib.error import HTTPError, URLError - -from io import StringIO - - -def _get_data(url): - """Helper function to get data over http or from a local file""" - if url.startswith('http://'): - # Try Python 2, use Python 3 on exception - try: - resp = urllib.urlopen(url) - encoding = resp.headers.dict.get('content-encoding', 'plain') - except AttributeError: - resp = urllib.request.urlopen(url) - encoding = resp.headers.get('content-encoding', 'plain') - data = resp.read() - if encoding == 'plain': - pass - elif encoding == 'gzip': - data = StringIO(data) - data = gzip.GzipFile(fileobj=data).read() - else: - raise RuntimeError('unknown encoding') - else: - with open(url, 'r') as fid: - data = fid.read() - - return data - - -def get_data(url, gallery_dir): - """Persistent dictionary usage to retrieve the search indexes""" - - # shelve keys need to be str in python 2 - if sys.version_info[0] == 2 and isinstance(url, unicode): - url = url.encode('utf-8') - - cached_file = os.path.join(gallery_dir, 'searchindex') - search_index = shelve.open(cached_file) - if url in search_index: - data = search_index[url] - else: - data = _get_data(url) - 
search_index[url] = data - search_index.close() - - return data - - -def _select_block(str_in, start_tag, end_tag): - """Select first block delimited by start_tag and end_tag""" - start_pos = str_in.find(start_tag) - if start_pos < 0: - raise ValueError('start_tag not found') - depth = 0 - for pos in range(start_pos, len(str_in)): - if str_in[pos] == start_tag: - depth += 1 - elif str_in[pos] == end_tag: - depth -= 1 - - if depth == 0: - break - sel = str_in[start_pos + 1:pos] - return sel - - -def _parse_dict_recursive(dict_str): - """Parse a dictionary from the search index""" - dict_out = dict() - pos_last = 0 - pos = dict_str.find(':') - while pos >= 0: - key = dict_str[pos_last:pos] - if dict_str[pos + 1] == '[': - # value is a list - pos_tmp = dict_str.find(']', pos + 1) - if pos_tmp < 0: - raise RuntimeError('error when parsing dict') - value = dict_str[pos + 2: pos_tmp].split(',') - # try to convert elements to int - for i in range(len(value)): - try: - value[i] = int(value[i]) - except ValueError: - pass - elif dict_str[pos + 1] == '{': - # value is another dictionary - subdict_str = _select_block(dict_str[pos:], '{', '}') - value = _parse_dict_recursive(subdict_str) - pos_tmp = pos + len(subdict_str) - else: - raise ValueError('error when parsing dict: unknown elem') - - key = key.strip('"') - if len(key) > 0: - dict_out[key] = value - - pos_last = dict_str.find(',', pos_tmp) - if pos_last < 0: - break - pos_last += 1 - pos = dict_str.find(':', pos_last) - - return dict_out - - -def parse_sphinx_searchindex(searchindex): - """Parse a Sphinx search index - - Parameters - ---------- - searchindex : str - The Sphinx search index (contents of searchindex.js) - - Returns - ------- - filenames : list of str - The file names parsed from the search index. - objects : dict - The objects parsed from the search index. - """ - # Make sure searchindex uses UTF-8 encoding - if hasattr(searchindex, 'decode'): - searchindex = searchindex.decode('UTF-8') - - # parse objects - query = 'objects:' - pos = searchindex.find(query) - if pos < 0: - raise ValueError('"objects:" not found in search index') - - sel = _select_block(searchindex[pos:], '{', '}') - objects = _parse_dict_recursive(sel) - - # parse filenames - query = 'filenames:' - pos = searchindex.find(query) - if pos < 0: - raise ValueError('"filenames:" not found in search index') - filenames = searchindex[pos + len(query) + 1:] - filenames = filenames[:filenames.find(']')] - filenames = [f.strip('"') for f in filenames.split(',')] - - return filenames, objects - - -class SphinxDocLinkResolver(object): - """ Resolve documentation links using searchindex.js generated by Sphinx - - Parameters - ---------- - doc_url : str - The base URL of the project website. - searchindex : str - Filename of searchindex, relative to doc_url. - extra_modules_test : list of str - List of extra module names to test. - relative : bool - Return relative links (only useful for links to documentation of this - package). 
- """ - - def __init__(self, doc_url, gallery_dir, searchindex='searchindex.js', - extra_modules_test=None, relative=False): - self.doc_url = doc_url - self.gallery_dir = gallery_dir - self.relative = relative - self._link_cache = {} - - self.extra_modules_test = extra_modules_test - self._page_cache = {} - if doc_url.startswith('http://'): - if relative: - raise ValueError('Relative links are only supported for local ' - 'URLs (doc_url cannot start with "http://)"') - searchindex_url = doc_url + '/' + searchindex - else: - searchindex_url = os.path.join(doc_url, searchindex) - - # detect if we are using relative links on a Windows system - if os.name.lower() == 'nt' and not doc_url.startswith('http://'): - if not relative: - raise ValueError('You have to use relative=True for the local' - ' package on a Windows system.') - self._is_windows = True - else: - self._is_windows = False - - # download and initialize the search index - sindex = get_data(searchindex_url, gallery_dir) - filenames, objects = parse_sphinx_searchindex(sindex) - - self._searchindex = dict(filenames=filenames, objects=objects) - - def _get_link(self, cobj): - """Get a valid link, False if not found""" - - fname_idx = None - full_name = cobj['module_short'] + '.' + cobj['name'] - if full_name in self._searchindex['objects']: - value = self._searchindex['objects'][full_name] - if isinstance(value, dict): - value = value[next(iter(value.keys()))] - fname_idx = value[0] - elif cobj['module_short'] in self._searchindex['objects']: - value = self._searchindex['objects'][cobj['module_short']] - if cobj['name'] in value.keys(): - fname_idx = value[cobj['name']][0] - - if fname_idx is not None: - fname = self._searchindex['filenames'][fname_idx] - # In 1.5+ Sphinx seems to have changed from .rst.html to only - # .html extension in converted files. But URLs could be - # built with < 1.5 or >= 1.5 regardless of what we're currently - # building with, so let's just check both :( - fnames = [fname + '.html', os.path.splitext(fname)[0] + '.html'] - for fname in fnames: - try: - if self._is_windows: - fname = fname.replace('/', '\\') - link = os.path.join(self.doc_url, fname) - else: - link = posixpath.join(self.doc_url, fname) - - if hasattr(link, 'decode'): - link = link.decode('utf-8', 'replace') - - if link in self._page_cache: - html = self._page_cache[link] - else: - html = get_data(link, self.gallery_dir) - self._page_cache[link] = html - except (HTTPError, URLError, IOError): - pass - else: - break - else: - raise - - # test if cobj appears in page - comb_names = [cobj['module_short'] + '.' + cobj['name']] - if self.extra_modules_test is not None: - for mod in self.extra_modules_test: - comb_names.append(mod + '.' + cobj['name']) - url = False - if hasattr(html, 'decode'): - # Decode bytes under Python 3 - html = html.decode('utf-8', 'replace') - - for comb_name in comb_names: - if hasattr(comb_name, 'decode'): - # Decode bytes under Python 3 - comb_name = comb_name.decode('utf-8', 'replace') - if comb_name in html: - url = link + u'#' + comb_name - link = url - else: - link = False - - return link - - def resolve(self, cobj, this_url): - """Resolve the link to the documentation, returns None if not found - - Parameters - ---------- - cobj : dict - Dict with information about the "code object" for which we are - resolving a link. - cobj['name'] : function or class name (str) - cobj['module_short'] : shortened module name (str) - cobj['module'] : module name (str) - this_url: str - URL of the current page. 
Needed to construct relative URLs - (only used if relative=True in constructor). - - Returns - ------- - link : str | None - The link (URL) to the documentation. - """ - full_name = cobj['module_short'] + '.' + cobj['name'] - link = self._link_cache.get(full_name, None) - if link is None: - # we don't have it cached - link = self._get_link(cobj) - # cache it for the future - self._link_cache[full_name] = link - - if link is False or link is None: - # failed to resolve - return None - - if self.relative: - link = os.path.relpath(link, start=this_url) - if self._is_windows: - # replace '\' with '/' so it on the web - link = link.replace('\\', '/') - - # for some reason, the relative link goes one directory too high up - link = link[3:] - - return link - - -def _embed_code_links(app, gallery_conf, gallery_dir): - # Add resolvers for the packages for which we want to show links - doc_resolvers = {} - - src_gallery_dir = os.path.join(app.builder.srcdir, gallery_dir) - for this_module, url in gallery_conf['reference_url'].items(): - try: - if url is None: - doc_resolvers[this_module] = SphinxDocLinkResolver( - app.builder.outdir, - src_gallery_dir, - relative=True) - else: - doc_resolvers[this_module] = SphinxDocLinkResolver(url, - src_gallery_dir) - - except HTTPError as e: - print("The following HTTP Error has occurred:\n") - print(e.code) - except URLError as e: - print("\n...\n" - "Warning: Embedding the documentation hyperlinks requires " - "Internet access.\nPlease check your network connection.\n" - "Unable to continue embedding `{0}` links due to a URL " - "Error:\n".format(this_module)) - print(e.args) - - html_gallery_dir = os.path.abspath(os.path.join(app.builder.outdir, - gallery_dir)) - - # patterns for replacement - link_pattern = ('%s') - orig_pattern = '%s' - period = '.' 
- - # This could be turned into a generator if necessary, but should be okay - flat = [[dirpath, filename] - for dirpath, _, filenames in os.walk(html_gallery_dir) - for filename in filenames] - iterator = app.status_iterator( - flat, os.path.basename(html_gallery_dir), colorfunc=fuchsia, - length=len(flat), stringify_func=lambda x: os.path.basename(x[1])) - for dirpath, fname in iterator: - full_fname = os.path.join(html_gallery_dir, dirpath, fname) - subpath = dirpath[len(html_gallery_dir) + 1:] - pickle_fname = os.path.join(src_gallery_dir, subpath, - fname[:-5] + '_codeobj.pickle') - - if os.path.exists(pickle_fname): - # we have a pickle file with the objects to embed links for - with open(pickle_fname, 'rb') as fid: - example_code_obj = pickle.load(fid) - fid.close() - str_repl = {} - # generate replacement strings with the links - for name, cobj in example_code_obj.items(): - this_module = cobj['module'].split('.')[0] - - if this_module not in doc_resolvers: - continue - - try: - link = doc_resolvers[this_module].resolve(cobj, - full_fname) - except (HTTPError, URLError) as e: - if isinstance(e, HTTPError): - extra = e.code - else: - extra = e.reason - print("\n\t\tError resolving %s.%s: %r (%s)" - % (cobj['module'], cobj['name'], e, extra)) - continue - - if link is not None: - parts = name.split('.') - name_html = period.join(orig_pattern % part - for part in parts) - full_function_name = '%s.%s' % ( - cobj['module'], cobj['name']) - str_repl[name_html] = link_pattern % ( - link, full_function_name, name_html) - # do the replacement in the html file - - # ensure greediness - names = sorted(str_repl, key=len, reverse=True) - regex_str = '|'.join(re.escape(name) for name in names) - regex = re.compile(regex_str) - - def substitute_link(match): - return str_repl[match.group()] - - if len(str_repl) > 0: - with open(full_fname, 'rb') as fid: - lines_in = fid.readlines() - with open(full_fname, 'wb') as fid: - for line in lines_in: - line = line.decode('utf-8') - line = regex.sub(substitute_link, line) - fid.write(line.encode('utf-8')) - - -def embed_code_links(app, exception): - """Embed hyperlinks to documentation into example code""" - if exception is not None: - return - - # No need to waste time embedding hyperlinks when not running the examples - # XXX: also at the time of writing this fixes make html-noplot - # for some reason I don't fully understand - if not app.builder.config.plot_gallery: - return - - # XXX: Whitelist of builders for which it makes sense to embed - # hyperlinks inside the example html. Note that the link embedding - # require searchindex.js to exist for the links to the local doc - # and there does not seem to be a good way of knowing which - # builders creates a searchindex.js. 
- if app.builder.name not in ['html', 'readthedocs']: - return - - print('Embedding documentation hyperlinks in examples..') - - gallery_conf = app.config.sphinx_gallery_conf - - gallery_dirs = gallery_conf['gallery_dirs'] - if not isinstance(gallery_dirs, list): - gallery_dirs = [gallery_dirs] - - for gallery_dir in gallery_dirs: - _embed_code_links(app, gallery_conf, gallery_dir) diff --git a/doc/sphinxext/sphinx_gallery/downloads.py b/doc/sphinxext/sphinx_gallery/downloads.py deleted file mode 100644 index 6b5b3df17fc87..0000000000000 --- a/doc/sphinxext/sphinx_gallery/downloads.py +++ /dev/null @@ -1,120 +0,0 @@ -# -*- coding: utf-8 -*- -r""" -Utilities for downloadable items -================================ - -""" -# Author: Óscar Nájera -# License: 3-clause BSD - -from __future__ import absolute_import, division, print_function - -import os -import zipfile - -CODE_DOWNLOAD = """ -\n.. container:: sphx-glr-footer - -\n .. container:: sphx-glr-download - - :download:`Download Python source code: {0} <{0}>`\n - -\n .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: {1} <{1}>`\n""" - -CODE_ZIP_DOWNLOAD = """ -\n.. container:: sphx-glr-footer - -\n .. container:: sphx-glr-download - - :download:`Download all examples in Python source code: {0} `\n - -\n .. container:: sphx-glr-download - - :download:`Download all examples in Jupyter notebooks: {2} `\n""" - - -def python_zip(file_list, gallery_path, extension='.py'): - """Stores all files in file_list into an zip file - - Parameters - ---------- - file_list : list of strings - Holds all the file names to be included in zip file - gallery_path : string - path to where the zipfile is stored - extension : str - '.py' or '.ipynb' In order to deal with downloads of python - sources and jupyter notebooks the file extension from files in - file_list will be removed and replace with the value of this - variable while generating the zip file - Returns - ------- - zipname : string - zip file name, written as `target_dir_{python,jupyter}.zip` - depending on the extension - """ - zipname = os.path.basename(gallery_path) - zipname += '_python' if extension == '.py' else '_jupyter' - zipname = os.path.join(gallery_path, zipname + '.zip') - - zipf = zipfile.ZipFile(zipname, mode='w') - for fname in file_list: - file_src = os.path.splitext(fname)[0] + extension - zipf.write(file_src, os.path.relpath(file_src, gallery_path)) - zipf.close() - - return zipname - - -def list_downloadable_sources(target_dir): - """Returns a list of python source files is target_dir - - Parameters - ---------- - target_dir : string - path to the directory where python source file are - Returns - ------- - list - list of paths to all Python source files in `target_dir` - """ - return [os.path.join(target_dir, fname) - for fname in os.listdir(target_dir) - if fname.endswith('.py')] - - -def generate_zipfiles(gallery_dir): - """ - Collects all Python source files and Jupyter notebooks in - gallery_dir and makes zipfiles of them - - Parameters - ---------- - gallery_dir : string - path of the gallery to collect downloadable sources - - Return - ------ - download_rst: string - RestructuredText to include download buttons to the generated files - """ - - listdir = list_downloadable_sources(gallery_dir) - for directory in sorted(os.listdir(gallery_dir)): - if os.path.isdir(os.path.join(gallery_dir, directory)): - target_dir = os.path.join(gallery_dir, directory) - listdir.extend(list_downloadable_sources(target_dir)) - - py_zipfile = python_zip(listdir, 
gallery_dir) - jy_zipfile = python_zip(listdir, gallery_dir, ".ipynb") - - def rst_path(filepath): - return filepath.replace(os.sep, '/') - - dw_rst = CODE_ZIP_DOWNLOAD.format(os.path.basename(py_zipfile), - rst_path(py_zipfile), - os.path.basename(jy_zipfile), - rst_path(jy_zipfile)) - return dw_rst diff --git a/doc/sphinxext/sphinx_gallery/gen_gallery.py b/doc/sphinxext/sphinx_gallery/gen_gallery.py deleted file mode 100644 index 1a1ce299fab1c..0000000000000 --- a/doc/sphinxext/sphinx_gallery/gen_gallery.py +++ /dev/null @@ -1,304 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Óscar Nájera -# License: 3-clause BSD -""" -Sphinx-Gallery Generator -======================== - -Attaches Sphinx-Gallery to Sphinx in order to generate the galleries -when building the documentation. -""" - - -from __future__ import division, print_function, absolute_import -import copy -import re -import os - -from . import glr_path_static -from .gen_rst import generate_dir_rst, SPHX_GLR_SIG -from .docs_resolv import embed_code_links -from .downloads import generate_zipfiles - -try: - FileNotFoundError -except NameError: - # Python2 - FileNotFoundError = IOError - -DEFAULT_GALLERY_CONF = { - 'filename_pattern': re.escape(os.sep) + 'plot', - 'examples_dirs': os.path.join('..', 'examples'), - 'gallery_dirs': 'auto_examples', - 'backreferences_dir': None, - 'doc_module': (), - 'reference_url': {}, - # build options - 'plot_gallery': True, - 'download_all_examples': True, - 'abort_on_example_error': False, - 'failing_examples': {}, - 'expected_failing_examples': set(), -} - - -def clean_gallery_out(build_dir): - """Deletes images under the sphx_glr namespace in the build directory""" - # Sphinx hack: sphinx copies generated images to the build directory - # each time the docs are made. If the desired image name already - # exists, it appends a digit to prevent overwrites. The problem is, - # the directory is never cleared. This means that each time you build - # the docs, the number of images in the directory grows. - # - # This question has been asked on the sphinx development list, but there - # was no response: http://osdir.com/ml/sphinx-dev/2011-02/msg00123.html - # - # The following is a hack that prevents this behavior by clearing the - # image build directory from gallery images each time the docs are built. - # If sphinx changes their layout between versions, this will not - # work (though it should probably not cause a crash). - # Tested successfully on Sphinx 1.0.7 - - build_image_dir = os.path.join(build_dir, '_images') - if os.path.exists(build_image_dir): - filelist = os.listdir(build_image_dir) - for filename in filelist: - if filename.startswith('sphx_glr') and filename.endswith('png'): - os.remove(os.path.join(build_image_dir, filename)) - - -def parse_config(app): - """Process the Sphinx Gallery configuration""" - # TODO: Test this behavior. - try: - plot_gallery = eval(app.builder.config.plot_gallery) - except TypeError: - plot_gallery = bool(app.builder.config.plot_gallery) - - gallery_conf = copy.deepcopy(DEFAULT_GALLERY_CONF) - gallery_conf.update(app.config.sphinx_gallery_conf) - gallery_conf.update(plot_gallery=plot_gallery) - gallery_conf.update( - abort_on_example_error=app.builder.config.abort_on_example_error) - gallery_conf['src_dir'] = app.builder.srcdir - - backreferences_warning = """\n======== -Sphinx-Gallery now requires you to set the configuration variable -'backreferences_dir' in your config to activate the -backreferences. 
That is mini galleries clustered by the functions used -in the example scripts. Have a look at it in sphinx-gallery - -https://sphinx-gallery.readthedocs.io/en/stable/index.html#examples-using-numpy-linspace -""" - - if gallery_conf.get("mod_example_dir", False): - update_msg = """\nFor a quick fix try replacing 'mod_example_dir' -by 'backreferences_dir' in your conf.py file. If that does not solve the -present issue read carefully how to update in the online documentation - -https://sphinx-gallery.readthedocs.io/en/latest/advanced_configuration.html#references-to-examples""" - - gallery_conf['backreferences_dir'] = gallery_conf['mod_example_dir'] - app.warn("Old configuration for backreferences detected \n" - "using the configuration variable `mod_example_dir`\n" - + backreferences_warning - + update_msg, prefix="DeprecationWarning: ") - - elif gallery_conf['backreferences_dir'] is None: - no_care_msg = """ -If you don't care about this features set in your conf.py -'backreferences_dir': False\n""" - - app.warn(backreferences_warning + no_care_msg) - - gallery_conf['backreferences_dir'] = os.path.join( - 'modules', 'generated') - app.warn("using old default 'backreferences_dir':'{}'.\n" - " This will be disabled in future releases\n".format( - gallery_conf['backreferences_dir']), - prefix="DeprecationWarning: ") - - # this assures I can call the config in other places - app.config.sphinx_gallery_conf = gallery_conf - app.config.html_static_path.append(glr_path_static()) - - return gallery_conf - - -def _prepare_sphx_glr_dirs(gallery_conf, srcdir): - """Creates necessary folders for sphinx_gallery files """ - examples_dirs = gallery_conf['examples_dirs'] - gallery_dirs = gallery_conf['gallery_dirs'] - - if not isinstance(examples_dirs, list): - examples_dirs = [examples_dirs] - if not isinstance(gallery_dirs, list): - gallery_dirs = [gallery_dirs] - - if bool(gallery_conf['backreferences_dir']): - backreferences_dir = os.path.join( - srcdir, gallery_conf['backreferences_dir']) - if not os.path.exists(backreferences_dir): - os.makedirs(backreferences_dir) - - return examples_dirs, gallery_dirs - - -def generate_gallery_rst(app): - """Generate the Main examples gallery reStructuredText - - Start the sphinx-gallery configuration and recursively scan the examples - directories in order to populate the examples gallery - """ - print('Generating gallery') - gallery_conf = parse_config(app) - - clean_gallery_out(app.builder.outdir) - - seen_backrefs = set() - - computation_times = [] - examples_dirs, gallery_dirs = _prepare_sphx_glr_dirs(gallery_conf, - app.builder.srcdir) - - for examples_dir, gallery_dir in zip(examples_dirs, gallery_dirs): - examples_dir = os.path.join(app.builder.srcdir, examples_dir) - gallery_dir = os.path.join(app.builder.srcdir, gallery_dir) - - for workdir in [examples_dir, gallery_dir]: - if not os.path.exists(workdir): - os.makedirs(workdir) - # Here we don't use an os.walk, but we recurse only twice: flat is - # better than nested. - this_fhindex, this_computation_times = generate_dir_rst( - examples_dir, gallery_dir, gallery_conf, seen_backrefs) - if this_fhindex == "": - raise FileNotFoundError("Main example directory {0} does not " - "have a README.txt file. Please write " - "one to introduce your gallery." 
- .format(examples_dir)) - - computation_times += this_computation_times - - # we create an index.rst with all examples - fhindex = open(os.path.join(gallery_dir, 'index.rst'), 'w') - # :orphan: to suppress "not included in TOCTREE" sphinx warnings - fhindex.write(":orphan:\n\n" + this_fhindex) - for directory in sorted(os.listdir(examples_dir)): - if os.path.isdir(os.path.join(examples_dir, directory)): - src_dir = os.path.join(examples_dir, directory) - target_dir = os.path.join(gallery_dir, directory) - this_fhindex, this_computation_times = generate_dir_rst(src_dir, target_dir, gallery_conf, - seen_backrefs) - fhindex.write(this_fhindex) - computation_times += this_computation_times - - if gallery_conf['download_all_examples']: - download_fhindex = generate_zipfiles(gallery_dir) - fhindex.write(download_fhindex) - - fhindex.write(SPHX_GLR_SIG) - fhindex.flush() - - if gallery_conf['plot_gallery']: - print("Computation time summary:") - for time_elapsed, fname in sorted(computation_times)[::-1]: - if time_elapsed is not None: - print("\t- %s : %.2g sec" % (fname, time_elapsed)) - else: - print("\t- %s : not run" % fname) - - -def touch_empty_backreferences(app, what, name, obj, options, lines): - """Generate empty back-reference example files - - This avoids inclusion errors/warnings if there are no gallery - examples for a class / module that is being parsed by autodoc""" - - if not bool(app.config.sphinx_gallery_conf['backreferences_dir']): - return - - examples_path = os.path.join(app.srcdir, - app.config.sphinx_gallery_conf[ - "backreferences_dir"], - "%s.examples" % name) - - if not os.path.exists(examples_path): - # touch file - open(examples_path, 'w').close() - - -def sumarize_failing_examples(app, exception): - """Collects the list of falling examples during build and prints them with the traceback - - Raises ValueError if there where failing examples - """ - if exception is not None: - return - - # Under no-plot Examples are not run so nothing to summarize - if not app.config.sphinx_gallery_conf['plot_gallery']: - return - - gallery_conf = app.config.sphinx_gallery_conf - failing_examples = set(gallery_conf['failing_examples'].keys()) - expected_failing_examples = set([os.path.normpath(os.path.join(app.srcdir, path)) - for path in - gallery_conf['expected_failing_examples']]) - - examples_expected_to_fail = failing_examples.intersection( - expected_failing_examples) - expected_fail_msg = [] - if examples_expected_to_fail: - expected_fail_msg.append("\n\nExamples failing as expected:") - for fail_example in examples_expected_to_fail: - expected_fail_msg.append(fail_example + ' failed leaving traceback:\n' + - gallery_conf['failing_examples'][fail_example] + '\n') - print("\n".join(expected_fail_msg)) - - examples_not_expected_to_fail = failing_examples.difference( - expected_failing_examples) - fail_msgs = [] - if examples_not_expected_to_fail: - fail_msgs.append("Unexpected failing examples:") - for fail_example in examples_not_expected_to_fail: - fail_msgs.append(fail_example + ' failed leaving traceback:\n' + - gallery_conf['failing_examples'][fail_example] + '\n') - - examples_not_expected_to_pass = expected_failing_examples.difference( - failing_examples) - if examples_not_expected_to_pass: - fail_msgs.append("Examples expected to fail, but not failling:\n" + - "Please remove these examples from\n" + - "sphinx_gallery_conf['expected_failing_examples']\n" + - "in your conf.py file" - "\n".join(examples_not_expected_to_pass)) - - if fail_msgs: - raise ValueError("Here is a 
summary of the problems encountered when " - "running the examples\n\n" + "\n".join(fail_msgs) + - "\n" + "-" * 79) - - -def get_default_config_value(key): - def default_getter(conf): - return conf['sphinx_gallery_conf'].get(key, DEFAULT_GALLERY_CONF[key]) - return default_getter - - -def setup(app): - """Setup sphinx-gallery sphinx extension""" - app.add_config_value('sphinx_gallery_conf', DEFAULT_GALLERY_CONF, 'html') - for key in ['plot_gallery', 'abort_on_example_error']: - app.add_config_value(key, get_default_config_value(key), 'html') - - app.add_stylesheet('gallery.css') - # Sphinx < 1.6 calls it `_extensions`, >= 1.6 is `extensions`. - extensions_attr = '_extensions' if hasattr(app, '_extensions') else 'extensions' - if 'sphinx.ext.autodoc' in getattr(app, extensions_attr): - app.connect('autodoc-process-docstring', touch_empty_backreferences) - - app.connect('builder-inited', generate_gallery_rst) - - app.connect('build-finished', sumarize_failing_examples) - app.connect('build-finished', embed_code_links) diff --git a/doc/sphinxext/sphinx_gallery/gen_rst.py b/doc/sphinxext/sphinx_gallery/gen_rst.py deleted file mode 100644 index c2a0b95545499..0000000000000 --- a/doc/sphinxext/sphinx_gallery/gen_rst.py +++ /dev/null @@ -1,641 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Óscar Nájera -# License: 3-clause BSD -""" -RST file generator -================== - -Generate the rst files for the examples by iterating over the python -example files. - -Files that generate images should start with 'plot' - -""" -# Don't use unicode_literals here (be explicit with u"..." instead) otherwise -# tricky errors come up with exec(code_blocks, ...) calls -from __future__ import division, print_function, absolute_import -from time import time -import codecs -import hashlib -import os -import re -import shutil -import subprocess -import sys -import traceback -import warnings - - -# Try Python 2 first, otherwise load from Python 3 -try: - # textwrap indent only exists in python 3 - from textwrap import indent -except ImportError: - def indent(text, prefix, predicate=None): - """Adds 'prefix' to the beginning of selected lines in 'text'. - - If 'predicate' is provided, 'prefix' will only be added to the lines - where 'predicate(line)' is True. If 'predicate' is not provided, - it will default to adding 'prefix' to all non-empty lines that do not - consist solely of whitespace characters. - """ - if predicate is None: - def predicate(line): - return line.strip() - - def prefixed_lines(): - for line in text.splitlines(True): - yield (prefix + line if predicate(line) else line) - return ''.join(prefixed_lines()) - -from io import StringIO - -# make sure that the Agg backend is set before importing any -# matplotlib -import matplotlib -matplotlib.use('agg') -matplotlib_backend = matplotlib.get_backend() - -if matplotlib_backend != 'agg': - mpl_backend_msg = ( - "Sphinx-Gallery relies on the matplotlib 'agg' backend to " - "render figures and write them to files. You are " - "currently using the {} backend. Sphinx-Gallery will " - "terminate the build now, because changing backends is " - "not well supported by matplotlib. We advise you to move " - "sphinx_gallery imports before any matplotlib-dependent " - "import. Moving sphinx_gallery imports at the top of " - "your conf.py file should fix this issue") - - raise ValueError(mpl_backend_msg.format(matplotlib_backend)) - -import matplotlib.pyplot as plt - -from . 
import glr_path_static -from .backreferences import write_backreferences, _thumbnail_div -from .downloads import CODE_DOWNLOAD -from .py_source_parser import (get_docstring_and_rest, - split_code_and_text_blocks) - -from .notebook import jupyter_notebook, save_notebook - -try: - basestring -except NameError: - basestring = str - unicode = str - - -############################################################################### - - -class Tee(object): - """A tee object to redirect streams to multiple outputs""" - - def __init__(self, file1, file2): - self.file1 = file1 - self.file2 = file2 - - def write(self, data): - self.file1.write(data) - self.file2.write(data) - - def flush(self): - self.file1.flush() - self.file2.flush() - - # When called from a local terminal seaborn needs it in Python3 - def isatty(self): - self.file1.isatty() - - -class MixedEncodingStringIO(StringIO): - """Helper when both ASCII and unicode strings will be written""" - - def write(self, data): - if not isinstance(data, unicode): - data = data.decode('utf-8') - StringIO.write(self, data) - - -############################################################################### -# The following strings are used when we have several pictures: we use -# an html div tag that our CSS uses to turn the lists into horizontal -# lists. -HLIST_HEADER = """ -.. rst-class:: sphx-glr-horizontal - -""" - -HLIST_IMAGE_TEMPLATE = """ - * - - .. image:: /%s - :scale: 47 -""" - -SINGLE_IMAGE = """ -.. image:: /%s - :align: center -""" - - -# This one could contain unicode -CODE_OUTPUT = u""".. rst-class:: sphx-glr-script-out - - Out:: - -{0}\n""" - - -SPHX_GLR_SIG = """\n.. rst-class:: sphx-glr-signature - - `Generated by Sphinx-Gallery `_\n""" - - -def codestr2rst(codestr, lang='python'): - """Return reStructuredText code block from code string""" - code_directive = "\n.. code-block:: {0}\n\n".format(lang) - indented_block = indent(codestr, ' ' * 4) - return code_directive + indented_block - - -def extract_thumbnail_number(text): - """ Pull out the thumbnail image number specified in the docstring. """ - - # check whether the user has specified a specific thumbnail image - pattr = re.compile( - r"^\s*#\s*sphinx_gallery_thumbnail_number\s*=\s*([0-9]+)\s*$", - flags=re.MULTILINE) - match = pattr.search(text) - - if match is None: - # by default, use the first figure created - thumbnail_number = 1 - else: - thumbnail_number = int(match.groups()[0]) - - return thumbnail_number - - -def extract_intro(filename): - """ Extract the first paragraph of module-level docstring. max:95 char""" - - docstring, _ = get_docstring_and_rest(filename) - - # lstrip is just in case docstring has a '\n\n' at the beginning - paragraphs = docstring.lstrip().split('\n\n') - if len(paragraphs) > 1: - first_paragraph = re.sub('\n', ' ', paragraphs[1]) - first_paragraph = (first_paragraph[:95] + '...' - if len(first_paragraph) > 95 else first_paragraph) - else: - raise ValueError( - "Example docstring should have a header for the example title " - "and at least a paragraph explaining what the example is about. 
" - "Please check the example file:\n {}\n".format(filename)) - - return first_paragraph - - -def get_md5sum(src_file): - """Returns md5sum of file""" - - with open(src_file, 'rb') as src_data: - src_content = src_data.read() - - src_md5 = hashlib.md5(src_content).hexdigest() - return src_md5 - - -def md5sum_is_current(src_file): - """Checks whether src_file has the same md5 hash as the one on disk""" - - src_md5 = get_md5sum(src_file) - - src_md5_file = src_file + '.md5' - if os.path.exists(src_md5_file): - with open(src_md5_file, 'r') as file_checksum: - ref_md5 = file_checksum.read() - - return src_md5 == ref_md5 - - return False - - -def save_figures(image_path, fig_count, gallery_conf): - """Save all open matplotlib figures of the example code-block - - Parameters - ---------- - image_path : str - Path where plots are saved (format string which accepts figure number) - fig_count : int - Previous figure number count. Figure number add from this number - gallery_conf : dict - Contains the configuration of Sphinx-Gallery - - Returns - ------- - images_rst : str - rst code to embed the images in the document - fig_num : int - number of figures saved - """ - figure_list = [] - - for fig_num in plt.get_fignums(): - # Set the fig_num figure as the current figure as we can't - # save a figure that's not the current figure. - fig = plt.figure(fig_num) - kwargs = {} - to_rgba = matplotlib.colors.colorConverter.to_rgba - for attr in ['facecolor', 'edgecolor']: - fig_attr = getattr(fig, 'get_' + attr)() - default_attr = matplotlib.rcParams['figure.' + attr] - if to_rgba(fig_attr) != to_rgba(default_attr): - kwargs[attr] = fig_attr - - current_fig = image_path.format(fig_count + fig_num) - fig.savefig(current_fig, **kwargs) - figure_list.append(current_fig) - - if gallery_conf.get('find_mayavi_figures', False): - from mayavi import mlab - e = mlab.get_engine() - last_matplotlib_fig_num = fig_count + len(figure_list) - total_fig_num = last_matplotlib_fig_num + len(e.scenes) - mayavi_fig_nums = range(last_matplotlib_fig_num + 1, total_fig_num + 1) - - for scene, mayavi_fig_num in zip(e.scenes, mayavi_fig_nums): - current_fig = image_path.format(mayavi_fig_num) - mlab.savefig(current_fig, figure=scene) - # make sure the image is not too large - scale_image(current_fig, current_fig, 850, 999) - figure_list.append(current_fig) - mlab.close(all=True) - - return figure_rst(figure_list, gallery_conf['src_dir']) - - -def figure_rst(figure_list, sources_dir): - """Given a list of paths to figures generate the corresponding rst - - Depending on whether we have one or more figures, we use a - single rst call to 'image' or a horizontal list. 
- - Parameters - ---------- - figure_list : list of str - Strings are the figures' absolute paths - sources_dir : str - absolute path of Sphinx documentation sources - - Returns - ------- - images_rst : str - rst code to embed the images in the document - fig_num : int - number of figures saved - """ - - figure_paths = [os.path.relpath(figure_path, sources_dir) - .replace(os.sep, '/').lstrip('/') - for figure_path in figure_list] - images_rst = "" - if len(figure_paths) == 1: - figure_name = figure_paths[0] - images_rst = SINGLE_IMAGE % figure_name - elif len(figure_paths) > 1: - images_rst = HLIST_HEADER - for figure_name in figure_paths: - images_rst += HLIST_IMAGE_TEMPLATE % figure_name - - return images_rst, len(figure_list) - - -def scale_image(in_fname, out_fname, max_width, max_height): - """Scales an image with the same aspect ratio centered in an - image with a given max_width and max_height - if in_fname == out_fname the image can only be scaled down - """ - # local import to avoid testing dependency on PIL: - try: - from PIL import Image - except ImportError: - import Image - img = Image.open(in_fname) - width_in, height_in = img.size - scale_w = max_width / float(width_in) - scale_h = max_height / float(height_in) - - if height_in * scale_w <= max_height: - scale = scale_w - else: - scale = scale_h - - if scale >= 1.0 and in_fname == out_fname: - return - - width_sc = int(round(scale * width_in)) - height_sc = int(round(scale * height_in)) - - # resize the image - img.thumbnail((width_sc, height_sc), Image.ANTIALIAS) - - # insert centered - thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255)) - pos_insert = ((max_width - width_sc) // 2, (max_height - height_sc) // 2) - thumb.paste(img, pos_insert) - - thumb.save(out_fname) - # Use optipng to perform lossless compression on the resized image if - # software is installed - if os.environ.get('SKLEARN_DOC_OPTIPNG', False): - try: - subprocess.call(["optipng", "-quiet", "-o", "9", out_fname]) - except Exception: - warnings.warn('Install optipng to reduce the size of the \ - generated images') - - -def save_thumbnail(image_path_template, src_file, gallery_conf): - """Save the thumbnail image""" - # read specification of the figure to display as thumbnail from main text - _, content = get_docstring_and_rest(src_file) - thumbnail_number = extract_thumbnail_number(content) - thumbnail_image_path = image_path_template.format(thumbnail_number) - - thumb_dir = os.path.join(os.path.dirname(thumbnail_image_path), 'thumb') - if not os.path.exists(thumb_dir): - os.makedirs(thumb_dir) - - base_image_name = os.path.splitext(os.path.basename(src_file))[0] - thumb_file = os.path.join(thumb_dir, - 'sphx_glr_%s_thumb.png' % base_image_name) - - if src_file in gallery_conf['failing_examples']: - broken_img = os.path.join(glr_path_static(), 'broken_example.png') - scale_image(broken_img, thumb_file, 200, 140) - - elif os.path.exists(thumbnail_image_path): - scale_image(thumbnail_image_path, thumb_file, 400, 280) - - elif not os.path.exists(thumb_file): - # create something to replace the thumbnail - default_thumb_file = os.path.join(glr_path_static(), 'no_image.png') - default_thumb_file = gallery_conf.get("default_thumb_file", - default_thumb_file) - scale_image(default_thumb_file, thumb_file, 200, 140) - - -def generate_dir_rst(src_dir, target_dir, gallery_conf, seen_backrefs): - """Generate the gallery reStructuredText for an example directory""" - if not os.path.exists(os.path.join(src_dir, 'README.txt')): - print(80 * '_') - 
print('Example directory %s does not have a README.txt file' % - src_dir) - print('Skipping this directory') - print(80 * '_') - return "", [] # because string is an expected return type - - with open(os.path.join(src_dir, 'README.txt')) as fid: - fhindex = fid.read() - # Add empty lines to avoid bug in issue #165 - fhindex += "\n\n" - - if not os.path.exists(target_dir): - os.makedirs(target_dir) - sorted_listdir = [fname for fname in sorted(os.listdir(src_dir)) - if fname.endswith('.py')] - entries_text = [] - computation_times = [] - build_target_dir = os.path.relpath(target_dir, gallery_conf['src_dir']) - for fname in sorted_listdir: - amount_of_code, time_elapsed = \ - generate_file_rst(fname, target_dir, src_dir, gallery_conf) - computation_times.append((time_elapsed, fname)) - new_fname = os.path.join(src_dir, fname) - intro = extract_intro(new_fname) - this_entry = _thumbnail_div(build_target_dir, fname, intro) + """ - -.. toctree:: - :hidden: - - /%s\n""" % os.path.join(build_target_dir, fname[:-3]).replace(os.sep, '/') - entries_text.append((amount_of_code, this_entry)) - - if gallery_conf['backreferences_dir']: - write_backreferences(seen_backrefs, gallery_conf, - target_dir, fname, intro) - - # sort to have the smallest entries in the beginning - entries_text.sort() - - for _, entry_text in entries_text: - fhindex += entry_text - - # clear at the end of the section - fhindex += """.. raw:: html\n -
    \n\n""" - - return fhindex, computation_times - - -def execute_code_block(code_block, example_globals, - block_vars, gallery_conf): - """Executes the code block of the example file""" - time_elapsed = 0 - stdout = '' - - # If example is not suitable to run, skip executing its blocks - if not block_vars['execute_script']: - return stdout, time_elapsed - - plt.close('all') - cwd = os.getcwd() - # Redirect output to stdout and - orig_stdout = sys.stdout - src_file = block_vars['src_file'] - - try: - # First cd in the original example dir, so that any file - # created by the example get created in this directory - os.chdir(os.path.dirname(src_file)) - my_buffer = MixedEncodingStringIO() - my_stdout = Tee(sys.stdout, my_buffer) - sys.stdout = my_stdout - - t_start = time() - # don't use unicode_literals at the top of this file or you get - # nasty errors here on Py2.7 - exec(code_block, example_globals) - time_elapsed = time() - t_start - - sys.stdout = orig_stdout - - my_stdout = my_buffer.getvalue().strip().expandtabs() - # raise RuntimeError - if my_stdout: - stdout = CODE_OUTPUT.format(indent(my_stdout, u' ' * 4)) - os.chdir(cwd) - images_rst, fig_num = save_figures(block_vars['image_path'], - block_vars['fig_count'], gallery_conf) - - except Exception: - formatted_exception = traceback.format_exc() - - fail_example_warning = 80 * '_' + '\n' + \ - '%s failed to execute correctly:' % src_file + \ - formatted_exception + 80 * '_' + '\n' - warnings.warn(fail_example_warning) - - fig_num = 0 - images_rst = codestr2rst(formatted_exception, lang='pytb') - - # Breaks build on first example error - # XXX This check can break during testing e.g. if you uncomment the - # `raise RuntimeError` by the `my_stdout` call, maybe use `.get()`? - if gallery_conf['abort_on_example_error']: - raise - # Stores failing file - gallery_conf['failing_examples'][src_file] = formatted_exception - block_vars['execute_script'] = False - - finally: - os.chdir(cwd) - sys.stdout = orig_stdout - - code_output = u"\n{0}\n\n{1}\n\n".format(images_rst, stdout) - block_vars['fig_count'] += fig_num - - return code_output, time_elapsed - - -def clean_modules(): - """Remove "unload" seaborn from the name space - - After a script is executed it can load a variety of setting that one - does not want to influence in other examples in the gallery.""" - - # Horrible code to 'unload' seaborn, so that it resets - # its default when is load - # Python does not support unloading of modules - # https://bugs.python.org/issue9072 - for module in list(sys.modules.keys()): - if 'seaborn' in module: - del sys.modules[module] - - # Reset Matplotlib to default - plt.rcdefaults() - - -def generate_file_rst(fname, target_dir, src_dir, gallery_conf): - """Generate the rst file for a given example. 
- - Returns - ------- - amount_of_code : int - character count of the corresponding python script in file - time_elapsed : float - seconds required to run the script - """ - - src_file = os.path.normpath(os.path.join(src_dir, fname)) - example_file = os.path.join(target_dir, fname) - shutil.copyfile(src_file, example_file) - script_blocks = split_code_and_text_blocks(src_file) - amount_of_code = sum([len(bcontent) - for blabel, bcontent in script_blocks - if blabel == 'code']) - - if md5sum_is_current(example_file): - return amount_of_code, 0 - - image_dir = os.path.join(target_dir, 'images') - if not os.path.exists(image_dir): - os.makedirs(image_dir) - - base_image_name = os.path.splitext(fname)[0] - image_fname = 'sphx_glr_' + base_image_name + '_{0:03}.png' - build_image_dir = os.path.relpath(image_dir, gallery_conf['src_dir']) - image_path_template = os.path.join(image_dir, image_fname) - - ref_fname = os.path.relpath(example_file, gallery_conf['src_dir']) - ref_fname = ref_fname.replace(os.path.sep, '_') - example_rst = """\n\n.. _sphx_glr_{0}:\n\n""".format(ref_fname) - - filename_pattern = gallery_conf.get('filename_pattern') - execute_script = re.search(filename_pattern, src_file) and gallery_conf[ - 'plot_gallery'] - example_globals = { - # A lot of examples contains 'print(__doc__)' for example in - # scikit-learn so that running the example prints some useful - # information. Because the docstring has been separated from - # the code blocks in sphinx-gallery, __doc__ is actually - # __builtin__.__doc__ in the execution context and we do not - # want to print it - '__doc__': '', - # Examples may contain if __name__ == '__main__' guards - # for in example scikit-learn if the example uses multiprocessing - '__name__': '__main__', - # Don't ever support __file__: Issues #166 #212 - } - - # A simple example has two blocks: one for the - # example introduction/explanation and one for the code - is_example_notebook_like = len(script_blocks) > 2 - time_elapsed = 0 - block_vars = {'execute_script': execute_script, 'fig_count': 0, - 'image_path': image_path_template, 'src_file': src_file} - if block_vars['execute_script']: - print('Executing file %s' % src_file) - for blabel, bcontent in script_blocks: - if blabel == 'code': - code_output, rtime = execute_code_block(bcontent, - example_globals, - block_vars, - gallery_conf) - - time_elapsed += rtime - - if is_example_notebook_like: - example_rst += codestr2rst(bcontent) + '\n' - example_rst += code_output - else: - example_rst += code_output - if 'sphx-glr-script-out' in code_output: - # Add some vertical space after output - example_rst += "\n\n|\n\n" - example_rst += codestr2rst(bcontent) + '\n' - - else: - example_rst += bcontent + '\n\n' - - clean_modules() - - # Writes md5 checksum if example has build correctly - # not failed and was initially meant to run(no-plot shall not cache md5sum) - if block_vars['execute_script']: - with open(example_file + '.md5', 'w') as file_checksum: - file_checksum.write(get_md5sum(example_file)) - - save_thumbnail(image_path_template, src_file, gallery_conf) - - time_m, time_s = divmod(time_elapsed, 60) - example_nb = jupyter_notebook(script_blocks) - save_notebook(example_nb, example_file.replace('.py', '.ipynb')) - with codecs.open(os.path.join(target_dir, base_image_name + '.rst'), - mode='w', encoding='utf-8') as f: - example_rst += "**Total running time of the script:**" \ - " ({0: .0f} minutes {1: .3f} seconds)\n\n".format( - time_m, time_s) - example_rst += CODE_DOWNLOAD.format(fname, - 
fname.replace('.py', '.ipynb')) - example_rst += SPHX_GLR_SIG - f.write(example_rst) - - if block_vars['execute_script']: - print("{0} ran in : {1:.2g} seconds\n".format(src_file, time_elapsed)) - - return amount_of_code, time_elapsed diff --git a/doc/sphinxext/sphinx_gallery/notebook.py b/doc/sphinxext/sphinx_gallery/notebook.py deleted file mode 100644 index a0cfdbd7881d6..0000000000000 --- a/doc/sphinxext/sphinx_gallery/notebook.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- -r""" -Parser for Jupyter notebooks -============================ - -Class that holds the Jupyter notebook information - -""" -# Author: Óscar Nájera -# License: 3-clause BSD - -from __future__ import division, absolute_import, print_function -from functools import partial -import argparse -import json -import re -import sys -from .py_source_parser import split_code_and_text_blocks - - -def jupyter_notebook_skeleton(): - """Returns a dictionary with the elements of a Jupyter notebook""" - py_version = sys.version_info - notebook_skeleton = { - "cells": [], - "metadata": { - "kernelspec": { - "display_name": "Python " + str(py_version[0]), - "language": "python", - "name": "python" + str(py_version[0]) - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": py_version[0] - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython" + str(py_version[0]), - "version": '{0}.{1}.{2}'.format(*sys.version_info[:3]) - } - }, - "nbformat": 4, - "nbformat_minor": 0 - } - return notebook_skeleton - - -def directive_fun(match, directive): - """Helper to fill in directives""" - directive_to_alert = dict(note="info", warning="danger") - return ('
-    <div class="alert alert-{0}">
-    <h4>{1}</h4>
-    <p>{2}</p>
-    </div>
    ' - .format(directive_to_alert[directive], directive.capitalize(), - match.group(1).strip())) - - -def rst2md(text): - """Converts the RST text from the examples docstrigs and comments - into markdown text for the Jupyter notebooks""" - - top_heading = re.compile(r'^=+$\s^([\w\s-]+)^=+$', flags=re.M) - text = re.sub(top_heading, r'# \1', text) - - math_eq = re.compile(r'^\.\. math::((?:.+)?(?:\n+^ .+)*)', flags=re.M) - text = re.sub(math_eq, - lambda match: r'\begin{{align}}{0}\end{{align}}'.format( - match.group(1).strip()), - text) - inline_math = re.compile(r':math:`(.+?)`', re.DOTALL) - text = re.sub(inline_math, r'$\1$', text) - - directives = ('warning', 'note') - for directive in directives: - directive_re = re.compile(r'^\.\. %s::((?:.+)?(?:\n+^ .+)*)' - % directive, flags=re.M) - text = re.sub(directive_re, - partial(directive_fun, directive=directive), text) - - links = re.compile(r'^ *\.\. _.*:.*$\n', flags=re.M) - text = re.sub(links, '', text) - - refs = re.compile(r':ref:`') - text = re.sub(refs, '`', text) - - contents = re.compile(r'^\s*\.\. contents::.*$(\n +:\S+: *$)*\n', - flags=re.M) - text = re.sub(contents, '', text) - - images = re.compile( - r'^\.\. image::(.*$)(?:\n *:alt:(.*$)\n)?(?: +:\S+:.*$\n)*', - flags=re.M) - text = re.sub( - images, lambda match: '![{1}]({0})\n'.format( - match.group(1).strip(), (match.group(2) or '').strip()), text) - - return text - - -def jupyter_notebook(script_blocks): - """Generate a Jupyter notebook file cell-by-cell - - Parameters - ---------- - script_blocks: list - script execution cells - """ - - work_notebook = jupyter_notebook_skeleton() - add_code_cell(work_notebook, "%matplotlib inline") - fill_notebook(work_notebook, script_blocks) - - return work_notebook - - -def add_code_cell(work_notebook, code): - """Add a code cell to the notebook - - Parameters - ---------- - code : str - Cell content - """ - - code_cell = { - "cell_type": "code", - "execution_count": None, - "metadata": {"collapsed": False}, - "outputs": [], - "source": [code.strip()] - } - work_notebook["cells"].append(code_cell) - - -def add_markdown_cell(work_notebook, text): - """Add a markdown cell to the notebook - - Parameters - ---------- - code : str - Cell content - """ - markdown_cell = { - "cell_type": "markdown", - "metadata": {}, - "source": [rst2md(text)] - } - work_notebook["cells"].append(markdown_cell) - - -def fill_notebook(work_notebook, script_blocks): - """Writes the Jupyter notebook cells - - Parameters - ---------- - script_blocks : list of tuples - """ - - for blabel, bcontent in script_blocks: - if blabel == 'code': - add_code_cell(work_notebook, bcontent) - else: - add_markdown_cell(work_notebook, bcontent + '\n') - - -def save_notebook(work_notebook, write_file): - """Saves the Jupyter work_notebook to write_file""" - with open(write_file, 'w') as out_nb: - json.dump(work_notebook, out_nb, indent=2) - - -############################################################################### -# Notebook shell utility - -def python_to_jupyter_cli(args=None, namespace=None): - """Exposes the jupyter notebook renderer to the command line - - Takes the same arguments as ArgumentParser.parse_args - """ - parser = argparse.ArgumentParser( - description='Sphinx-Gallery Notebook converter') - parser.add_argument('python_src_file', nargs='+', - help='Input Python file script to convert. ' - 'Supports multiple files and shell wildcards' - ' (e.g. 
*.py)') - args = parser.parse_args(args, namespace) - - for src_file in args.python_src_file: - blocks = split_code_and_text_blocks(src_file) - print('Converting {0}'.format(src_file)) - example_nb = jupyter_notebook(blocks) - save_notebook(example_nb, src_file.replace('.py', '.ipynb')) diff --git a/doc/sphinxext/sphinx_gallery/py_source_parser.py b/doc/sphinxext/sphinx_gallery/py_source_parser.py deleted file mode 100644 index d397087f99fbd..0000000000000 --- a/doc/sphinxext/sphinx_gallery/py_source_parser.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- -r""" -Parser for python source files -============================== -""" -# Created Sun Nov 27 14:03:07 2016 -# Author: Óscar Nájera - -from __future__ import division, absolute_import, print_function -import ast -import re -from textwrap import dedent - -SYNTAX_ERROR_DOCSTRING = """ -SyntaxError -=========== - -Example script with invalid Python syntax -""" - - -def get_docstring_and_rest(filename): - """Separate `filename` content between docstring and the rest - - Strongly inspired from ast.get_docstring. - - Returns - ------- - docstring: str - docstring of `filename` - rest: str - `filename` content without the docstring - """ - # can't use codecs.open(filename, 'r', 'utf-8') here b/c ast doesn't - # seem to work with unicode strings in Python2.7 - # "SyntaxError: encoding declaration in Unicode string" - with open(filename, 'rb') as fid: - content = fid.read() - # change from Windows format to UNIX for uniformity - content = content.replace(b'\r\n', b'\n') - - try: - node = ast.parse(content) - except SyntaxError: - return SYNTAX_ERROR_DOCSTRING, content.decode('utf-8') - - if not isinstance(node, ast.Module): - raise TypeError("This function only supports modules. " - "You provided {0}".format(node.__class__.__name__)) - if node.body and isinstance(node.body[0], ast.Expr) and \ - isinstance(node.body[0].value, ast.Str): - docstring_node = node.body[0] - docstring = docstring_node.value.s - if hasattr(docstring, 'decode'): # python2.7 - docstring = docstring.decode('utf-8') - # This get the content of the file after the docstring last line - # Note: 'maxsplit' argument is not a keyword argument in python2 - rest = content.decode('utf-8').split('\n', docstring_node.lineno)[-1] - return docstring, rest - else: - raise ValueError(('Could not find docstring in file "{0}". ' - 'A docstring is required by sphinx-gallery') - .format(filename)) - - -def split_code_and_text_blocks(source_file): - """Return list with source file separated into code and text blocks. - - Returns - ------- - blocks : list of (label, content) - List where each element is a tuple with the label ('text' or 'code'), - and content string of block. 
- """ - docstring, rest_of_content = get_docstring_and_rest(source_file) - blocks = [('text', docstring)] - - pattern = re.compile( - r'(?P^#{20,}.*)\s(?P(?:^#.*\s)*)', - flags=re.M) - - pos_so_far = 0 - for match in re.finditer(pattern, rest_of_content): - match_start_pos, match_end_pos = match.span() - code_block_content = rest_of_content[pos_so_far:match_start_pos] - text_content = match.group('text_content') - sub_pat = re.compile('^#', flags=re.M) - text_block_content = dedent(re.sub(sub_pat, '', text_content)).lstrip() - if code_block_content.strip(): - blocks.append(('code', code_block_content)) - if text_block_content.strip(): - blocks.append(('text', text_block_content)) - pos_so_far = match_end_pos - - remaining_content = rest_of_content[pos_so_far:] - if remaining_content.strip(): - blocks.append(('code', remaining_content)) - - return blocks diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e5159054c8153..65b47a42289e4 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -1,8 +1,8 @@ .. currentmodule:: sklearn - - +.. include:: includes/big_toc_css.rst +.. include:: whats_new/_contributors.rst =============== -Release history +Release History =============== Version 0.20 (under development) @@ -5756,3 +5756,19 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. .. _Neeraj Gangwar: http://neerajgangwar.in .. _Arthur Mensch: https://amensch.fr +.. include:: whats_new/v0.20.rst +.. include:: whats_new/v0.19.rst + +================= +Previous Releases +================= +.. toctree:: + :maxdepth: 1 + + Version 0.18 + Version 0.17 + Version 0.16 + Version 0.15 + Version 0.14 + Version 0.13 + Older Versions diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst new file mode 100644 index 0000000000000..dfbc319da88f4 --- /dev/null +++ b/doc/whats_new/_contributors.rst @@ -0,0 +1,143 @@ +.. _Olivier Grisel: https://twitter.com/ogrisel + +.. _Gael Varoquaux: http://gael-varoquaux.info + +.. _Alexandre Gramfort: http://alexandre.gramfort.net + +.. _Fabian Pedregosa: http://fa.bianp.net + +.. _Mathieu Blondel: http://www.mblondel.org + +.. _James Bergstra: http://www-etud.iro.umontreal.ca/~bergstrj/ + +.. _liblinear: http://www.csie.ntu.edu.tw/~cjlin/liblinear/ + +.. _Yaroslav Halchenko: http://www.onerussian.com/ + +.. _Vlad Niculae: http://vene.ro + +.. _Edouard Duchesnay: https://sites.google.com/site/duchesnay/home + +.. _Peter Prettenhofer: https://sites.google.com/site/peterprettenhofer/ + +.. _Alexandre Passos: http://atpassos.me + +.. _Nicolas Pinto: https://twitter.com/npinto + +.. _Bertrand Thirion: https://team.inria.fr/parietal/bertrand-thirions-page + +.. _Andreas Müller: http://peekaboo-vision.blogspot.com + +.. _Matthieu Perrot: http://brainvisa.info/biblio/lnao/en/Author/PERROT-M.html + +.. _Jake Vanderplas: http://staff.washington.edu/jakevdp/ + +.. _Gilles Louppe: http://www.montefiore.ulg.ac.be/~glouppe/ + +.. _INRIA: http://www.inria.fr + +.. _Parietal Team: http://parietal.saclay.inria.fr/ + +.. _David Warde-Farley: http://www-etud.iro.umontreal.ca/~wardefar/ + +.. _Brian Holt: http://personal.ee.surrey.ac.uk/Personal/B.Holt + +.. _Satrajit Ghosh: http://www.mit.edu/~satra/ + +.. _Robert Layton: https://twitter.com/robertlayton + +.. _Scott White: https://twitter.com/scottblanc + +.. _David Marek: http://www.davidmarek.cz/ + +.. _Christian Osendorfer: https://osdf.github.io + +.. _Arnaud Joly: http://www.ajoly.org + +.. _Rob Zinkov: http://zinkov.com + +.. _Joel Nothman: http://joelnothman.com + +.. 
_Nicolas Trésegnie : http://nicolastr.com/ + +.. _Kemal Eren: http://www.kemaleren.com + +.. _Yann Dauphin: http://ynd.github.io/ + +.. _Yannick Schwartz: https://team.inria.fr/parietal/schwarty/ + +.. _Kyle Kastner: http://kastnerkyle.github.io + +.. _Daniel Nouri: http://danielnouri.org + +.. _Manoj Kumar: https://manojbits.wordpress.com + +.. _Luis Pedro Coelho: http://luispedro.org + +.. _Fares Hedyati: http://www.eecs.berkeley.edu/~fareshed + +.. _Antony Lee: https://www.ocf.berkeley.edu/~antonyl/ + +.. _Martin Billinger: http://tnsre.embs.org/author/martinbillinger + +.. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me + +.. _Trevor Stephens: http://trevorstephens.com/ + +.. _Jan Hendrik Metzen: https://jmetzen.github.io/ + +.. _Will Dawson: http://www.dawsonresearch.com + +.. _Andrew Tulloch: http://tullo.ch/ + +.. _Hanna Wallach: http://dirichlet.net/ + +.. _Yan Yi: http://seowyanyi.org + +.. _Hervé Bredin: http://herve.niderb.fr/ + +.. _Eric Martin: http://www.ericmart.in + +.. _Nicolas Goix: https://perso.telecom-paristech.fr/~goix/ + +.. _Sebastian Raschka: http://sebastianraschka.com + +.. _Brian McFee: https://bmcfee.github.io + +.. _Valentin Stolbunov: http://www.vstolbunov.com + +.. _Jaques Grobler: https://github.com/jaquesgrobler + +.. _Lars Buitinck: https://github.com/larsmans + +.. _Loic Esteve: https://github.com/lesteve + +.. _Noel Dawe: https://github.com/ndawe + +.. _Raghav RV: https://github.com/raghavrv + +.. _Tom Dupre la Tour: https://github.com/TomDLT + +.. _Nelle Varoquaux: https://github.com/nellev + +.. _Bing Tian Dai: https://github.com/btdai + +.. _Dylan Werner-Meier: https://github.com/unautre + +.. _Alyssa Batula: https://github.com/abatula + +.. _Srivatsan Ramesh: https://github.com/srivatsan-ramesh + +.. _Ron Weiss: http://www.ee.columbia.edu/~ronw + +.. _Kathleen Chen: https://github.com/kchen17 + +.. _Vincent Pham: https://github.com/vincentpham1991 + +.. _Denis Engemann: http://denis-engemann.de + +.. _Anish Shah: https://github.com/AnishShah + +.. _Neeraj Gangwar: http://neerajgangwar.in + +.. _Arthur Mensch: https://amensch.fr diff --git a/doc/whats_new/older_versions.rst b/doc/whats_new/older_versions.rst new file mode 100644 index 0000000000000..eeb672914f033 --- /dev/null +++ b/doc/whats_new/older_versions.rst @@ -0,0 +1,1386 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_12.1: + +Version 0.12.1 +=============== + +**October 8, 2012** + +The 0.12.1 release is a bug-fix release with no additional features, but is +instead a set of bug fixes + +Changelog +---------- + +- Improved numerical stability in spectral embedding by `Gael + Varoquaux`_ + +- Doctest under windows 64bit by `Gael Varoquaux`_ + +- Documentation fixes for elastic net by `Andreas Müller`_ and + `Alexandre Gramfort`_ + +- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ + +- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ + +- Fix parallel computing in MDS by `Gael Varoquaux`_ + +- Fix Unicode support in count vectorizer by `Andreas Müller`_ + +- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` + +- Fix clone of SGD objects by `Peter Prettenhofer`_ + +- Stabilize GMM by :user:`Virgile Fritsch ` + +People +------ + + * 14 `Peter Prettenhofer`_ + * 12 `Gael Varoquaux`_ + * 10 `Andreas Müller`_ + * 5 `Lars Buitinck`_ + * 3 :user:`Virgile Fritsch ` + * 1 `Alexandre Gramfort`_ + * 1 `Gilles Louppe`_ + * 1 `Mathieu Blondel`_ + +.. 
_changes_0_12: + +Version 0.12 +============ + +**September 4, 2012** + +Changelog +--------- + +- Various speed improvements of the :ref:`decision trees ` module, by + `Gilles Louppe`_. + +- :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` now support feature subsampling + via the ``max_features`` argument, by `Peter Prettenhofer`_. + +- Added Huber and Quantile loss functions to + :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. + +- :ref:`Decision trees ` and :ref:`forests of randomized trees ` + now support multi-output classification and regression problems, by + `Gilles Louppe`_. + +- Added :class:`preprocessing.LabelEncoder`, a simple utility class to + normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. + +- Added the epsilon-insensitive loss and the ability to make probabilistic + predictions with the modified huber loss in :ref:`sgd`, by + `Mathieu Blondel`_. + +- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. + +- SVMlight file format loader now detects compressed (gzip/bzip2) files and + decompresses them on the fly, by `Lars Buitinck`_. + +- SVMlight file format serializer now preserves double precision floating + point values, by `Olivier Grisel`_. + +- A common testing framework for all estimators was added, by `Andreas Müller`_. + +- Understandable error messages for estimators that do not accept + sparse input by `Gael Varoquaux`_ + +- Speedups in hierarchical clustering by `Gael Varoquaux`_. In + particular building the tree now supports early stopping. This is + useful when the number of clusters is not small compared to the + number of samples. + +- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, + by `Alexandre Gramfort`_. + +- Added :func:`metrics.auc_score` and + :func:`metrics.average_precision_score` convenience functions by `Andreas + Müller`_. + +- Improved sparse matrix support in the :ref:`feature_selection` + module by `Andreas Müller`_. + +- New word boundaries-aware character n-gram analyzer for the + :ref:`text_feature_extraction` module by :user:`@kernc `. + +- Fixed bug in spectral clustering that led to single point clusters + by `Andreas Müller`_. + +- In :class:`feature_extraction.text.CountVectorizer`, added an option to + ignore infrequent words, ``min_df`` by `Andreas Müller`_. + +- Add support for multiple targets in some linear models (ElasticNet, Lasso + and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and + `Alexandre Gramfort`_. + +- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. + +- Fixed feature importance computation in + :ref:`gradient_boosting`. + +API changes summary +------------------- + +- The old ``scikits.learn`` package has disappeared; all code should import + from ``sklearn`` instead, which was introduced in 0.9. + +- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned + with it's order reversed, in order to keep it consistent with the order + of the returned ``fpr`` and ``tpr``. + +- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`, + :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the + object when initialising it and not through ``fit``. Now ``fit`` will + only accept the data as an input parameter. + +- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously, + the default gamma value was only computed the first time ``fit`` was called + and then stored. It is now recalculated on every call to ``fit``. 
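+
+As a usage note for the new :class:`preprocessing.LabelEncoder` added in this
+release, here is a minimal sketch (it assumes present-day ``sklearn`` import
+paths; the toy city labels are purely illustrative)::
+
+    from sklearn.preprocessing import LabelEncoder
+
+    le = LabelEncoder()
+    codes = le.fit_transform(["paris", "tokyo", "paris", "amsterdam"])
+    # classes_ holds the sorted unique labels; transform maps them to 0..n-1
+    print(le.classes_)                   # ['amsterdam' 'paris' 'tokyo']
+    print(codes)                         # [1 2 1 0]
+    print(le.inverse_transform([0, 2]))  # ['amsterdam' 'tokyo']
+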
+ +- All ``Base`` classes are now abstract meta classes so that they can not be + instantiated. + +- :func:`cluster.ward_tree` now also returns the parent array. This is + necessary for early-stopping in which case the tree is not + completely built. + +- In :class:`feature_extraction.text.CountVectorizer` the parameters + ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to + enable grid-searching both at once. + +- In :class:`feature_extraction.text.CountVectorizer`, words that appear + only in one document are now ignored by default. To reproduce + the previous behavior, set ``min_df=1``. + +- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now + returns 2d array when fit on two classes. + +- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function` + and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays + when fit on two classes. + +- Grid of alphas used for fitting :class:`linear_model.LassoCV` and + :class:`linear_model.ElasticNetCV` is now stored + in the attribute ``alphas_`` rather than overriding the init parameter + ``alphas``. + +- Linear models when alpha is estimated by cross-validation store + the estimated value in the ``alpha_`` attribute rather than just + ``alpha`` or ``best_alpha``. + +- :class:`ensemble.GradientBoostingClassifier` now supports + :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and + :meth:`ensemble.GradientBoostingClassifier.staged_predict`. + +- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated. + The all classes in the :ref:`svm` module now automatically select the + sparse or dense representation base on the input. + +- All clustering algorithms now interpret the array ``X`` given to ``fit`` as + input data, in particular :class:`cluster.SpectralClustering` and + :class:`cluster.AffinityPropagation` which previously expected affinity matrices. + +- For clustering algorithms that take the desired number of clusters as a parameter, + this parameter is now called ``n_clusters``. + + +People +------ + * 267 `Andreas Müller`_ + * 94 `Gilles Louppe`_ + * 89 `Gael Varoquaux`_ + * 79 `Peter Prettenhofer`_ + * 60 `Mathieu Blondel`_ + * 57 `Alexandre Gramfort`_ + * 52 `Vlad Niculae`_ + * 45 `Lars Buitinck`_ + * 44 Nelle Varoquaux + * 37 `Jaques Grobler`_ + * 30 Alexis Mignon + * 30 Immanuel Bayer + * 27 `Olivier Grisel`_ + * 16 Subhodeep Moitra + * 13 Yannick Schwartz + * 12 :user:`@kernc ` + * 11 :user:`Virgile Fritsch ` + * 9 Daniel Duckworth + * 9 `Fabian Pedregosa`_ + * 9 `Robert Layton`_ + * 8 John Benediktsson + * 7 Marko Burjek + * 5 `Nicolas Pinto`_ + * 4 Alexandre Abraham + * 4 `Jake Vanderplas`_ + * 3 `Brian Holt`_ + * 3 `Edouard Duchesnay`_ + * 3 Florian Hoenig + * 3 flyingimmidev + * 2 Francois Savard + * 2 Hannes Schulz + * 2 Peter Welinder + * 2 `Yaroslav Halchenko`_ + * 2 Wei Li + * 1 Alex Companioni + * 1 Brandyn A. White + * 1 Bussonnier Matthias + * 1 Charles-Pierre Astolfi + * 1 Dan O'Huiginn + * 1 David Cournapeau + * 1 Keith Goodman + * 1 Ludwig Schwardt + * 1 Olivier Hervieu + * 1 Sergio Medina + * 1 Shiqiao Du + * 1 Tim Sheerman-Chase + * 1 buguen + + + +.. _changes_0_11: + +Version 0.11 +============ + +**May 7, 2012** + +Changelog +--------- + +Highlights +............. + +- Gradient boosted regression trees (:ref:`gradient_boosting`) + for classification and regression by `Peter Prettenhofer`_ + and `Scott White`_ . 
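+
+A minimal sketch of the gradient boosting estimators highlighted above,
+exercising the ``max_features`` subsampling and staged prediction support
+mentioned in the 0.12 notes (the synthetic data and parameter values are
+illustrative, not recommendations)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.ensemble import GradientBoostingClassifier
+
+    X, y = make_classification(n_samples=200, random_state=0)
+    # a float max_features subsamples that fraction of features per split
+    clf = GradientBoostingClassifier(n_estimators=50, max_features=0.5,
+                                     random_state=0).fit(X, y)
+    # staged_predict yields the prediction after each boosting stage, so the
+    # effect of adding trees can be inspected without refitting
+    for stage, y_stage in enumerate(clf.staged_predict(X), start=1):
+        if stage % 10 == 0:
+            print(stage, (y_stage == y).mean())
+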
+ +- Simple dict-based feature loader with support for categorical variables + (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_. + +- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`) + and added macro and micro average options to + :func:`metrics.precision_score`, :func:`metrics.recall_score` and + :func:`metrics.f1_score` by `Satrajit Ghosh`_. + +- :ref:`out_of_bag` of generalization error for :ref:`ensemble` + by `Andreas Müller`_. + +- Randomized sparse linear models for feature + selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ + +- :ref:`label_propagation` for semi-supervised learning, by Clay + Woolam. **Note** the semi-supervised API is still work in progress, + and may change. + +- Added BIC/AIC model selection to classical :ref:`gmm` and unified + the API with the remainder of scikit-learn, by `Bertrand Thirion`_ + +- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is + a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, + by Yannick Schwartz. + +- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a + ``shrink_threshold`` parameter, which implements **shrunken centroid + classification**, by `Robert Layton`_. + +Other changes +.............. + +- Merged dense and sparse implementations of :ref:`sgd` module and + exposed utility extension types for sequential + datasets ``seq_dataset`` and weight vectors ``weight_vector`` + by `Peter Prettenhofer`_. + +- Added ``partial_fit`` (support for online/minibatch learning) and + warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. + +- Dense and sparse implementations of :ref:`svm` classes and + :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. + +- Regressors can now be used as base estimator in the :ref:`multiclass` + module by `Mathieu Blondel`_. + +- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, + by `Mathieu Blondel`_. + +- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument + to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. + +- Improved :ref:`cross_validation` and :ref:`grid_search` documentation + and introduced the new :func:`cross_validation.train_test_split` + helper function by `Olivier Grisel`_ + +- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for + consistency with ``decision_function``; for ``kernel==linear``, + ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. + +- Performance improvements to efficient leave-one-out cross-validated + Ridge regression, esp. for the ``n_samples > n_features`` case, in + :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. + +- Refactoring and simplification of the :ref:`text_feature_extraction` + API and fixed a bug that caused possible negative IDF, + by `Olivier Grisel`_. + +- Beam pruning option in :class:`_BaseHMM` module has been removed since it + is difficult to Cythonize. If you are interested in contributing a Cython + version, you can use the python version in the git history as a reference. + +- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for + nearest neighbors searches. The metric can be specified by argument ``p``. + +API changes summary +------------------- + +- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` + instead. 
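+
+To illustrate the dict-based feature loader highlighted above, a minimal
+sketch (the measurement dicts are illustrative; the behaviour shown is that
+of current ``sklearn`` releases)::
+
+    from sklearn.feature_extraction import DictVectorizer
+
+    measurements = [
+        {"city": "Dubai", "temperature": 33.0},
+        {"city": "London", "temperature": 12.0},
+    ]
+    vec = DictVectorizer(sparse=False)
+    X = vec.fit_transform(measurements)
+    # string values are one-hot encoded, numeric values pass through
+    print(vec.feature_names_)  # ['city=Dubai', 'city=London', 'temperature']
+    print(X)                   # [[ 1.  0. 33.]
+                               #  [ 0.  1. 12.]]
+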
+ +- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module + :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, + :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` + and/or :class:`RadiusNeighborsRegressor` instead. + +- Sparse classes in the :ref:`sgd` module are now deprecated. + +- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, + parameters must be passed to an object when initialising it and not through + ``fit``. Now ``fit`` will only accept the data as an input parameter. + +- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. + ``sample`` and ``score`` or ``predict`` should be used instead. + +- attribute ``_scores`` and ``_pvalues`` in univariate feature selection + objects are now deprecated. + ``scores_`` or ``pvalues_`` should be used instead. + +- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and + :class:`NuSVC`, the ``class_weight`` parameter is now an initialization + parameter, not a parameter to fit. This makes grid searches + over this parameter possible. + +- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be + consistent with the Olivetti faces dataset. Use ``images`` and + ``pairs`` attribute to access the natural images shapes instead. + +- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter + changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with + ``'ovr'`` being the default. This does not change the default behavior + but hopefully is less confusing. + +- Class :class:`feature_selection.text.Vectorizer` is deprecated and + replaced by :class:`feature_selection.text.TfidfVectorizer`. + +- The preprocessor / analyzer nested structure for text feature + extraction has been removed. All those features are + now directly passed as flat constructor arguments + to :class:`feature_selection.text.TfidfVectorizer` and + :class:`feature_selection.text.CountVectorizer`, in particular the + following parameters are now used: + +- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default + analysis scheme, or use a specific python callable (as previously). + +- ``tokenizer`` and ``preprocessor`` have been introduced to make it + still possible to customize those steps with the new API. + +- ``input`` explicitly control how to interpret the sequence passed to + ``fit`` and ``predict``: filenames, file objects or direct (byte or + Unicode) strings. + +- charset decoding is explicit and strict by default. + +- the ``vocabulary``, fitted or not is now stored in the + ``vocabulary_`` attribute to be consistent with the project + conventions. + +- Class :class:`feature_selection.text.TfidfVectorizer` now derives directly + from :class:`feature_selection.text.CountVectorizer` to make grid + search trivial. + +- methods ``rvs`` in :class:`_BaseHMM` module are now deprecated. + ``sample`` should be used instead. + +- Beam pruning option in :class:`_BaseHMM` module is removed since it is + difficult to be Cythonized. If you are interested, you can look in the + history codes by git. + +- The SVMlight format loader now supports files with both zero-based and + one-based column indices, since both occur "in the wild". + +- Arguments in class :class:`ShuffleSplit` are now consistent with + :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and + ``train_fraction`` are deprecated and renamed to ``test_size`` and + ``train_size`` and can accept both ``float`` and ``int``. 
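+
+A short sketch of the renamed ``test_size`` argument in action; note that it
+uses the modern ``sklearn.model_selection`` import path, whereas in the 0.11
+era the class lived in ``sklearn.cross_validation``::
+
+    import numpy as np
+    from sklearn.model_selection import ShuffleSplit
+
+    X = np.arange(12).reshape(6, 2)
+    # test_size / train_size accept a float fraction or an absolute int count
+    ss = ShuffleSplit(n_splits=3, test_size=0.25, random_state=0)
+    for train_idx, test_idx in ss.split(X):
+        print("train:", train_idx, "test:", test_idx)
+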
+ +- Arguments in class :class:`Bootstrap` are now consistent with + :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and + ``n_train`` are deprecated and renamed to ``test_size`` and + ``train_size`` and can accept both ``float`` and ``int``. + +- Argument ``p`` added to classes in :ref:`neighbors` to specify an + arbitrary Minkowski metric for nearest neighbors searches. + + +People +------ + * 282 `Andreas Müller`_ + * 239 `Peter Prettenhofer`_ + * 198 `Gael Varoquaux`_ + * 129 `Olivier Grisel`_ + * 114 `Mathieu Blondel`_ + * 103 Clay Woolam + * 96 `Lars Buitinck`_ + * 88 `Jaques Grobler`_ + * 82 `Alexandre Gramfort`_ + * 50 `Bertrand Thirion`_ + * 42 `Robert Layton`_ + * 28 flyingimmidev + * 26 `Jake Vanderplas`_ + * 26 Shiqiao Du + * 21 `Satrajit Ghosh`_ + * 17 `David Marek`_ + * 17 `Gilles Louppe`_ + * 14 `Vlad Niculae`_ + * 11 Yannick Schwartz + * 10 `Fabian Pedregosa`_ + * 9 fcostin + * 7 Nick Wilson + * 5 Adrien Gaidon + * 5 `Nicolas Pinto`_ + * 4 `David Warde-Farley`_ + * 5 Nelle Varoquaux + * 5 Emmanuelle Gouillart + * 3 Joonas Sillanpää + * 3 Paolo Losi + * 2 Charles McCarthy + * 2 Roy Hyunjin Han + * 2 Scott White + * 2 ibayer + * 1 Brandyn White + * 1 Carlos Scheidegger + * 1 Claire Revillet + * 1 Conrad Lee + * 1 `Edouard Duchesnay`_ + * 1 Jan Hendrik Metzen + * 1 Meng Xinfan + * 1 `Rob Zinkov`_ + * 1 Shiqiao + * 1 Udi Weinsberg + * 1 Virgile Fritsch + * 1 Xinfan Meng + * 1 Yaroslav Halchenko + * 1 jansoe + * 1 Leon Palafox + + +.. _changes_0_10: + +Version 0.10 +============ + +**January 11, 2012** + +Changelog +--------- + +- Python 2.5 compatibility was dropped; the minimum Python version needed + to use scikit-learn is now 2.6. + +- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with + associated cross-validated estimator, by `Gael Varoquaux`_ + +- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_, + `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete + documentation and examples. + +- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378). + +- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367). + +- Faster tests by `Fabian Pedregosa`_ and others. + +- Silhouette Coefficient cluster analysis evaluation metric added as + :func:`sklearn.metrics.silhouette_score` by Robert Layton. + +- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter: + the clustering algorithm used to be run ``n_init`` times but the last + solution was retained instead of the best solution by `Olivier Grisel`_. + +- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse + predict methods; Enhanced test time performance by converting model + parameters to fortran-style arrays after fitting (only multi-class). + +- Adjusted Mutual Information metric added as + :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton. + +- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear + now support scaling of C regularization parameter by the number of + samples by `Alexandre Gramfort`_. + +- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and + `Brian Holt`_. The module comes with the random forest algorithm and the + extra-trees method, along with documentation and examples. + +- :ref:`outlier_detection`: outlier and novelty detection, by + :user:`Virgile Fritsch `. + +- :ref:`kernel_approximation`: a transform implementing kernel + approximation for fast SGD on non-linear kernels by + `Andreas Müller`_. 
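+
+A minimal sketch of the kernel approximation workflow described above,
+pairing the transform with :ref:`sgd` (the dataset and the ``gamma`` /
+``n_components`` values are illustrative)::
+
+    from sklearn.datasets import make_moons
+    from sklearn.kernel_approximation import RBFSampler
+    from sklearn.linear_model import SGDClassifier
+
+    X, y = make_moons(n_samples=200, noise=0.1, random_state=0)
+    # approximate an RBF kernel feature map, then fit a fast linear model
+    rbf = RBFSampler(gamma=2.0, n_components=100, random_state=0)
+    X_features = rbf.fit_transform(X)
+    clf = SGDClassifier(max_iter=1000, random_state=0).fit(X_features, y)
+    print("train accuracy: %.2f" % clf.score(X_features, y))
+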
+ +- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. + +- :ref:`SparseCoder` by `Vlad Niculae`_. + +- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. + +- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. + +- Improved documentation for developers and for the :mod:`sklearn.utils` + module, by `Jake Vanderplas`_. + +- Vectorized 20newsgroups dataset loader + (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by + `Mathieu Blondel`_. + +- :ref:`multiclass` by `Lars Buitinck`_. + +- Utilities for fast computation of mean and variance for sparse matrices + by `Mathieu Blondel`_. + +- Make :func:`sklearn.preprocessing.scale` and + :class:`sklearn.preprocessing.Scaler` work on sparse matrices by + `Olivier Grisel`_ + +- Feature importances using decision trees and/or forest of trees, + by `Gilles Louppe`_. + +- Parallel implementation of forests of randomized trees by + `Gilles Louppe`_. + +- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train + sets as well as the test sets by `Olivier Grisel`_. + +- Errors in the build of the documentation fixed by `Andreas Müller`_. + + +API changes summary +------------------- + +Here are the code migration instructions when upgrading from scikit-learn +version 0.9: + +- Some estimators that may overwrite their inputs to save memory previously + had ``overwrite_`` parameters; these have been replaced with ``copy_`` + parameters with exactly the opposite meaning. + + This particularly affects some of the estimators in :mod:`linear_model`. + The default behavior is still to copy everything passed in. + +- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no + longer supports loading two files at once; use ``load_svmlight_files`` + instead. Also, the (unused) ``buffer_mb`` parameter is gone. + +- Sparse estimators in the :ref:`sgd` module use dense parameter vector + ``coef_`` instead of ``sparse_coef_``. This significantly improves + test time performance. + +- The :ref:`covariance` module now has a robust estimator of + covariance, the Minimum Covariance Determinant estimator. + +- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored + but the changes are backwards compatible. They have been moved to the + :mod:`metrics.cluster.supervised`, along with + :mod:`metrics.cluster.unsupervised` which contains the Silhouette + Coefficient. + +- The ``permutation_test_score`` function now behaves the same way as + ``cross_val_score`` (i.e. uses the mean score across the folds.) + +- Cross Validation generators now use integer indices (``indices=True``) + by default instead of boolean masks. This make it more intuitive to + use with sparse matrix data. + +- The functions used for sparse coding, ``sparse_encode`` and + ``sparse_encode_parallel`` have been combined into + :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays + have been transposed for consistency with the matrix factorization setting, + as opposed to the regression setting. + +- Fixed an off-by-one error in the SVMlight/LibSVM file format handling; + files generated using :func:`sklearn.datasets.dump_svmlight_file` should be + re-generated. (They should continue to work, but accidentally had one + extra column of zeros prepended.) + +- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. 
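+
+As an illustration of the forest feature importances and parallel tree
+fitting added in 0.10, a minimal sketch on synthetic data (all parameter
+values are illustrative)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.ensemble import RandomForestClassifier
+
+    X, y = make_classification(n_samples=300, n_features=8, n_informative=3,
+                               random_state=0)
+    # n_jobs fits the trees in parallel; importances are normalized to sum to 1
+    forest = RandomForestClassifier(n_estimators=100, n_jobs=2, random_state=0)
+    forest.fit(X, y)
+    print(forest.feature_importances_.round(3))
+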
+ +- :func:`sklearn.utils.extmath.fast_svd` has been renamed + :func:`sklearn.utils.extmath.randomized_svd` and the default + oversampling is now fixed to 10 additional random vectors instead + of doubling the number of components to extract. The new behavior + follows the reference paper. + + +People +------ + +The following people contributed to scikit-learn since last release: + + * 246 `Andreas Müller`_ + * 242 `Olivier Grisel`_ + * 220 `Gilles Louppe`_ + * 183 `Brian Holt`_ + * 166 `Gael Varoquaux`_ + * 144 `Lars Buitinck`_ + * 73 `Vlad Niculae`_ + * 65 `Peter Prettenhofer`_ + * 64 `Fabian Pedregosa`_ + * 60 Robert Layton + * 55 `Mathieu Blondel`_ + * 52 `Jake Vanderplas`_ + * 44 Noel Dawe + * 38 `Alexandre Gramfort`_ + * 24 :user:`Virgile Fritsch ` + * 23 `Satrajit Ghosh`_ + * 3 Jan Hendrik Metzen + * 3 Kenneth C. Arnold + * 3 Shiqiao Du + * 3 Tim Sheerman-Chase + * 3 `Yaroslav Halchenko`_ + * 2 Bala Subrahmanyam Varanasi + * 2 DraXus + * 2 Michael Eickenberg + * 1 Bogdan Trach + * 1 Félix-Antoine Fortin + * 1 Juan Manuel Caicedo Carvajal + * 1 Nelle Varoquaux + * 1 `Nicolas Pinto`_ + * 1 Tiziano Zito + * 1 Xinfan Meng + + + +.. _changes_0_9: + +Version 0.9 +=========== + +**September 21, 2011** + +scikit-learn 0.9 was released on September 2011, three months after the 0.8 +release and includes the new modules :ref:`manifold`, :ref:`dirichlet_process` +as well as several new algorithms and documentation improvements. + +This release also includes the dictionary-learning work developed by +`Vlad Niculae`_ as part of the `Google Summer of Code +`_ program. + + + +.. |banner1| image:: ../auto_examples/manifold/images/thumb/sphx_glr_plot_compare_methods_thumb.png + :target: ../auto_examples/manifold/plot_compare_methods.html + +.. |banner2| image:: ../auto_examples/linear_model/images/thumb/sphx_glr_plot_omp_thumb.png + :target: ../auto_examples/linear_model/plot_omp.html + +.. |banner3| image:: ../auto_examples/decomposition/images/thumb/sphx_glr_plot_kernel_pca_thumb.png + :target: ../auto_examples/decomposition/plot_kernel_pca.html + +.. |center-div| raw:: html + +
+    <div style="text-align: center">
+
+.. |end-div| raw:: html
+
+    </div>
    + + +|center-div| |banner2| |banner1| |banner3| |end-div| + +Changelog +--------- + +- New :ref:`manifold` module by `Jake Vanderplas`_ and + `Fabian Pedregosa`_. + +- New :ref:`Dirichlet Process ` Gaussian Mixture + Model by `Alexandre Passos`_ + +- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : + general refactoring, support for sparse matrices in input, speed and + documentation improvements. See the next section for a full list of API + changes. + +- Improvements on the :ref:`feature_selection` module by + `Gilles Louppe`_ : refactoring of the RFE classes, documentation + rewrite, increased efficiency and minor API changes. + +- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and + `Alexandre Gramfort`_ + +- Printing an estimator now behaves independently of architectures + and Python version thanks to :user:`Jean Kossaifi `. + +- :ref:`Loader for libsvm/svmlight format ` by + `Mathieu Blondel`_ and `Lars Buitinck`_ + +- Documentation improvements: thumbnails in + example gallery by `Fabian Pedregosa`_. + +- Important bugfixes in :ref:`svm` module (segfaults, bad + performance) by `Fabian Pedregosa`_. + +- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` + by `Lars Buitinck`_ + +- Text feature extraction optimizations by Lars Buitinck + +- Chi-Square feature selection + (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. + +- :ref:`sample_generators` module refactoring by `Gilles Louppe`_ + +- :ref:`multiclass` by `Mathieu Blondel`_ + +- Ball tree rewrite by `Jake Vanderplas`_ + +- Implementation of :ref:`dbscan` algorithm by Robert Layton + +- Kmeans predict and transform by Robert Layton + +- Preprocessing module refactoring by `Olivier Grisel`_ + +- Faster mean shift by Conrad Lee + +- New ``Bootstrap``, :ref:`ShuffleSplit` and various other + improvements in cross validation schemes by `Olivier Grisel`_ and + `Gael Varoquaux`_ + +- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ + +- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ + +- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ + +- Implementation of :class:`linear_model.LassoLarsCV` + (cross-validated Lasso solver using the Lars algorithm) and + :class:`linear_model.LassoLarsIC` (BIC/AIC model + selection in Lars) by `Gael Varoquaux`_ + and `Alexandre Gramfort`_ + +- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu + +- Distance helper functions :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton + +- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. + +- :ref:`mldata` utilities by Pietro Berkes. + +- :ref:`olivetti_faces` by `David Warde-Farley`_. + + +API changes summary +------------------- + +Here are the code migration instructions when upgrading from scikit-learn +version 0.8: + +- The ``scikits.learn`` package was renamed ``sklearn``. There is + still a ``scikits.learn`` package alias for backward compatibility. + + Third-party projects with a dependency on scikit-learn 0.9+ should + upgrade their codebase. 
+  For instance, under Linux / MacOSX just run
+  (make a backup first!)::
+
+    find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'
+
+- Estimators no longer accept model parameters as ``fit`` arguments:
+  instead all parameters must only be passed as constructor
+  arguments or using the now public ``set_params`` method inherited
+  from :class:`base.BaseEstimator`.
+
+  Some estimators can still accept keyword arguments on ``fit``,
+  but this is restricted to data-dependent values (e.g. a Gram matrix
+  or an affinity matrix that is precomputed from the ``X`` data matrix).
+
+- The ``cross_val`` package has been renamed to ``cross_validation``,
+  although there is also a ``cross_val`` package alias in place for
+  backward compatibility.
+
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::
+
+    find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'
+
+- The ``score_func`` argument of the
+  ``sklearn.cross_validation.cross_val_score`` function is now expected
+  to accept ``y_test`` and ``y_predicted`` as its only arguments for
+  classification and regression tasks, or ``X_test`` for unsupervised
+  estimators.
+
+- The ``gamma`` parameter for support vector machine algorithms is set
+  to ``1 / n_features`` by default, instead of ``1 / n_samples``.
+
+- The ``sklearn.hmm`` module has been marked as orphaned: it will be removed
+  from scikit-learn in version 0.11 unless someone steps up to
+  contribute documentation, examples and fix lurking numerical
+  stability issues.
+
+- ``sklearn.neighbors`` has been made into a submodule. The two previously
+  available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``,
+  have been marked as deprecated. Their functionality has been divided
+  among five new classes: ``NearestNeighbors`` for unsupervised neighbors
+  searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
+  for supervised classification problems, and ``KNeighborsRegressor``
+  & ``RadiusNeighborsRegressor`` for supervised regression problems.
+
+- ``sklearn.ball_tree.BallTree`` has been moved to
+  ``sklearn.neighbors.BallTree``. Using the former will generate a warning.
+
+- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
+  LassoLARSCV, etc.) have been renamed to
+  ``sklearn.linear_model.Lars()``.
+
+- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have
+  a ``Y`` parameter, which by default is ``None``. If not given, the result
+  is the pairwise distance (or kernel similarity) between the samples in X.
+  If given, the result is the pairwise distance (or kernel similarity)
+  between samples in X and samples in Y.
+
+- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
+  and by default returns the pairwise distance. For the component-wise distance,
+  set the parameter ``sum_over_features`` to ``False``.
+
+Backward compatibility package aliases and other deprecated classes and
+functions will be removed in version 0.11.
+
+
+People
+------
+
+38 people contributed to this release.
+
+- 387 `Vlad Niculae`_
+- 320 `Olivier Grisel`_
+- 192 `Lars Buitinck`_
+- 179 `Gael Varoquaux`_
+- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
+- 127 `Jake Vanderplas`_
+- 120 `Mathieu Blondel`_
+- 85 `Alexandre Passos`_
+- 67 `Alexandre Gramfort`_
+- 57 `Peter Prettenhofer`_
+- 56 `Gilles Louppe`_
+- 42 Robert Layton
+- 38 Nelle Varoquaux
+- 32 :user:`Jean Kossaifi `
+- 30 Conrad Lee
+- 22 Pietro Berkes
+- 18 andy
+- 17 David Warde-Farley
+- 12 Brian Holt
+- 11 Robert
+- 8 Amit Aides
+- 8 :user:`Virgile Fritsch `
+- 7 `Yaroslav Halchenko`_
+- 6 Salvatore Masecchia
+- 5 Paolo Losi
+- 4 Vincent Schut
+- 3 Alexis Metaireau
+- 3 Bryan Silverthorn
+- 3 `Andreas Müller`_
+- 2 Minwoo Jake Lee
+- 1 Emmanuelle Gouillart
+- 1 Keith Goodman
+- 1 Lucas Wiman
+- 1 `Nicolas Pinto`_
+- 1 Thouis (Ray) Jones
+- 1 Tim Sheerman-Chase
+
+
+.. _changes_0_8:
+
+Version 0.8
+===========
+
+**May 11, 2011**
+
+scikit-learn 0.8 was released in May 2011, one month after the first
+"international" `scikit-learn coding sprint `_ and is
+marked by the inclusion of important modules: :ref:`hierarchical_clustering`,
+:ref:`cross_decomposition`, :ref:`NMF`, initial support for Python 3 and by
+important enhancements and bug fixes.
+
+
+Changelog
+---------
+
+Several new modules were introduced during this release:
+
+- New :ref:`hierarchical_clustering` module by Vincent Michel,
+  `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
+
+- :ref:`kernel_pca` implementation by `Mathieu Blondel`_.
+
+- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
+
+- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
+
+- :ref:`NMF` module by `Vlad Niculae`_.
+
+- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
+  :user:`Virgile Fritsch ` in the :ref:`covariance` module.
+
+
+Some other modules benefited from significant improvements or cleanups.
+
+- Initial support for Python 3: builds and imports cleanly,
+  some modules are usable while others have failing tests, by `Fabian Pedregosa`_.
+
+- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
+
+- Guide :ref:`performance-howto` by `Olivier Grisel`_.
+
+- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.
+
+- Bug and style fixes in the :ref:`k_means` algorithm by Jan Schlüter.
+
+- Added attribute ``converged`` to Gaussian Mixture Models by Vincent Schut.
+
+- Implemented ``transform``, ``predict_log_proba`` in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` by `Mathieu Blondel`_.
+
+- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
+  `Gael Varoquaux`_ and Amit Aides.
+
+- Refactored SGD module (removed code duplication, better variable naming),
+  added interface for sample weight by `Peter Prettenhofer`_.
+
+- Wrapped BallTree with Cython by Thouis (Ray) Jones.
+
+- Added function :func:`svm.l1_min_c` by Paolo Losi (see the example
+  after this list).
+
+- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
+  `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
+  `Fabian Pedregosa`_.
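+
+As an illustration of the new :func:`svm.l1_min_c` helper mentioned above,
+here is a minimal sketch (a binary iris subset for illustration; the
+variable names are ours, not part of the original release notes)::
+
+  from sklearn.datasets import load_iris
+  from sklearn.svm import l1_min_c
+
+  iris = load_iris()
+  X, y = iris.data[:100], iris.target[:100]  # two-class subset
+  # smallest C for which an l1-penalized linear model is not all zeros
+  c_min = l1_min_c(X, y)
+  cs = [c_min * 10 ** k for k in range(4)]  # a grid starting at that minimum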
+
+
+People
+------
+
+People that made this release possible preceded by number of commits:
+
+- 159 `Olivier Grisel`_
+- 96 `Gael Varoquaux`_
+- 96 `Vlad Niculae`_
+- 94 `Fabian Pedregosa`_
+- 36 `Alexandre Gramfort`_
+- 32 Paolo Losi
+- 31 `Edouard Duchesnay`_
+- 30 `Mathieu Blondel`_
+- 25 `Peter Prettenhofer`_
+- 22 `Nicolas Pinto`_
+- 11 :user:`Virgile Fritsch `
+- 7 Lars Buitinck
+- 6 Vincent Michel
+- 5 `Bertrand Thirion`_
+- 4 Thouis (Ray) Jones
+- 4 Vincent Schut
+- 3 Jan Schlüter
+- 2 Julien Miotte
+- 2 `Matthieu Perrot`_
+- 2 Yann Malet
+- 2 `Yaroslav Halchenko`_
+- 1 Amit Aides
+- 1 `Andreas Müller`_
+- 1 Feth Arezki
+- 1 Meng Xinfan
+
+
+.. _changes_0_7:
+
+Version 0.7
+===========
+
+**March 2, 2011**
+
+scikit-learn 0.7 was released in March 2011, roughly three months
+after the 0.6 release. This release is marked by speed
+improvements in existing algorithms like k-Nearest Neighbors and
+K-Means, and by the inclusion of an efficient algorithm for
+computing the Ridge Generalized Cross Validation solution. Unlike the
+preceding release, no new modules were added to this release.
+
+Changelog
+---------
+
+- Performance improvements for Gaussian Mixture Model sampling [Jan
+  Schlüter].
+
+- Implementation of efficient leave-one-out cross-validated Ridge in
+  :class:`linear_model.RidgeCV` [`Mathieu Blondel`_].
+
+- Better handling of collinearity and early stopping in
+  :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
+  Pedregosa`_].
+
+- Fixes for liblinear ordering of labels and sign of coefficients
+  [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
+
+- Performance improvements for Nearest Neighbors algorithm in
+  high-dimensional spaces [`Fabian Pedregosa`_].
+
+- Performance improvements for :class:`cluster.KMeans` [`Gael
+  Varoquaux`_ and `James Bergstra`_].
+
+- Sanity checks for SVM-based classes [`Mathieu Blondel`_].
+
+- Refactoring of :class:`neighbors.NeighborsClassifier` and
+  :func:`neighbors.kneighbors_graph`: added different algorithms for
+  the k-Nearest Neighbor Search and implemented a more stable
+  algorithm for finding barycenter weights. Also added some
+  developer documentation for this module; see
+  `notes_neighbors `_ for more information [`Fabian Pedregosa`_].
+
+- Documentation improvements: Added :class:`pca.RandomizedPCA` and
+  :class:`linear_model.LogisticRegression` to the class
+  reference. Also added references for matrices used in clustering
+  and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
+  Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
+  Gouillart].
+
+- Bound ``decision_function`` in classes that make use of liblinear_,
+  dense and sparse variants, like :class:`svm.LinearSVC` or
+  :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
+
+- Performance and API improvements to
+  :func:`metrics.euclidean_distances` and to
+  :class:`pca.RandomizedPCA` [`James Bergstra`_].
+
+- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche].
+
+- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
+  [`Ron Weiss`_].
+
+- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng].
+
+
+People
+------
+
+People that made this release possible preceded by number of commits:
+
+- 85 `Fabian Pedregosa`_
+- 67 `Mathieu Blondel`_
+- 20 `Alexandre Gramfort`_
+- 19 `James Bergstra`_
+- 14 Dan Yamins
+- 13 `Olivier Grisel`_
+- 12 `Gael Varoquaux`_
+- 4 `Edouard Duchesnay`_
+- 4 `Ron Weiss`_
+- 2 Satrajit Ghosh
+- 2 Vincent Dubourg
+- 1 Emmanuelle Gouillart
+- 1 Kamel Ibn Hassen Derouiche
+- 1 Paolo Losi
+- 1 VirgileFritsch
+- 1 `Yaroslav Halchenko`_
+- 1 Xinfan Meng
+
+
+.. _changes_0_6:
+
+Version 0.6
+===========
+
+**December 21, 2010**
+
+scikit-learn 0.6 was released in December 2010. It is marked by the
+inclusion of several new modules and a general renaming of old
+ones. It is also marked by the inclusion of new examples, including
+applications to real-world datasets.
+
+
+Changelog
+---------
+
+- New `stochastic gradient `_ descent
+  module by Peter Prettenhofer. The module comes with complete
+  documentation and examples.
+
+- Improved svm module: memory consumption has been reduced by 50%,
+  a heuristic to automatically set class weights, and the possibility to
+  assign weights to samples (see
+  :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
+
+- New :ref:`gaussian_process` module by Vincent Dubourg. This module
+  also has great documentation and some very neat examples. See
+  example_gaussian_process_plot_gp_regression.py or
+  example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
+  for a taste of what can be done.
+
+- It is now possible to use liblinear’s Multi-class SVC (option
+  multi_class in :class:`svm.LinearSVC`).
+
+- New features and performance improvements of text feature
+  extraction.
+
+- Improved sparse matrix support, both in main classes
+  (:class:`grid_search.GridSearchCV`) and in the modules
+  ``sklearn.svm.sparse`` and ``sklearn.linear_model.sparse``.
+
+- Lots of cool new examples and a new section that uses real-world
+  datasets were created. These include:
+  :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
+  :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
+  :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
+  :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
+  others.
+
+- Faster :ref:`least_angle_regression` algorithm. It is now 2x
+  faster than the R version in the worst case and up to 10x faster
+  in some cases.
+
+- Faster coordinate descent algorithm. In particular, the full path
+  version of lasso (:func:`linear_model.lasso_path`) is more than
+  200x faster than before.
+
+- It is now possible to get probability estimates from a
+  :class:`linear_model.LogisticRegression` model.
+
+- Module renaming: the ``glm`` module has been renamed to ``linear_model``,
+  the ``gmm`` module has been included in the more general ``mixture``
+  module and the ``sgd`` module has been included in ``linear_model``.
+
+- Lots of bug fixes and documentation improvements.
+
+
+People
+------
+
+People that made this release possible preceded by number of commits:
+
+  * 207 `Olivier Grisel`_
+
+  * 167 `Fabian Pedregosa`_
+
+  * 97 `Peter Prettenhofer`_
+
+  * 68 `Alexandre Gramfort`_
+
+  * 59 `Mathieu Blondel`_
+
+  * 55 `Gael Varoquaux`_
+
+  * 33 Vincent Dubourg
+
+  * 21 `Ron Weiss`_
+
+  * 9 Bertrand Thirion
+
+  * 3 `Alexandre Passos`_
+
+  * 3 Anne-Laure Fouque
+
+  * 2 Ronan Amicel
+
+  * 1 `Christian Osendorfer`_
+
+
+
+.. _changes_0_5:
+
+
+Version 0.5
+===========
+
+**October 11, 2010**
+
+Changelog
+---------
+
+New classes
+-----------
+
+- Support for sparse matrices in some classifiers of modules
+  ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
+  :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
+  :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`).
+
+- New :class:`pipeline.Pipeline` object to compose different estimators.
+
+- Recursive Feature Elimination routines in module
+  :ref:`feature_selection`.
+
+- Addition of various classes capable of cross validation in the
+  linear_model module (:class:`linear_model.LassoCV`,
+  :class:`linear_model.ElasticNetCV`, etc.).
+
+- New, more efficient LARS algorithm implementation. The Lasso
+  variant of the algorithm is also implemented. See
+  :func:`linear_model.lars_path`, :class:`linear_model.Lars` and
+  :class:`linear_model.LassoLars`.
+
+- New Hidden Markov Models module (see classes
+  :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
+  :class:`hmm.GMMHMM`).
+
+- New module feature_extraction (see :ref:`class reference `).
+
+- New FastICA algorithm in module sklearn.fastica.
+
+
+Documentation
+-------------
+
+- Improved documentation for many modules, now separating
+  narrative documentation from the class reference. As an example,
+  see `documentation for the SVM module `_ and the
+  complete `class reference `_.
+
+Fixes
+-----
+
+- API changes: variable names adhere to PEP-8, with more
+  meaningful names.
+
+- Fixes for svm module to run on a shared memory context
+  (multiprocessing).
+
+- It is again possible to generate latex (and thus PDF) from the
+  sphinx docs.
+
+Examples
+--------
+
+- New examples using some of the mlcomp datasets:
+  ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and
+  :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`.
+
+- Many more examples. `See here `_ for
+  the full list of examples.
+
+
+External dependencies
+---------------------
+
+- Joblib is now a dependency of this package, although it is
+  shipped with scikit-learn (as ``sklearn.externals.joblib``).
+
+Removed modules
+---------------
+
+- Module ann (Artificial Neural Networks) has been removed from
+  the distribution. Users wanting this sort of algorithm should
+  take a look at pybrain.
+
+Misc
+----
+
+- New sphinx theme for the web page.
+
+
+Authors
+-------
+
+The following is a list of authors for this release, preceded by
+number of commits:
+
+  * 262 Fabian Pedregosa
+  * 240 Gael Varoquaux
+  * 149 Alexandre Gramfort
+  * 116 Olivier Grisel
+  * 40 Vincent Michel
+  * 38 Ron Weiss
+  * 23 Matthieu Perrot
+  * 10 Bertrand Thirion
+  * 9 VirgileFritsch
+  * 7 Yaroslav Halchenko
+  * 6 Edouard Duchesnay
+  * 4 Mathieu Blondel
+  * 1 Ariel Rokem
+  * 1 Matthieu Brucher
+
+Version 0.4
+===========
+
+**August 26, 2010**
+
+Changelog
+---------
+
+Major changes in this release include:
+
+- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring &
+  speed improvements (roughly 100x faster).
+
+- Coordinate Descent refactoring (and bug fixing) for consistency
+  with R's package GLMNET.
+
+- New metrics module.
+
+- New GMM module contributed by Ron Weiss.
+
+- Implementation of the LARS algorithm (without Lasso variant for now).
+
+- ``feature_selection`` module redesign.
+
+- Migration to GIT as version control system.
+
+- Removal of obsolete attrselect module.
+
+- Rename of private compiled extensions (added underscore).
+
+- Removal of legacy unmaintained code.
+ +- Documentation improvements (both docstring and rst). + +- Improvement of the build system to (optionally) link with MKL. + Also, provide a lite BLAS implementation in case no system-wide BLAS is + found. + +- Lots of new examples. + +- Many, many bug fixes ... + + +Authors +------- + +The committer list for this release is the following (preceded by number +of commits): + + * 143 Fabian Pedregosa + * 35 Alexandre Gramfort + * 34 Olivier Grisel + * 11 Gael Varoquaux + * 5 Yaroslav Halchenko + * 2 Vincent Michel + * 1 Chris Filo Gorgolewski + + +Earlier versions +================ + +Earlier versions included contributions by Fred Mailhot, David Cooke, +David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. + diff --git a/doc/whats_new/v0.13.rst b/doc/whats_new/v0.13.rst new file mode 100644 index 0000000000000..c234cd6eb2a37 --- /dev/null +++ b/doc/whats_new/v0.13.rst @@ -0,0 +1,391 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_13_1: + +Version 0.13.1 +============== + +**February 23, 2013** + +The 0.13.1 release only fixes some bugs and does not add any new functionality. + +Changelog +--------- + +- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being + interpreted as a test by `Yaroslav Halchenko`_. + +- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` + by `Gael Varoquaux`_. + +- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. + +- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. + +- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. + +- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. + +- Other small improvements to tests and documentation. + +People +------ +List of contributors for release 0.13.1 by number of commits. + * 16 `Lars Buitinck`_ + * 12 `Andreas Müller`_ + * 8 `Gael Varoquaux`_ + * 5 Robert Marchman + * 3 `Peter Prettenhofer`_ + * 2 Hrishikesh Huilgolkar + * 1 Bastiaan van den Berg + * 1 Diego Molla + * 1 `Gilles Louppe`_ + * 1 `Mathieu Blondel`_ + * 1 `Nelle Varoquaux`_ + * 1 Rafael Cunha de Almeida + * 1 Rolando Espinoza La fuente + * 1 `Vlad Niculae`_ + * 1 `Yaroslav Halchenko`_ + + +.. _changes_0_13: + +Version 0.13 +============ + +**January 21, 2013** + +New Estimator Classes +--------------------- + +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two + data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check + your estimators. See :ref:`dummy_estimators` in the user guide. + Multioutput support added by `Arnaud Joly`_. + +- :class:`decomposition.FactorAnalysis`, a transformer implementing the + classical factor analysis, by `Christian Osendorfer`_ and `Alexandre + Gramfort`_. See :ref:`FA` in the user guide. + +- :class:`feature_extraction.FeatureHasher`, a transformer implementing the + "hashing trick" for fast, low-memory feature extraction from string fields + by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` + for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and + :ref:`hashing_vectorizer` for the documentation and sample usage. + +- :class:`pipeline.FeatureUnion`, a transformer that concatenates + results of several other transformers by `Andreas Müller`_. See + :ref:`feature_union` in the user guide. 
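+
+  A minimal sketch of composing transformers this way (iris data for
+  illustration; the step names are ours)::
+
+    from sklearn.datasets import load_iris
+    from sklearn.decomposition import PCA
+    from sklearn.feature_selection import SelectKBest
+    from sklearn.pipeline import FeatureUnion
+
+    iris = load_iris()
+    union = FeatureUnion([("pca", PCA(n_components=2)),
+                          ("kbest", SelectKBest(k=1))])
+    # four iris features -> two PCA components plus one selected feature
+    X_combined = union.fit_transform(iris.data, iris.target)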
+ +- :class:`random_projection.GaussianRandomProjection`, + :class:`random_projection.SparseRandomProjection` and the function + :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are + transformers implementing Gaussian and sparse random projection matrix + by `Olivier Grisel`_ and `Arnaud Joly`_. + See :ref:`random_projection` in the user guide. + +- :class:`kernel_approximation.Nystroem`, a transformer for approximating + arbitrary kernels by `Andreas Müller`_. See + :ref:`nystroem_kernel_approx` in the user guide. + +- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary + encodings of categorical features by `Andreas Müller`_. See + :ref:`preprocessing_categorical_features` in the user guide. + +- :class:`linear_model.PassiveAggressiveClassifier` and + :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing + an efficient stochastic optimization for linear models by `Rob Zinkov`_ and + `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user + guide. + +- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional + sparse representations using ensembles of totally random trees by `Andreas Müller`_. + See :ref:`random_trees_embedding` in the user guide. + +- :class:`manifold.SpectralEmbedding` and function + :func:`manifold.spectral_embedding`, implementing the "laplacian + eigenmaps" transformation for non-linear dimensionality reduction by Wei + Li. See :ref:`spectral_embedding` in the user guide. + +- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_ + and `Nelle Varoquaux`_, + + +Changelog +--------- + +- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has + option for normalized output that reports the fraction of + misclassifications, rather than the raw number of misclassifications. By + Kyle Beauchamp. + +- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now + support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_. + +- Speedup improvement when using bootstrap samples in forests of randomized + trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. + +- Partial dependence plots for :ref:`gradient_boosting` in + :func:`ensemble.partial_dependence.partial_dependence` by `Peter + Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an + example. + +- The table of contents on the website has now been made expandable by + `Jaques Grobler`_. + +- :class:`feature_selection.SelectPercentile` now breaks ties + deterministically instead of returning all equally ranked features. + +- :class:`feature_selection.SelectKBest` and + :class:`feature_selection.SelectPercentile` are more numerically stable + since they use scores, rather than p-values, to rank results. This means + that they might sometimes select different features than they did + previously. + +- Ridge regression and ridge classification fitting with ``sparse_cg`` solver + no longer has quadratic memory complexity, by `Lars Buitinck`_ and + `Fabian Pedregosa`_. + +- Ridge regression and ridge classification now support a new fast solver + called ``lsqr``, by `Mathieu Blondel`_. + +- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee. + +- Added support for reading/writing svmlight files with pairwise + preference attribute (qid in svmlight file format) in + :func:`datasets.dump_svmlight_file` and + :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_. 
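+
+  For instance (hypothetical toy arrays and file name)::
+
+    import numpy as np
+    from sklearn.datasets import dump_svmlight_file, load_svmlight_file
+
+    X = np.array([[1.0, 0.0], [0.0, 2.0], [3.0, 4.0]])
+    y = np.array([0, 1, 1])
+    qid = np.array([1, 1, 2])  # one query id per sample
+    dump_svmlight_file(X, y, "ranking.svmlight", query_id=qid)
+    X2, y2, qid2 = load_svmlight_file("ranking.svmlight", query_id=True)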
+
+- Faster and more robust :func:`metrics.confusion_matrix` and
+  :ref:`clustering_evaluation` by Wei Li.
+
+- :func:`cross_validation.cross_val_score` now works with precomputed kernels
+  and affinity matrices, by `Andreas Müller`_.
+
+- LARS algorithm made more numerically stable with heuristics to drop
+  regressors too correlated as well as to stop the path when
+  numerical noise becomes predominant, by `Gael Varoquaux`_.
+
+- Faster implementation of :func:`metrics.precision_recall_curve` by
+  Conrad Lee.
+
+- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
+  in computer vision applications.
+
+- Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` by
+  Shaun Jackman.
+
+- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
+  by Andrew Winterman.
+
+- Improve consistency in gradient boosting: estimators
+  :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` use the estimator
+  :class:`tree.DecisionTreeRegressor` instead of the
+  :class:`tree._tree.Tree` data structure, by `Arnaud Joly`_.
+
+- Fixed a floating point exception in the :ref:`decision trees `
+  module, by Seberg.
+
+- Fixed :func:`metrics.roc_curve` failing when ``y_true`` has only one
+  class, by Wei Li.
+
+- Add the :func:`metrics.mean_absolute_error` function, which computes the
+  mean absolute error. The :func:`metrics.mean_squared_error`,
+  :func:`metrics.mean_absolute_error` and
+  :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.
+
+- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
+  :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
+  of ``class_weight`` was reversed, as erroneously a higher weight meant
+  fewer positives of a given class in earlier releases.
+
+- Improve narrative documentation and consistency in
+  :mod:`sklearn.metrics` for regression and classification metrics
+  by `Arnaud Joly`_.
+
+- Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
+  unsorted indices by Xinfan Meng and `Andreas Müller`_.
+
+- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
+  with few observations attached to them, by `Gael Varoquaux`_.
+
+
+API changes summary
+-------------------
+
+- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
+  This applies to :class:`decomposition.DictionaryLearning`,
+  :class:`decomposition.MiniBatchDictionaryLearning`,
+  :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
+
+- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
+  This applies to :class:`semi_supervised.LabelPropagation` and
+  :class:`semi_supervised.label_propagation.LabelSpreading`.
+
+- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
+  consistency in :class:`ensemble.BaseGradientBoosting` and
+  :class:`ensemble.GradientBoostingRegressor`.
+
+- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
+  was already integrated into the "regular" linear models.
+
+- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
+  accumulated error, was removed. Use ``mean_squared_error`` instead.
+
+- Passing ``class_weight`` parameters to ``fit`` methods is no longer
+  supported. Pass them to estimator constructors instead.
+
+- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``,
+  ``predict`` or ``sample`` methods instead.
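+
+  As a minimal migration sketch (toy data; this assumes the
+  ``sklearn.mixture.GMM`` API of this era, since replaced by
+  ``GaussianMixture``)::
+
+    import numpy as np
+    from sklearn.mixture import GMM
+
+    X = np.random.RandomState(0).randn(100, 2)
+    g = GMM(n_components=2)
+    g.fit(X)
+
+    log_likelihood = g.score(X)  # per-sample log-likelihood
+    labels = g.predict(X)        # most likely component per sample
+    draws = g.sample(10)         # replaces the removed rvs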
+ +- The ``solver`` fit option in Ridge regression and classification is now + deprecated and will be removed in v0.14. Use the constructor option + instead. + +- :class:`feature_extraction.text.DictVectorizer` now returns sparse + matrices in the CSR format, instead of COO. + +- Renamed ``k`` in :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed + ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. + +- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. + This applies to :class:`cross_validation.ShuffleSplit`, + :class:`cross_validation.StratifiedShuffleSplit`, + :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. + +- Replaced ``rho`` in :class:`linear_model.ElasticNet` and + :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter + had different meanings; ``l1_ratio`` was introduced to avoid confusion. + It has the same meaning as previously ``rho`` in + :class:`linear_model.ElasticNet` and ``(1-rho)`` in + :class:`linear_model.SGDClassifier`. + +- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now + store a list of paths in the case of multiple targets, rather than + an array of paths. + +- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` + to adhere more strictly with the API. + +- :func:`cluster.spectral_embedding` was moved to + :func:`manifold.spectral_embedding`. + +- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, + :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode`` + to ``eigen_solver``. + +- Renamed ``mode`` in :func:`manifold.spectral_embedding` and + :class:`cluster.SpectralClustering` to ``eigen_solver``. + +- ``classes_`` and ``n_classes_`` attributes of + :class:`tree.DecisionTreeClassifier` and all derived ensemble models are + now flat in case of single output problems and nested in case of + multi-output problems. + +- The ``estimators_`` attribute of + :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and + :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an + array of :class:'tree.DecisionTreeRegressor'. + +- Renamed ``chunk_size`` to ``batch_size`` in + :class:`decomposition.MiniBatchDictionaryLearning` and + :class:`decomposition.MiniBatchSparsePCA` for consistency. + +- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_`` + attribute and support arbitrary dtypes for labels ``y``. + Also, the dtype returned by ``predict`` now reflects the dtype of + ``y`` during ``fit`` (used to be ``np.float``). + +- Changed default test_size in :func:`cross_validation.train_test_split` + to None, added possibility to infer ``test_size`` from ``train_size`` in + :class:`cross_validation.ShuffleSplit` and + :class:`cross_validation.StratifiedShuffleSplit`. + +- Renamed function :func:`sklearn.metrics.zero_one` to + :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior + in :func:`sklearn.metrics.zero_one_loss` is different from + :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to + ``normalize=True``. + +- Renamed function :func:`metrics.zero_one_score` to + :func:`metrics.accuracy_score`. + +- :func:`datasets.make_circles` now has the same number of inner and outer points. + +- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved + from ``fit`` to ``__init__``. + +People +------ +List of contributors for release 0.13 by number of commits. 
+ + * 364 `Andreas Müller`_ + * 143 `Arnaud Joly`_ + * 137 `Peter Prettenhofer`_ + * 131 `Gael Varoquaux`_ + * 117 `Mathieu Blondel`_ + * 108 `Lars Buitinck`_ + * 106 Wei Li + * 101 `Olivier Grisel`_ + * 65 `Vlad Niculae`_ + * 54 `Gilles Louppe`_ + * 40 `Jaques Grobler`_ + * 38 `Alexandre Gramfort`_ + * 30 `Rob Zinkov`_ + * 19 Aymeric Masurelle + * 18 Andrew Winterman + * 17 `Fabian Pedregosa`_ + * 17 Nelle Varoquaux + * 16 `Christian Osendorfer`_ + * 14 `Daniel Nouri`_ + * 13 :user:`Virgile Fritsch ` + * 13 syhw + * 12 `Satrajit Ghosh`_ + * 10 Corey Lynch + * 10 Kyle Beauchamp + * 9 Brian Cheung + * 9 Immanuel Bayer + * 9 mr.Shu + * 8 Conrad Lee + * 8 `James Bergstra`_ + * 7 Tadej Janež + * 6 Brian Cajes + * 6 `Jake Vanderplas`_ + * 6 Michael + * 6 Noel Dawe + * 6 Tiago Nunes + * 6 cow + * 5 Anze + * 5 Shiqiao Du + * 4 Christian Jauvin + * 4 Jacques Kvam + * 4 Richard T. Guy + * 4 `Robert Layton`_ + * 3 Alexandre Abraham + * 3 Doug Coleman + * 3 Scott Dickerson + * 2 ApproximateIdentity + * 2 John Benediktsson + * 2 Mark Veronda + * 2 Matti Lyra + * 2 Mikhail Korobov + * 2 Xinfan Meng + * 1 Alejandro Weinstein + * 1 `Alexandre Passos`_ + * 1 Christoph Deil + * 1 Eugene Nizhibitsky + * 1 Kenneth C. Arnold + * 1 Luis Pedro Coelho + * 1 Miroslav Batchkarov + * 1 Pavel + * 1 Sebastian Berg + * 1 Shaun Jackman + * 1 Subhodeep Moitra + * 1 bob + * 1 dengemann + * 1 emanuele + * 1 x006 + diff --git a/doc/whats_new/v0.14.rst b/doc/whats_new/v0.14.rst new file mode 100644 index 0000000000000..2b0456593e613 --- /dev/null +++ b/doc/whats_new/v0.14.rst @@ -0,0 +1,389 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_14: + +Version 0.14 +=============== + +**August 7, 2013** + +Changelog +--------- + +- Missing values with sparse and dense matrices can be imputed with the + transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. + +- The core implementation of decisions trees has been rewritten from + scratch, allowing for faster tree induction and lower memory + consumption in all tree-based estimators. By `Gilles Louppe`_. + +- Added :class:`ensemble.AdaBoostClassifier` and + :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and + `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user + guide for details and examples. + +- Added :class:`grid_search.RandomizedSearchCV` and + :class:`grid_search.ParameterSampler` for randomized hyperparameter + optimization. By `Andreas Müller`_. + +- Added :ref:`biclustering ` algorithms + (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and + :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data + generation methods (:func:`sklearn.datasets.make_biclusters` and + :func:`sklearn.datasets.make_checkerboard`), and scoring metrics + (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. + +- Added :ref:`Restricted Boltzmann Machines` + (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. + +- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, + :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under + Python 3.3. + +- Ability to pass one penalty (alpha value) per target in + :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. + +- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization + issue (minor practical significance). + By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . + +- Added an interactive version of `Andreas Müller`_'s + `Machine Learning Cheat Sheet (for scikit-learn) + `_ + to the documentation. 
+  See :ref:`Choosing the right estimator `.
+  By `Jaques Grobler`_.
+
+- :class:`grid_search.GridSearchCV` and
+  :func:`cross_validation.cross_val_score` now support the use of advanced
+  scoring functions such as area under the ROC curve and f-beta scores.
+  See :ref:`scoring_parameter` for details. By `Andreas Müller`_
+  and `Lars Buitinck`_.
+  Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
+  deprecated.
+
+- Multi-label classification output is now supported by
+  :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`,
+  :func:`metrics.f1_score`, :func:`metrics.fbeta_score`,
+  :func:`metrics.classification_report`,
+  :func:`metrics.precision_score` and :func:`metrics.recall_score`
+  by `Arnaud Joly`_.
+
+- Two new metrics, :func:`metrics.hamming_loss` and
+  :func:`metrics.jaccard_similarity_score`,
+  were added with multi-label support by `Arnaud Joly`_.
+
+- Speed and memory usage improvements in
+  :class:`feature_extraction.text.CountVectorizer` and
+  :class:`feature_extraction.text.TfidfVectorizer`,
+  by Jochen Wersdörfer and Roman Sinayev.
+
+- The ``min_df`` parameter in
+  :class:`feature_extraction.text.CountVectorizer` and
+  :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2,
+  has been reset to 1 to avoid unpleasant surprises (empty vocabularies)
+  for novice users who try it out on tiny document collections.
+  A value of at least 2 is still recommended for practical use.
+
+- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
+  :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
+  converts their ``coef_`` into a sparse matrix, meaning stored models
+  trained using these estimators can be made much more compact.
+
+- :class:`linear_model.SGDClassifier` now produces multiclass probability
+  estimates when trained under log loss or modified Huber loss.
+
+- Hyperlinks to documentation in example code on the website by
+  :user:`Martin Luessi `.
+
+- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
+  of the features for non-default ``feature_range`` settings. By `Andreas
+  Müller`_.
+
+- ``max_features`` in :class:`tree.DecisionTreeClassifier`,
+  :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+  now supports percentage values. By `Gilles Louppe`_.
+
+- Performance improvements in :class:`isotonic.IsotonicRegression` by
+  `Nelle Varoquaux`_.
+
+- :func:`metrics.accuracy_score` has an option ``normalize`` to return
+  the fraction or the number of correctly classified samples,
+  by `Arnaud Joly`_.
+
+- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
+  loss. By Jochen Wersdörfer and `Lars Buitinck`_.
+
+- A bug that caused :class:`ensemble.AdaBoostClassifier` to output
+  incorrect probabilities has been fixed.
+
+- Feature selectors now share a mixin providing consistent ``transform``,
+  ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
+- A fitted :class:`grid_search.GridSearchCV` or
+  :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
+  By `Joel Nothman`_.
+
+- Refactored and vectorized implementation of :func:`metrics.roc_curve`
+  and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
+
+- The new estimator :class:`sklearn.decomposition.TruncatedSVD`
+  performs dimensionality reduction using SVD on sparse matrices,
+  and can be used for latent semantic analysis (LSA).
+  By `Lars Buitinck`_.
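+
+  A minimal LSA sketch (hypothetical toy corpus; variable names are ours)::
+
+    from sklearn.decomposition import TruncatedSVD
+    from sklearn.feature_extraction.text import TfidfVectorizer
+
+    docs = ["the cat sat", "the dog sat", "cats and dogs"]
+    X_tfidf = TfidfVectorizer().fit_transform(docs)  # sparse matrix
+    lsa = TruncatedSVD(n_components=2)
+    X_topics = lsa.fit_transform(X_tfidf)  # dense, low-rank representation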
+
+- Added self-contained example of out-of-core learning on text data
+  :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`.
+  By :user:`Eustache Diemert `.
+
+- The default number of components for
+  :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented
+  to be ``n_features``. This was the default behavior, so programs using it
+  will continue to work as they did.
+
+- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude
+  faster on sparse data (the speedup depends on the sparsity). By
+  `Lars Buitinck`_.
+
+- Reduce memory footprint of FastICA by `Denis Engemann`_ and
+  `Alexandre Gramfort`_.
+
+- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses
+  a column format and prints progress with decreasing frequency.
+  It also shows the remaining time. By `Peter Prettenhofer`_.
+
+- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement
+  :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_`
+  rather than the OOB score for model selection. An example that shows
+  how to use OOB estimates to select the number of trees was added.
+  By `Peter Prettenhofer`_.
+
+- Most metrics now support string labels for multiclass classification
+  by `Arnaud Joly`_ and `Lars Buitinck`_.
+
+- New ``OrthogonalMatchingPursuitCV`` class by `Alexandre Gramfort`_
+  and `Vlad Niculae`_.
+
+- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the
+  'alphas' parameter now works as expected when given a list of
+  values. By Philippe Gervais.
+
+- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
+  that prevented all folds provided by a CV object from being used (only
+  the first 3 were used). When providing a CV object, execution
+  time may thus increase significantly compared to the previous
+  version (but results are correct now). By Philippe Gervais.
+
+- :func:`cross_validation.cross_val_score` and the :mod:`grid_search`
+  module are now tested with multi-output data by `Arnaud Joly`_.
+
+- :func:`datasets.make_multilabel_classification` can now return
+  the output in label indicator multilabel format by `Arnaud Joly`_.
+
+- K-nearest neighbors, :class:`neighbors.KNeighborsClassifier`
+  and :class:`neighbors.KNeighborsRegressor`,
+  and radius neighbors, :class:`neighbors.RadiusNeighborsClassifier` and
+  :class:`neighbors.RadiusNeighborsRegressor`, support multioutput data
+  by `Arnaud Joly`_.
+
+- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`svm.NuSVC`,
+  :class:`svm.OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
+  controlled. This is useful to ensure consistency in the probability
+  estimates for the classifiers trained with ``probability=True``. By
+  `Vlad Niculae`_.
+
+- Out-of-core learning support for discrete naive Bayes classifiers
+  :class:`sklearn.naive_bayes.MultinomialNB` and
+  :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
+  method by `Olivier Grisel`_.
+
+- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
+  Vincent Michel and `Andreas Müller`_.
+
+- Improved documentation on :ref:`multi-class, multi-label and multi-output
+  classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_.
+
+- Better input and error handling in the :mod:`metrics` module by
+  `Arnaud Joly`_ and `Joel Nothman`_.
+ +- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov ` + +- Significant speed improvements for :class:`sklearn.cluster.DBSCAN` + by `cleverless `_ + + +API changes summary +------------------- + +- The :func:`auc_score` was renamed :func:`roc_auc_score`. + +- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use + ``nosetests sklearn`` from the command line. + +- Feature importances in :class:`tree.DecisionTreeClassifier`, + :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators + are now computed on the fly when accessing the ``feature_importances_`` + attribute. Setting ``compute_importances=True`` is no longer required. + By `Gilles Louppe`_. + +- :class:`linear_model.lasso_path` and + :class:`linear_model.enet_path` can return its results in the same + format as that of :class:`linear_model.lars_path`. This is done by + setting the ``return_models`` parameter to ``False``. By + `Jaques Grobler`_ and `Alexandre Gramfort`_ + +- :class:`grid_search.IterGrid` was renamed to + :class:`grid_search.ParameterGrid`. + +- Fixed bug in :class:`KFold` causing imperfect class balance in some + cases. By `Alexandre Gramfort`_ and Tadej Janež. + +- :class:`sklearn.neighbors.BallTree` has been refactored, and a + :class:`sklearn.neighbors.KDTree` has been + added which shares the same interface. The Ball Tree now works with + a wide variety of distance metrics. Both classes have many new + methods, including single-tree and dual-tree queries, breadth-first + and depth-first searching, and more advanced queries such as + kernel density estimation and 2-point correlation functions. + By `Jake Vanderplas`_ + +- Support for scipy.spatial.cKDTree within neighbors queries has been + removed, and the functionality replaced with the new :class:`KDTree` + class. + +- :class:`sklearn.neighbors.KernelDensity` has been added, which performs + efficient kernel density estimation with a variety of kernels. + +- :class:`sklearn.decomposition.KernelPCA` now always returns output with + ``n_components`` components, unless the new parameter ``remove_zero_eig`` + is set to ``True``. This new behavior is consistent with the way + kernel PCA was always documented; previously, the removal of components + with zero eigenvalues was tacitly performed on all data. + +- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified + sparse matrix in :class:`sklearn.linear_model.RidgeCV`. + +- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` + is now deprecated in favor of the new ``TruncatedSVD``. + +- :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` + otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. + +- :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` + parameters were renamed ``encoding`` and ``decode_errors``. + +- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor` + and :class:`sklearn.ensemble.GradientBoostingClassifier` + is deprecated and has been replaced by ``oob_improvement_`` . + +- Attributes in OrthogonalMatchingPursuit have been deprecated + (copy_X, Gram, ...) and precompute_gram renamed precompute + for consistency. See #2224. + +- :class:`sklearn.preprocessing.StandardScaler` now converts integer input + to float, and raises a warning. Previously it rounded for dense integer + input. + +- :class:`sklearn.multiclass.OneVsRestClassifier` now has a + ``decision_function`` method. 
This will return the distance of each + sample from the decision boundary for each class, as long as the + underlying estimators implement the ``decision_function`` method. + By `Kyle Kastner`_. + +- Better input validation, warning on unexpected shapes for y. + +People +------ +List of contributors for release 0.14 by number of commits. + + * 277 Gilles Louppe + * 245 Lars Buitinck + * 187 Andreas Mueller + * 124 Arnaud Joly + * 112 Jaques Grobler + * 109 Gael Varoquaux + * 107 Olivier Grisel + * 102 Noel Dawe + * 99 Kemal Eren + * 79 Joel Nothman + * 75 Jake VanderPlas + * 73 Nelle Varoquaux + * 71 Vlad Niculae + * 65 Peter Prettenhofer + * 64 Alexandre Gramfort + * 54 Mathieu Blondel + * 38 Nicolas Trésegnie + * 35 eustache + * 27 Denis Engemann + * 25 Yann N. Dauphin + * 19 Justin Vincent + * 17 Robert Layton + * 15 Doug Coleman + * 14 Michael Eickenberg + * 13 Robert Marchman + * 11 Fabian Pedregosa + * 11 Philippe Gervais + * 10 Jim Holmström + * 10 Tadej Janež + * 10 syhw + * 9 Mikhail Korobov + * 9 Steven De Gryze + * 8 sergeyf + * 7 Ben Root + * 7 Hrishikesh Huilgolkar + * 6 Kyle Kastner + * 6 Martin Luessi + * 6 Rob Speer + * 5 Federico Vaggi + * 5 Raul Garreta + * 5 Rob Zinkov + * 4 Ken Geis + * 3 A. Flaxman + * 3 Denton Cockburn + * 3 Dougal Sutherland + * 3 Ian Ozsvald + * 3 Johannes Schönberger + * 3 Robert McGibbon + * 3 Roman Sinayev + * 3 Szabo Roland + * 2 Diego Molla + * 2 Imran Haque + * 2 Jochen Wersdörfer + * 2 Sergey Karayev + * 2 Yannick Schwartz + * 2 jamestwebber + * 1 Abhijeet Kolhe + * 1 Alexander Fabisch + * 1 Bastiaan van den Berg + * 1 Benjamin Peterson + * 1 Daniel Velkov + * 1 Fazlul Shahriar + * 1 Felix Brockherde + * 1 Félix-Antoine Fortin + * 1 Harikrishnan S + * 1 Jack Hale + * 1 JakeMick + * 1 James McDermott + * 1 John Benediktsson + * 1 John Zwinck + * 1 Joshua Vredevoogd + * 1 Justin Pati + * 1 Kevin Hughes + * 1 Kyle Kelley + * 1 Matthias Ekman + * 1 Miroslav Shubernetskiy + * 1 Naoki Orii + * 1 Norbert Crombach + * 1 Rafael Cunha de Almeida + * 1 Rolando Espinoza La fuente + * 1 Seamus Abshere + * 1 Sergey Feldman + * 1 Sergio Medina + * 1 Stefano Lattarini + * 1 Steve Koch + * 1 Sturla Molden + * 1 Thomas Jarosch + * 1 Yaroslav Halchenko + diff --git a/doc/whats_new/v0.15.rst b/doc/whats_new/v0.15.rst new file mode 100644 index 0000000000000..a2eafc63b0617 --- /dev/null +++ b/doc/whats_new/v0.15.rst @@ -0,0 +1,623 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_15_2: + +Version 0.15.2 +============== + +**September 4, 2014** + +Bug fixes +--------- + +- Fixed handling of the ``p`` parameter of the Minkowski distance that was + previously ignored in nearest neighbors models. By :user:`Nikolay + Mayorov `. + +- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early + stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_. + +- Fixed the build under Windows when scikit-learn is built with MSVC while + NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico + Vaggi `. + +- Fixed an array index overflow bug in the coordinate descent solver. By + `Gael Varoquaux`_. + +- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_. + +- Removed unnecessary data copy in :class:`cluster.KMeans`. + By `Gael Varoquaux`_. + +- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3. + By Calvin Giles. + +- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis` + now projects the input on the most discriminant directions. 
+  By Martin Billinger.
+
+- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.
+
+- Performance optimization in :class:`isotonic.IsotonicRegression`.
+  By Robert Bradshaw.
+
+- ``nose`` is no longer a runtime dependency for importing ``sklearn``;
+  it is only needed for running the tests. By `Joel Nothman`_.
+
+- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_,
+  :user:`Matt Pico `, and others.
+
+.. _changes_0_15_1:
+
+Version 0.15.1
+==============
+
+**August 1, 2014**
+
+Bug fixes
+---------
+
+- Made :func:`cross_validation.cross_val_score` use
+  :class:`cross_validation.KFold` instead of
+  :class:`cross_validation.StratifiedKFold` on multi-output classification
+  problems. By :user:`Nikolay Mayorov `.
+
+- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore
+  the default behavior of 0.14.1 for backward compatibility. By
+  :user:`Hamzeh Alsalhi `.
+
+- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
+  convergence detection. By Edward Raff and `Gael Varoquaux`_.
+
+- Fixed the behavior of :class:`multiclass.OneVsOneClassifier`
+  in case of ties at the per-class vote level by computing the correct
+  per-class sum of prediction scores. By `Andreas Müller`_.
+
+- Made :func:`cross_validation.cross_val_score` and
+  :class:`grid_search.GridSearchCV` accept Python lists as input data.
+  This is especially useful for cross-validation and model selection of
+  text processing pipelines. By `Andreas Müller`_.
+
+- Fixed data input checks of most estimators to accept input data that
+  implements the NumPy ``__array__`` protocol. This is the case for
+  ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
+  pandas. By `Gael Varoquaux`_.
+
+- Fixed a regression for :class:`linear_model.SGDClassifier` with
+  ``class_weight="auto"`` on data with non-contiguous labels. By
+  `Olivier Grisel`_.
+
+
+.. _changes_0_15:
+
+Version 0.15
+============
+
+**July 15, 2014**
+
+Highlights
+----------
+
+- Many speed and memory improvements all across the code.
+
+- Huge speed and memory improvements to random forests (and extra
+  trees) that also benefit more from parallel computing.
+
+- Incremental fit to :class:`BernoulliRBM `.
+
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies.
+
+- Added :class:`linear_model.RANSACRegressor` for robust regression
+  models.
+
+- Added dimensionality reduction with :class:`manifold.TSNE` which can be
+  used to visualize high-dimensional data.
+
+
+Changelog
+---------
+
+New features
+............
+
+- Added :class:`ensemble.BaggingClassifier` and
+  :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
+  any kind of base estimator. See the :ref:`Bagging ` section of
+  the user guide for details and examples. By `Gilles Louppe`_.
+
+- New unsupervised feature selection algorithm
+  :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.
+
+- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
+  fitting of regression models. By :user:`Johannes Schönberger `.
+
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_.
+
+- Shorthand constructors :func:`pipeline.make_pipeline` and
+  :func:`pipeline.make_union` were added by `Lars Buitinck`_.
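+
+  For instance (a hypothetical composition; the estimator choice is ours)::
+
+    from sklearn.decomposition import PCA
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.pipeline import make_pipeline
+    from sklearn.preprocessing import StandardScaler
+
+    # step names are derived automatically from the class names
+    model = make_pipeline(StandardScaler(), PCA(n_components=2),
+                          LogisticRegression())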
+
+- Shuffle option for :class:`cross_validation.StratifiedKFold`.
+  By :user:`Jeffrey Blackburne `.
+
+- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
+  Imran Haque.
+
+- Added ``partial_fit`` to :class:`BernoulliRBM `
+  by :user:`Danny Sullivan `.
+
+- Added :func:`learning_curve ` utility to
+  chart performance with respect to training size. See
+  :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`.
+  By Alexander Fabisch.
+
+- Add ``positive`` option in :class:`LassoCV ` and
+  :class:`ElasticNetCV `.
+  By Brian Wignall and `Alexandre Gramfort`_.
+
+- Added :class:`linear_model.MultiTaskElasticNetCV` and
+  :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.
+
+- Added :class:`manifold.TSNE`. By Alexander Fabisch.
+
+Enhancements
+............
+
+- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` meta-estimators.
+  By :user:`Hamzeh Alsalhi `.
+
+- Memory improvements of decision trees, by `Arnaud Joly`_.
+
+- Decision trees can now be built in best-first manner by using
+  ``max_leaf_nodes`` as the stopping criterion. Refactored the tree code
+  to use either a stack or a priority queue for tree building.
+  By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+
+- Decision trees can now be fitted on fortran- and c-style arrays, and
+  non-contiguous arrays without the need to make a copy.
+  If the input array has a different dtype than ``np.float32``, a fortran-
+  style copy will be made since fortran-style memory layout has speed
+  advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+
+- Speed improvement of regression trees by optimizing the
+  computation of the mean square error criterion. This led
+  to speed improvements of the tree, forest and gradient boosting tree
+  modules. By `Arnaud Joly`_.
+
+- The ``img_to_graph`` and ``grid_to_graph`` functions in
+  :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
+  instead of ``np.matrix`` when ``return_as=np.ndarray``. See the
+  Notes section for more information on compatibility.
+
+- Changed the internal storage of decision trees to use a struct array.
+  This fixed some small bugs, while improving code and providing a small
+  speed gain. By `Joel Nothman`_.
+
+- Reduce memory usage and overhead when fitting and predicting with forests
+  of randomized trees in parallel with ``n_jobs != 1`` by leveraging the new
+  threading backend of joblib 0.8 and releasing the GIL in the tree fitting
+  Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_.
+
+- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
+  By `Gilles Louppe`_ and `Peter Prettenhofer`_.
+
+- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting`
+  module: a ``warm_start`` argument to fit additional trees,
+  a ``max_leaf_nodes`` argument to fit GBM style trees,
+  a ``monitor`` fit argument to inspect the estimator during training, and
+  refactoring of the verbose code. By `Peter Prettenhofer`_.
+
+- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
+  By `Arnaud Joly`_.
+
+- Faster depth-based tree building algorithms such as decision trees,
+  random forests, extra trees and gradient tree boosting (with depth-based
+  growing strategy), by avoiding trying to split on constant features
+  found in the sample subset. By `Arnaud Joly`_.
+
+- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
+  methods: the minimum weighted fraction of the input samples required to be
+  at a leaf node. By `Noel Dawe`_.
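+
+  A minimal sketch (iris data for illustration)::
+
+    from sklearn.datasets import load_iris
+    from sklearn.tree import DecisionTreeClassifier
+
+    iris = load_iris()
+    # each leaf must hold at least 5% of the total sample weight
+    tree = DecisionTreeClassifier(min_weight_fraction_leaf=0.05)
+    tree.fit(iris.data, iris.target)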
+
+- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
+
+- Added a ``predict`` method to :class:`cluster.AffinityPropagation` and
+  :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
+
+- Vector and matrix multiplications have been optimized throughout the
+  library by `Denis Engemann`_ and `Alexandre Gramfort`_.
+  In particular, they should take less memory with older NumPy versions
+  (prior to 1.7.2).
+
+- Precision-recall and ROC examples now use ``train_test_split``, and have
+  more explanation of why these metrics are useful. By `Kyle Kastner`_.
+
+- The training algorithm for :class:`decomposition.NMF` is faster for
+  sparse matrices and has much lower memory complexity, meaning it will
+  scale up gracefully to large datasets. By `Lars Buitinck`_.
+
+- Added an ``svd_method`` option, with default value ``"randomized"``, to
+  :class:`decomposition.FactorAnalysis` to save memory and
+  significantly speed up computation, by `Denis Engemann`_ and
+  `Alexandre Gramfort`_.
+
+- Changed :class:`cross_validation.StratifiedKFold` to try to
+  preserve as much of the original ordering of samples as possible so as
+  not to hide overfitting on datasets with a non-negligible level of
+  sample dependency.
+  By `Daniel Nouri`_ and `Olivier Grisel`_.
+
+- Add multi-output support to :class:`gaussian_process.GaussianProcess`
+  by John Novak.
+
+- Support for precomputed distance matrices in nearest neighbor estimators
+  by `Robert Layton`_ and `Joel Nothman`_.
+
+- Norm computations optimized for NumPy 1.6 and later versions by
+  `Lars Buitinck`_. In particular, the k-means algorithm no longer
+  needs a temporary data structure the size of its input.
+
+- :class:`dummy.DummyClassifier` can now be used to predict a constant
+  output value. By `Manoj Kumar`_.
+
+- :class:`dummy.DummyRegressor` now has a ``strategy`` parameter which
+  allows it to predict the mean, the median of the training set, or a
+  constant output value. By :user:`Maheshakya Wijewardena `.
+
+- Multi-label classification output in multilabel indicator format
+  is now supported by :func:`metrics.roc_auc_score` and
+  :func:`metrics.average_precision_score` by `Arnaud Joly`_.
+
+- Significant performance improvements (more than 100x speedup for
+  large problems) in :class:`isotonic.IsotonicRegression` by
+  `Andrew Tulloch`_.
+
+- Speed and memory usage improvements to the SGD algorithm for linear
+  models: it now uses threads, not separate processes, when ``n_jobs > 1``.
+  By `Lars Buitinck`_.
+
+- Grid search and cross-validation allow NaNs in the input arrays so that
+  preprocessors such as :class:`preprocessing.Imputer
+  ` can be trained within the cross-validation loop,
+  avoiding potentially skewed results.
+
+- Ridge regression can now deal with sample weights in feature space
+  (previously only in sample space). By :user:`Michael Eickenberg `.
+  Both solutions are provided by the Cholesky solver.
+
+- Several classification and regression metrics now support weighted
+  samples with the new ``sample_weight`` argument:
+  :func:`metrics.accuracy_score`,
+  :func:`metrics.zero_one_loss`,
+  :func:`metrics.precision_score`,
+  :func:`metrics.average_precision_score`,
+  :func:`metrics.f1_score`,
+  :func:`metrics.fbeta_score`,
+  :func:`metrics.recall_score`,
+  :func:`metrics.roc_auc_score`,
+  :func:`metrics.explained_variance_score`,
+  :func:`metrics.mean_squared_error`,
+  :func:`metrics.mean_absolute_error`,
+  :func:`metrics.r2_score`.
+  By `Noel Dawe`_.
+
+- Speed up of the sample generator
+  :func:`datasets.make_multilabel_classification`.
By `Joel Nothman`_.
+
+Documentation improvements
+..........................
+
+- The :ref:`Working With Text Data ` tutorial
+  has now been worked into the main documentation's tutorial section.
+  Includes exercises and skeletons for tutorial presentation.
+  Original tutorial created by several authors including
+  `Olivier Grisel`_, Lars Buitinck and many others.
+  Tutorial integration into the scikit-learn documentation
+  by `Jaques Grobler`_.
+
+- Added :ref:`Computational Performance `
+  documentation. Discussion and examples of prediction latency/throughput
+  and the different factors that influence speed. Additional tips for
+  building faster models and choosing a relevant compromise between speed
+  and predictive power.
+  By :user:`Eustache Diemert `.
+
+Bug fixes
+.........
+
+- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning`:
+  ``partial_fit`` was not working properly.
+
+- Fixed bug in :class:`linear_model.stochastic_gradient`:
+  ``l1_ratio`` was used as ``(1.0 - l1_ratio)``.
+
+- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
+  labels.
+
+- Fixed a bug in :class:`LassoCV ` and
+  :class:`ElasticNetCV `: they would not
+  pre-compute the Gram matrix with ``precompute=True`` or
+  ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.
+
+- Fixed incorrect estimation of the degrees of freedom in
+  :func:`feature_selection.f_regression` when variates are not centered.
+  By :user:`Virgile Fritsch `.
+
+- Fixed a race condition in parallel processing with
+  ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
+  By `Olivier Grisel`_.
+
+- Raise an error in :class:`cluster.FeatureAgglomeration` and
+  :class:`cluster.WardAgglomeration` when no samples are given,
+  rather than returning a meaningless clustering.
+
+- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
+  ``loss='huber'``: ``gamma`` might not have been initialized.
+
+- Fixed feature importances as computed with a forest of randomized trees
+  when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
+  By `Gilles Louppe`_.
+
+API changes summary
+-------------------
+
+- :mod:`sklearn.hmm` is deprecated. Its removal is planned
+  for the 0.17 release.
+
+- Use of :class:`covariance.EllipticEnvelop` has now been removed after
+  deprecation.
+  Please use :class:`covariance.EllipticEnvelope` instead.
+
+- :class:`cluster.Ward` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cluster.WardClustering` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cross_validation.Bootstrap` is deprecated.
+  :class:`cross_validation.KFold` or
+  :class:`cross_validation.ShuffleSplit` are recommended instead.
+
+- Direct support for the sequence of sequences (or list of lists) multilabel
+  format is deprecated. To convert to and from the supported binary
+  indicator matrix format, use
+  :class:`MultiLabelBinarizer `.
+  By `Joel Nothman`_.
+
+- Added a ``score`` method to :class:`PCA ` following
+  the model of probabilistic PCA and deprecated the
+  :class:`ProbabilisticPCA ` model whose
+  score implementation is not correct. The computation now also exploits the
+  matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.
+
+- The ``score`` method of :class:`FactorAnalysis `
+  now returns the average log-likelihood of the samples. Use
+  ``score_samples`` to get the log-likelihood of each sample.
+  By `Alexandre Gramfort`_.
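+
+  A short sketch of the changed ``score`` semantics (random data for
+  illustration): ``score`` is now the mean of ``score_samples``::
+
+    import numpy as np
+    from sklearn.decomposition import FactorAnalysis
+
+    X = np.random.RandomState(0).randn(100, 5)
+    fa = FactorAnalysis(n_components=2).fit(X)
+    # Average log-likelihood of the samples...
+    print(fa.score(X))
+    # ...equals the mean of the per-sample log-likelihoods.
+    print(fa.score_samples(X).mean())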
+
+- Generating boolean masks (the setting ``indices=False``)
+  from cross-validation generators is deprecated.
+  Support for masks will be removed in 0.17.
+  The generators have produced arrays of indices by default since 0.10.
+  By `Joel Nothman`_.
+
+- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
+  are now considered valid classification targets. This fixes a regression
+  from version 0.13 in some classifiers. By `Joel Nothman`_.
+
+- Fix wrong ``explained_variance_ratio_`` attribute in
+  :class:`RandomizedPCA `.
+  By `Alexandre Gramfort`_.
+
+- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
+  :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
+  This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
+  ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D
+  array-like object of length greater than one.
+  By `Manoj Kumar`_.
+
+- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
+  when fitting an intercept on sparse input data. The automatic grid
+  of alphas was not computed correctly and the scaling with ``normalize``
+  was wrong. By `Manoj Kumar`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each
+  split for decision trees, random forests and gradient tree boosting.
+  Previously, the count of drawn features started only after one
+  non-constant feature had been found in the split. This bug fix will
+  affect computational and generalization performance of those algorithms
+  in the presence of constant features. To get back previous generalization
+  performance, you should modify the value of ``max_features``.
+  By `Arnaud Joly`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each
+  split for :class:`ensemble.ExtraTreesClassifier` and
+  :class:`ensemble.ExtraTreesRegressor`. Previously, only non-constant
+  features in the split were counted as drawn. Now constant features are
+  counted as drawn. Furthermore, at least one feature must be non-constant
+  in order to make a valid split. This bug fix will affect
+  computational and generalization performance of extra trees in the
+  presence of constant features. To get back previous generalization
+  performance, you should modify the value of ``max_features``.
+  By `Arnaud Joly`_.
+
+- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
+  Previously it was broken for input of non-integer ``dtype`` and the
+  weighted array that was returned was wrong. By `Manoj Kumar`_.
+
+- Fix :class:`cross_validation.Bootstrap` to raise ``ValueError``
+  when ``n_train + n_test > n``. By :user:`Ronald Phlypo `.
+
+
+People
+------
+
+List of contributors for release 0.15 by number of commits.
+
+* 312 Olivier Grisel
+* 275 Lars Buitinck
+* 221 Gael Varoquaux
+* 148 Arnaud Joly
+* 134 Johannes Schönberger
+* 119 Gilles Louppe
+* 113 Joel Nothman
+* 111 Alexandre Gramfort
+* 95 Jaques Grobler
+* 89 Denis Engemann
+* 83 Peter Prettenhofer
+* 83 Alexander Fabisch
+* 62 Mathieu Blondel
+* 60 Eustache Diemert
+* 60 Nelle Varoquaux
+* 49 Michael Bommarito
+* 45 Manoj-Kumar-S
+* 28 Kyle Kastner
+* 26 Andreas Mueller
+* 22 Noel Dawe
+* 21 Maheshakya Wijewardena
+* 21 Brooke Osborn
+* 21 Hamzeh Alsalhi
+* 21 Jake VanderPlas
+* 21 Philippe Gervais
+* 19 Bala Subrahmanyam Varanasi
+* 12 Ronald Phlypo
+* 10 Mikhail Korobov
+* 8 Thomas Unterthiner
+* 8 Jeffrey Blackburne
+* 8 eltermann
+* 8 bwignall
+* 7 Ankit Agrawal
+* 7 CJ Carey
+* 6 Daniel Nouri
+* 6 Chen Liu
+* 6 Michael Eickenberg
+* 6 ugurthemaster
+* 5 Aaron Schumacher
+* 5 Baptiste Lagarde
+* 5 Rajat Khanduja
+* 5 Robert McGibbon
+* 5 Sergio Pascual
+* 4 Alexis Metaireau
+* 4 Ignacio Rossi
+* 4 Virgile Fritsch
+* 4 Sebastian Säger
+* 4 Ilambharathi Kanniah
+* 4 sdenton4
+* 4 Robert Layton
+* 4 Alyssa
+* 4 Amos Waterland
+* 3 Andrew Tulloch
+* 3 murad
+* 3 Steven Maude
+* 3 Karol Pysniak
+* 3 Jacques Kvam
+* 3 cgohlke
+* 3 cjlin
+* 3 Michael Becker
+* 3 hamzeh
+* 3 Eric Jacobsen
+* 3 john collins
+* 3 kaushik94
+* 3 Erwin Marsi
+* 2 csytracy
+* 2 LK
+* 2 Vlad Niculae
+* 2 Laurent Direr
+* 2 Erik Shilts
+* 2 Raul Garreta
+* 2 Yoshiki Vázquez Baeza
+* 2 Yung Siang Liau
+* 2 abhishek thakur
+* 2 James Yu
+* 2 Rohit Sivaprasad
+* 2 Roland Szabo
+* 2 amormachine
+* 2 Alexis Mignon
+* 2 Oscar Carlsson
+* 2 Nantas Nardelli
+* 2 jess010
+* 2 kowalski87
+* 2 Andrew Clegg
+* 2 Federico Vaggi
+* 2 Simon Frid
+* 2 Félix-Antoine Fortin
+* 1 Ralf Gommers
+* 1 t-aft
+* 1 Ronan Amicel
+* 1 Rupesh Kumar Srivastava
+* 1 Ryan Wang
+* 1 Samuel Charron
+* 1 Samuel St-Jean
+* 1 Fabian Pedregosa
+* 1 Skipper Seabold
+* 1 Stefan Walk
+* 1 Stefan van der Walt
+* 1 Stephan Hoyer
+* 1 Allen Riddell
+* 1 Valentin Haenel
+* 1 Vijay Ramesh
+* 1 Will Myers
+* 1 Yaroslav Halchenko
+* 1 Yoni Ben-Meshulam
+* 1 Yury V. Zaytsev
+* 1 adrinjalali
+* 1 ai8rahim
+* 1 alemagnani
+* 1 alex
+* 1 benjamin wilson
+* 1 chalmerlowe
+* 1 dzikie drożdże
+* 1 jamestwebber
+* 1 matrixorz
+* 1 popo
+* 1 samuela
+* 1 François Boulogne
+* 1 Alexander Measure
+* 1 Ethan White
+* 1 Guilherme Trein
+* 1 Hendrik Heuer
+* 1 IvicaJovic
+* 1 Jan Hendrik Metzen
+* 1 Jean Michel Rouly
+* 1 Eduardo Ariño de la Rubia
+* 1 Jelle Zijlstra
+* 1 Eddy L O Jansson
+* 1 Denis
+* 1 John
+* 1 John Schmidt
+* 1 Jorge Cañardo Alastuey
+* 1 Joseph Perla
+* 1 Joshua Vredevoogd
+* 1 José Ricardo
+* 1 Julien Miotte
+* 1 Kemal Eren
+* 1 Kenta Sato
+* 1 David Cournapeau
+* 1 Kyle Kelley
+* 1 Daniele Medri
+* 1 Laurent Luce
+* 1 Laurent Pierron
+* 1 Luis Pedro Coelho
+* 1 DanielWeitzenfeld
+* 1 Craig Thompson
+* 1 Chyi-Kwei Yau
+* 1 Matthew Brett
+* 1 Matthias Feurer
+* 1 Max Linke
+* 1 Chris Filo Gorgolewski
+* 1 Charles Earl
+* 1 Michael Hanke
+* 1 Michele Orrù
+* 1 Bryan Lunt
+* 1 Brian Kearns
+* 1 Paul Butler
+* 1 Paweł Mandera
+* 1 Peter
+* 1 Andrew Ash
+* 1 Pietro Zambelli
+* 1 staubda
+
diff --git a/doc/whats_new/v0.16.rst b/doc/whats_new/v0.16.rst
new file mode 100644
index 0000000000000..33d8cc47e939a
--- /dev/null
+++ b/doc/whats_new/v0.16.rst
@@ -0,0 +1,541 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_16_1:
+
+Version 0.16.1
+==============
+
+**April 14, 2015**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Allow input data larger than ``block_size`` in
+  :class:`covariance.LedoitWolf` by `Andreas Müller`_.
+
+- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
+  caused unstable results in :class:`calibration.CalibratedClassifierCV` by
+  `Jan Hendrik Metzen`_.
+
+- Fix sorting of labels in :func:`preprocessing.label_binarize` by
+  Michael Heilman.
+
+- Fix several stability and convergence issues in
+  :class:`cross_decomposition.CCA` and
+  :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_.
+
+- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
+  on Fortran-ordered data.
+
+- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s
+  ``predict`` and ``predict_proba`` by `Andreas Müller`_.
+
+- Fix a regression where ``utils.shuffle`` converted lists and dataframes
+  to arrays. By `Olivier Grisel`_.
+
+.. _changes_0_16:
+
+Version 0.16
+============
+
+**March 26, 2015**
+
+Highlights
+----------
+
+- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
+  requirements, bug fixes and better default settings.
+
+- Multinomial logistic regression and a path algorithm in
+  :class:`linear_model.LogisticRegressionCV`.
+
+- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`.
+
+- Probability calibration of classifiers using
+  :class:`calibration.CalibratedClassifierCV`.
+
+- :class:`cluster.Birch` clustering method for large-scale datasets.
+
+- Scalable approximate nearest neighbors search with locality-sensitive
+  hashing forests in :class:`neighbors.LSHForest`.
+
+- Improved error messages and better validation when using malformed
+  input data.
+
+- More robust integration with pandas dataframes.
+
+Changelog
+---------
+
+New features
+............
+
+- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
+  for approximate nearest neighbors search.
+  By :user:`Maheshakya Wijewardena`.
+
+- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
+  of Support Vector Regression which is much faster for large
+  sample sizes than :class:`svm.SVR` with a linear kernel. By
+  `Fabian Pedregosa`_ and Qiang Luo.
+
+- Incremental fit for :class:`GaussianNB `.
+
+- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
+  :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.
+
+- Added the :func:`metrics.label_ranking_average_precision_score` metric.
+  By `Arnaud Joly`_.
+
+- Added the :func:`metrics.coverage_error` metric. By `Arnaud Joly`_.
+
+- Added :class:`linear_model.LogisticRegressionCV`. By
+  `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
+  and `Alexandre Gramfort`_.
+
+- Added a ``warm_start`` constructor parameter to make it possible for any
+  trained forest model to grow additional trees incrementally. By
+  :user:`Laurent Direr`.
+
+- Added ``sample_weight`` support to
+  :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.
+
+- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
+  algorithm that supports out-of-core learning with a ``partial_fit``
+  method. By `Kyle Kastner`_.
+
+- Averaged SGD for :class:`SGDClassifier `
+  and :class:`SGDRegressor `. By
+  :user:`Danny Sullivan `.
+
+- Added the :func:`cross_val_predict `
+  function which computes cross-validated estimates.
+  By `Luis Pedro Coelho`_.
+
+- Added :class:`linear_model.TheilSenRegressor`, a robust
+  generalized-median-based estimator. By :user:`Florian Wilhelm `.
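+
+  A minimal sketch of the new robust estimator (synthetic data with a few
+  corrupted targets, for illustration)::
+
+    import numpy as np
+    from sklearn.linear_model import TheilSenRegressor
+
+    rng = np.random.RandomState(0)
+    X = rng.randn(100, 1)
+    y = 3 * X.ravel() + 0.1 * rng.randn(100)
+    y[:10] += 20  # a few gross outliers
+    reg = TheilSenRegressor(random_state=0).fit(X, y)
+    print(reg.coef_)  # close to 3 despite the outliers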
+
+- Added :func:`metrics.median_absolute_error`, a robust metric.
+  By `Gael Varoquaux`_ and :user:`Florian Wilhelm `.
+
+- Add :class:`cluster.Birch`, an online clustering algorithm. By
+  `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.
+
+- Added shrinkage support to
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` using two new
+  solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_.
+
+- Added :class:`kernel_ridge.KernelRidge`, an implementation of
+  kernelized ridge regression.
+  By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.
+
+- All solvers in :class:`linear_model.Ridge` now support ``sample_weight``.
+  By `Mathieu Blondel`_.
+
+- Added :class:`cross_validation.PredefinedSplit` cross-validation
+  for fixed user-provided cross-validation folds.
+  By :user:`Thomas Unterthiner `.
+
+- Added :class:`calibration.CalibratedClassifierCV`, an approach for
+  calibrating the predicted probabilities of a classifier.
+  By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
+  and :user:`Balazs Kegl `.
+
+
+Enhancements
+............
+
+- Add option ``return_distance`` in :func:`hierarchical.ward_tree`
+  to return distances between nodes for both structured and unstructured
+  versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
+  The same option was added in :func:`hierarchical.linkage_tree`.
+  By `Manoj Kumar`_.
+
+- Add support for sample weights in scorer objects. Metrics with sample
+  weight support will automatically benefit from it. By `Noel Dawe`_ and
+  `Vlad Niculae`_.
+
+- Added ``newton-cg`` and ``lbfgs`` solver support in
+  :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_.
+
+- Add ``selection="random"`` parameter to implement stochastic coordinate
+  descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
+  and related estimators. By `Manoj Kumar`_.
+
+- Add ``sample_weight`` parameter to
+  :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`.
+  By :user:`Jatin Shah `.
+
+- Support sparse multilabel indicator representation in
+  :class:`preprocessing.LabelBinarizer` and
+  :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi `
+  with thanks to Rohit Sivaprasad), as well as evaluation metrics (by
+  `Joel Nothman`_).
+
+- Add support for multiclass in ``metrics.hinge_loss``. Added
+  ``labels=None`` as an optional parameter. By Saurabh Jha.
+
+- Add ``sample_weight`` parameter to ``metrics.hinge_loss``.
+  By Saurabh Jha.
+
+- Add ``multi_class="multinomial"`` option in
+  :class:`linear_model.LogisticRegression` to implement a logistic
+  regression solver that minimizes the cross-entropy or multinomial loss
+  instead of the default One-vs-Rest setting. Supports ``lbfgs`` and
+  ``newton-cg`` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver
+  option ``newton-cg`` by Simon Wu.
+
+- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a
+  single pass, when given the option ``sort=False``. By :user:`Dan
+  Blanchard `.
+
+- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
+  configured to work with estimators that may fail and raise errors on
+  individual folds. This option is controlled by the ``error_score``
+  parameter. This does not affect errors raised on re-fit. By
+  :user:`Michal Romaniuk `.
+
+- Add ``digits`` parameter to ``metrics.classification_report`` to allow
+  the report to show different precision for floating point numbers. By
+  :user:`Ian Gilmore `.
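+
+  A minimal sketch of the new ``digits`` parameter (labels are arbitrary)::
+
+    from sklearn.metrics import classification_report
+
+    y_true = [0, 1, 1, 0, 1]
+    y_pred = [0, 1, 0, 0, 1]
+    # Report precision/recall/f1 with four decimal places instead of two.
+    print(classification_report(y_true, y_pred, digits=4))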
+
+- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
+  By :user:`Aaron Staple `.
+
+- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
+  handle unknown categorical features more gracefully during transform.
+  By `Manoj Kumar`_.
+
+- Added support for sparse input data to decision trees and their ensembles.
+  By `Fares Hedyati`_ and `Arnaud Joly`_.
+
+- Optimized :class:`cluster.AffinityPropagation` by reducing the number of
+  memory allocations of large temporary data structures. By `Antony Lee`_.
+
+- Parallelization of the computation of feature importances in random
+  forests. By `Olivier Grisel`_ and `Arnaud Joly`_.
+
+- Add ``n_iter_`` attribute to estimators that accept a ``max_iter``
+  attribute in their constructor. By `Manoj Kumar`_.
+
+- Added a decision function for :class:`multiclass.OneVsOneClassifier`.
+  By `Raghav RV`_ and :user:`Kyle Beauchamp `.
+
+- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
+  support non-Euclidean metrics. By `Manoj Kumar`_.
+
+- The ``connectivity`` parameter in :class:`cluster.AgglomerativeClustering`
+  and family now accepts callables that return a connectivity matrix.
+  By `Manoj Kumar`_.
+
+- Sparse support for :func:`paired_distances`. By `Joel Nothman`_.
+
+- :class:`cluster.DBSCAN` now supports sparse input and sample weights and
+  has been optimized: the inner loop has been rewritten in Cython and
+  radius neighbors queries are now computed in batch. By `Joel Nothman`_
+  and `Lars Buitinck`_.
+
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`ensemble.RandomForestClassifier`,
+  :class:`tree.DecisionTreeClassifier`,
+  :class:`ensemble.ExtraTreesClassifier`
+  and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_.
+
+- :class:`grid_search.RandomizedSearchCV` now does sampling without
+  replacement if all parameters are given as lists. By `Andreas Müller`_.
+
+- Parallelized calculation of :func:`pairwise_distances` is now supported
+  for scipy metrics and custom callables. By `Joel Nothman`_.
+
+- Allow the fitting and scoring of all clustering algorithms in
+  :class:`pipeline.Pipeline`. By `Andreas Müller`_.
+
+- More robust seeding and improved error messages in
+  :class:`cluster.MeanShift` by `Andreas Müller`_.
+
+- Make the stopping criterion for :class:`mixture.GMM`,
+  :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the
+  number of samples by thresholding the average log-likelihood change
+  instead of its sum over all samples. By `Hervé Bredin`_.
+
+- The outcome of :func:`manifold.spectral_embedding` was made deterministic
+  by flipping the sign of eigenvectors. By :user:`Hasil Sharma `.
+
+- Significant performance and memory usage improvements in
+  :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_.
+
+- Numerical stability improvements for :class:`preprocessing.StandardScaler`
+  and :func:`preprocessing.scale`. By `Nicolas Goix`_.
+
+- :class:`svm.SVC` fitted on sparse input now implements
+  ``decision_function``. By `Rob Zinkov`_ and `Andreas Müller`_.
+
+- :func:`cross_validation.train_test_split` now preserves the input type,
+  instead of converting to NumPy arrays.
+
+
+Documentation improvements
+..........................
+
+- Added an example of using :class:`FeatureUnion` for heterogeneous input.
+  By :user:`Matt Terry `.
+
+- Documentation on scorers was improved, to highlight the handling of loss
+  functions. By :user:`Matt Pico `.
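+
+  For illustration of the documented convention (a sketch): error metrics
+  are wrapped with ``greater_is_better=False`` so that model selection
+  still maximizes the resulting (negated) score::
+
+    from sklearn.metrics import make_scorer, mean_squared_error
+
+    # Lower MSE is better, so the scorer negates it internally.
+    mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)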
+
+- A discrepancy between liblinear output and scikit-learn's wrappers
+  is now noted. By `Manoj Kumar`_.
+
+- Improved documentation generation: examples referring to a class or
+  function are now shown in a gallery on the class/function's API reference
+  page. By `Joel Nothman`_.
+
+- More explicit documentation of sample generators and of data
+  transformation. By `Joel Nothman`_.
+
+- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree`
+  used to point to empty pages stating that they are aliases of BinaryTree.
+  This has been fixed to show the correct class docs. By `Manoj Kumar`_.
+
+- Added silhouette plots for the analysis of KMeans clustering using
+  :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
+  See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`.
+
+Bug fixes
+.........
+
+- Meta-estimators now support duck typing for the presence of
+  ``decision_function``, ``predict_proba`` and other methods. This fixes
+  the behavior of :class:`grid_search.GridSearchCV`,
+  :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
+  :class:`feature_selection.RFE` and :class:`feature_selection.RFECV`
+  when nested. By `Joel Nothman`_.
+
+- The ``scoring`` attribute of grid-search and cross-validation methods is
+  no longer ignored when a :class:`grid_search.GridSearchCV` is given as a
+  base estimator or the base estimator doesn't have ``predict``.
+
+- The function :func:`hierarchical.ward_tree` now returns the children in
+  the same order for both the structured and unstructured versions. By
+  `Matteo Visconti di Oleggio Castello`_.
+
+- :class:`feature_selection.RFECV` now correctly handles cases when
+  ``step`` is not equal to 1. By :user:`Nikolay Mayorov `.
+
+- The :class:`decomposition.PCA` now undoes whitening in its
+  ``inverse_transform``. Also, its ``components_`` now always have unit
+  length. By :user:`Michael Eickenberg `.
+
+- Fix incomplete download of the dataset when
+  :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.
+
+- Various fixes to the Gaussian processes subpackage by Vincent Dubourg
+  and Jan Hendrik Metzen.
+
+- Calling ``partial_fit`` with ``class_weight=='auto'`` now raises an
+  appropriate error message and suggests a workaround.
+  By :user:`Danny Sullivan `.
+
+- :class:`RBFSampler ` with ``gamma=g``
+  formerly approximated :func:`rbf_kernel `
+  with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
+  which may substantially change your results if you use a fixed value.
+  (If you cross-validated over ``gamma``, it probably doesn't matter
+  too much.) By :user:`Dougal Sutherland `.
+
+- Pipeline objects now delegate the ``classes_`` attribute to the
+  underlying estimator. This allows, for instance, bagging of a pipeline
+  object. By `Arnaud Joly`_.
+
+- :class:`neighbors.NearestCentroid` now uses the median as the centroid
+  when the metric is set to ``manhattan``. It was using the mean before.
+  By `Manoj Kumar`_.
+
+- Fix numerical stability issues in :class:`linear_model.SGDClassifier`
+  and :class:`linear_model.SGDRegressor` by clipping large gradients and
+  ensuring that weight decay rescaling is always positive (for large
+  l2 regularization and large learning rate values).
+  By `Olivier Grisel`_.
+
+- When ``compute_full_tree`` was set to "auto", the full tree was
+  built when ``n_clusters`` was high and stopped early when ``n_clusters``
+  was low, while the behavior should have been vice-versa in
+  :class:`cluster.AgglomerativeClustering` (and friends).
+  This has been fixed by `Manoj Kumar`_.
+
+- Fix lazy centering of data in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path`. It was centered around one. It has
+  been changed to be centered around the origin. By `Manoj Kumar`_.
+
+- Fix handling of precomputed affinity matrices in
+  :class:`cluster.AgglomerativeClustering` when using connectivity
+  constraints. By :user:`Cathy Deng `.
+
+- Correct ``partial_fit`` handling of ``class_prior`` for
+  :class:`sklearn.naive_bayes.MultinomialNB` and
+  :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.
+
+- Fixed a crash in :func:`metrics.precision_recall_fscore_support`
+  when using unsorted ``labels`` in the multi-label setting.
+  By `Andreas Müller`_.
+
+- Avoid skipping the first nearest neighbor in the methods
+  ``radius_neighbors``, ``kneighbors``, ``kneighbors_graph`` and
+  ``radius_neighbors_graph`` in :class:`sklearn.neighbors.NearestNeighbors`
+  and family, when the query data is not the same as the fit data.
+  By `Manoj Kumar`_.
+
+- Fix log-density calculation in the :class:`mixture.GMM` with
+  tied covariance. By `Will Dawson`_.
+
+- Fixed a scaling error in :class:`feature_selection.SelectFdr`
+  where a factor ``n_features`` was missing. By `Andrew Tulloch`_.
+
+- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
+  classes when using distance weighting and having identical data points.
+  By `Garret-R `_.
+
+- Fixed round-off errors with non-positive-definite covariance matrices
+  in GMM. By :user:`Alexis Mignon `.
+
+- Fixed an error in the computation of conditional probabilities in
+  :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.
+
+- Make the method ``radius_neighbors`` of
+  :class:`neighbors.NearestNeighbors` return the samples lying on the
+  boundary for ``algorithm='brute'``. By `Yan Yi`_.
+
+- Flip the sign of ``dual_coef_`` of :class:`svm.SVC`
+  to make it consistent with the documentation and
+  ``decision_function``. By Artem Sobolev.
+
+- Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
+  We now use the weighted average of targets (secondary method). By
+  `Andreas Müller`_ and `Michael Bommarito `_.
+
+API changes summary
+-------------------
+
+- :class:`GridSearchCV ` and
+  :func:`cross_val_score ` and other
+  meta-estimators don't convert pandas DataFrames into arrays any more,
+  allowing DataFrame-specific operations in custom estimators.
+
+- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
+  :func:`predict_proba_ovr`,
+  :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
+  :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
+  are deprecated. Use the underlying estimators instead.
+
+- Nearest neighbors estimators used to take arbitrary keyword arguments
+  and pass these to their distance metric. This will no longer be supported
+  in scikit-learn 0.18; use the ``metric_params`` argument instead.
+
+- The ``n_jobs`` parameter of the ``fit`` method was moved to the
+  constructor of the ``LinearRegression`` class.
+
+- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
+  now returns two probabilities per sample in the multiclass case; this
+  is consistent with other estimators and with the method's documentation,
+  but previous versions accidentally returned only the positive
+  probability. Fixed by Will Lamond and `Lars Buitinck`_.
+
+- Changed the default value of ``precompute`` in :class:`ElasticNet` and
+  :class:`Lasso` to ``False``.
+  Setting ``precompute`` to "auto" was found to be slower when
+  ``n_samples > n_features`` since the computation of the Gram matrix is
+  computationally expensive and outweighs the benefit of fitting the Gram
+  matrix for just one alpha.
+  ``precompute="auto"`` is now deprecated and will be removed in 0.18.
+  By `Manoj Kumar`_.
+
+- Expose the ``positive`` option in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path`, which constrains coefficients to be
+  positive. By `Manoj Kumar`_.
+
+- Users should now supply an explicit ``average`` parameter to
+  :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` when performing multiclass
+  or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
+
+- The ``scoring`` parameter for cross-validation now accepts
+  ``'f1_micro'``, ``'f1_macro'`` or ``'f1_weighted'``. ``'f1'`` is now for
+  binary classification only. Similar changes apply to ``'precision'`` and
+  ``'recall'``. By `Joel Nothman`_.
+
+- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
+  :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
+  been removed. They had been deprecated since 0.14.
+
+- From now onwards, all estimators will uniformly raise ``NotFittedError``
+  (:class:`utils.validation.NotFittedError`) when any of the
+  ``predict``-like methods are called before the model is fit.
+  By `Raghav RV`_.
+
+- Input data validation was refactored for more consistent input
+  validation. The ``check_arrays`` function was replaced by ``check_array``
+  and ``check_X_y``. By `Andreas Müller`_.
+
+- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
+  ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+  :class:`sklearn.neighbors.NearestNeighbors` and family. If set to
+  ``None``, then for every sample this avoids setting the sample itself as
+  the first nearest neighbor. By `Manoj Kumar`_.
+
+- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
+  and :func:`neighbors.radius_neighbors_graph`, which has to be explicitly
+  set by the user. If set to ``True``, then the sample itself is considered
+  as the first nearest neighbor.
+
+- The ``thresh`` parameter is deprecated in favor of the new ``tol``
+  parameter in :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See the
+  `Enhancements` section for details. By `Hervé Bredin`_.
+
+- Estimators will treat input with dtype ``object`` as numeric when
+  possible. By `Andreas Müller`_.
+
+- Estimators now raise ``ValueError`` consistently when fitted on empty
+  data (less than 1 sample or less than 1 feature for 2D input).
+  By `Olivier Grisel`_.
+
+- The ``shuffle`` option of :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
+  :class:`linear_model.PassiveAggressiveClassifier` and
+  :class:`linear_model.PassiveAggressiveRegressor` now defaults to
+  ``True``.
+
+- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
+  ``random_state`` parameter is deprecated.
+  By :user:`Erich Schubert `.
+
+Code Contributors
+-----------------
+A. Flaxman, Aaron Schumacher, Aaron Staple, abhishek thakur, Akshay,
+akshayah3, Aldrian Obaja, Alexander Fabisch, Alexandre Gramfort, Alexis
+Mignon, Anders Aagaard, Andreas Mueller, Andreas van Cranenburgh, Andrew
+Tulloch, Andrew Walker, Antony Lee, Arnaud Joly, banilo, Barmaley.exe, Ben
+Davies, Benedikt Koehler, bhsu, Boris Feld, Borja Ayerdi, Boyuan Deng, Brent
+Pedersen, Brian Wignall, Brooke Osborn, Calvin Giles, Cathy Deng, Celeo,
+cgohlke, chebee7i, Christian Stade-Schuldt, Christof Angermueller, Chyi-Kwei
+Yau, CJ Carey, Clemens Brunner, Daiki Aminaka, Dan Blanchard, danfrankj,
+Danny Sullivan, David Fletcher, Dmitrijs Milajevs, Dougal J. Sutherland,
+Erich Schubert, Fabian Pedregosa, Florian Wilhelm, floydsoft, Félix-Antoine
+Fortin, Gael Varoquaux, Garrett-R, Gilles Louppe, gpassino, gwulfs, Hampus
+Bengtsson, Hamzeh Alsalhi, Hanna Wallach, Harry Mavroforakis, Hasil Sharma,
+Helder, Herve Bredin, Hsiang-Fu Yu, Hugues SALAMIN, Ian Gilmore,
+Ilambharathi Kanniah, Imran Haque, isms, Jake VanderPlas, Jan Dlabal, Jan
+Hendrik Metzen, Jatin Shah, Javier López Peña, jdcaballero, Jean Kossaifi,
+Jeff Hammerbacher, Joel Nothman, Jonathan Helmus, Joseph, Kaicheng Zhang,
+Kevin Markham, Kyle Beauchamp, Kyle Kastner, Lagacherie Matthieu, Lars
+Buitinck, Laurent Direr, leepei, Loic Esteve, Luis Pedro Coelho, Lukas
+Michelbacher, maheshakya, Manoj Kumar, Manuel, Mario Michael Krell, Martin,
+Martin Billinger, Martin Ku, Mateusz Susik, Mathieu Blondel, Matt Pico, Matt
+Terry, Matteo Visconti dOC, Matti Lyra, Max Linke, Mehdi Cherti, Michael
+Bommarito, Michael Eickenberg, Michal Romaniuk, MLG, mr.Shu, Nelle
+Varoquaux, Nicola Montecchio, Nicolas, Nikolay Mayorov, Noel Dawe, Okal
+Billy, Olivier Grisel, Óscar Nájera, Paolo Puggioni, Peter Prettenhofer,
+Pratap Vardhan, pvnguyen, queqichao, Rafael Carrascosa, Raghav R V, Rahiel
+Kasim, Randall Mason, Rob Zinkov, Robert Bradshaw, Saket Choudhary, Sam
+Nicholls, Samuel Charron, Saurabh Jha, sethdandridge, sinhrks, snuderl,
+Stefan Otte, Stefan van der Walt, Steve Tjoa, swu, Sylvain Zimmer, tejesh95,
+terrycojones, Thomas Delteil, Thomas Unterthiner, Tomas Kazmar,
+trevorstephens, tttthomasssss, Tzu-Ming Kuo, ugurcaliskan, ugurthemaster,
+Vinayak Mehta, Vincent Dubourg, Vjacheslav Murashkin, Vlad Niculae,
+wadawson, Wei Xue, Will Lamond, Wu Jiang, x0l, Xinfan Meng, Yan Yi, Yu-Chin
+
diff --git a/doc/whats_new/v0.17.rst b/doc/whats_new/v0.17.rst
new file mode 100644
index 0000000000000..35e895e5d4188
--- /dev/null
+++ b/doc/whats_new/v0.17.rst
@@ -0,0 +1,511 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_17_1:
+
+Version 0.17.1
+==============
+
+**February 18, 2016**
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Upgrade vendored joblib to version 0.9.4, which fixes an important bug in
+  ``joblib.Parallel`` that can silently yield wrong results when working
+  on datasets larger than 1MB:
+  https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst
+
+- Fixed reading of Bunch pickles generated with scikit-learn
+  version <= 0.16. This can affect users who have already
+  downloaded a dataset with scikit-learn 0.16 and are loading it
+  with scikit-learn 0.17. See :issue:`6196` for
+  how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
+  Esteve`_.
+
+- Fixed a bug that prevented using the ROC AUC score to perform grid search
+  on several CPUs/cores on large arrays. See :issue:`6147`.
+  By `Olivier Grisel`_.
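+
+  A sketch of the now-working pattern (using the ``sklearn.grid_search``
+  module of this release line; later versions provide the same class in
+  ``sklearn.model_selection``)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.grid_search import GridSearchCV
+    from sklearn.linear_model import LogisticRegression
+
+    X, y = make_classification(n_samples=1000, random_state=0)
+    # ROC AUC scoring combined with multi-core grid search.
+    search = GridSearchCV(LogisticRegression(), {"C": [0.1, 1.0, 10.0]},
+                          scoring="roc_auc", n_jobs=2)
+    search.fit(X, y)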
+
+- Fixed a bug that prevented properly setting the ``presort`` parameter
+  in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`.
+  By Andrew McCulloh.
+
+- Fixed a joblib error when evaluating the perplexity of a
+  :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`.
+  By Chyi-Kwei Yau.
+
+
+.. _changes_0_17:
+
+Version 0.17
+============
+
+**November 5, 2015**
+
+Changelog
+---------
+
+New features
+............
+
+- All the scaler classes except :class:`preprocessing.RobustScaler` can be
+  fitted online by calling ``partial_fit``.
+  By :user:`Giorgio Patrini `.
+
+- The new class :class:`ensemble.VotingClassifier` implements a
+  "majority rule" / "soft voting" ensemble classifier to combine
+  estimators for classification. By `Sebastian Raschka`_.
+
+- The new class :class:`preprocessing.RobustScaler` provides an
+  alternative to :class:`preprocessing.StandardScaler` for feature-wise
+  centering and range normalization that is robust to outliers.
+  By :user:`Thomas Unterthiner `.
+
+- The new class :class:`preprocessing.MaxAbsScaler` provides an
+  alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
+  range normalization when the data is already centered or sparse.
+  By :user:`Thomas Unterthiner `.
+
+- The new class :class:`preprocessing.FunctionTransformer` turns a Python
+  function into a ``Pipeline``-compatible transformer object.
+  By Joe Jevnik.
+
+- The new classes :class:`cross_validation.LabelKFold` and
+  :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
+  respectively similar to :class:`cross_validation.KFold` and
+  :class:`cross_validation.ShuffleSplit`, except that the folds are
+  conditioned on a label array. By `Brian McFee`_, :user:`Jean
+  Kossaifi ` and `Gilles Louppe`_.
+
+- :class:`decomposition.LatentDirichletAllocation` implements the Latent
+  Dirichlet Allocation topic model with online variational
+  inference. By :user:`Chyi-Kwei Yau `, with code based on an
+  implementation by Matt Hoffman. (:issue:`3659`)
+
+- The new solver ``sag`` implements Stochastic Average Gradient descent
+  and is available in both :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.Ridge`. This solver is very efficient for large
+  datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_.
+  (:issue:`4738`)
+
+- The new solver ``cd`` implements coordinate descent in
+  :class:`decomposition.NMF`. The previous solver, based on projected
+  gradient, is still available by setting the new parameter ``solver`` to
+  ``pg``, but is deprecated and will be removed in 0.19, along with
+  :class:`decomposition.ProjectedGradientNMF` and the parameters
+  ``sparseness``, ``eta``, ``beta`` and ``nls_max_iter``. New parameters
+  ``alpha`` and ``l1_ratio`` control L1 and L2 regularization, and
+  ``shuffle`` adds a shuffling step in the ``cd`` solver.
+  By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.
+
+Enhancements
+............
+
+- :class:`manifold.TSNE` now supports approximate optimization via the
+  Barnes-Hut method, leading to much faster fitting.
+  By Christopher Erick Moody. (:issue:`4025`)
+
+- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution,
+  as implemented in the ``mean_shift`` function. By :user:`Martino
+  Sorbaro `.
+
+- :class:`naive_bayes.GaussianNB` now supports fitting with
+  ``sample_weight``. By `Jan Hendrik Metzen`_.
+
+- :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
+  By `Arnaud Joly`_.
+
+- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses.
+  By :user:`Cory Lorenz `.
+
+- Added the :func:`metrics.label_ranking_loss` metric.
+  By `Arnaud Joly`_.
+
+- Added the :func:`metrics.cohen_kappa_score` metric.
+
+- Added a ``warm_start`` constructor parameter to the bagging ensemble
+  models to increase the size of the ensemble.
+  By :user:`Tim Head `.
+
+- Added the option to use multi-output regression metrics without
+  averaging. By Konstantin Shmelkov and :user:`Michael Eickenberg`.
+
+- Added a ``stratify`` option to :func:`cross_validation.train_test_split`
+  for stratified splitting. By Miroslav Batchkarov.
+
+- The :func:`tree.export_graphviz` function now supports aesthetic
+  improvements for :class:`tree.DecisionTreeClassifier` and
+  :class:`tree.DecisionTreeRegressor`, including options for coloring nodes
+  by their majority class or impurity, showing variable names, and using
+  node proportions instead of raw sample counts. By `Trevor Stephens`_.
+
+- Improved the speed of the ``newton-cg`` solver in
+  :class:`linear_model.LogisticRegression`, by avoiding loss computation.
+  By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.
+
+- The ``class_weight="auto"`` heuristic in classifiers supporting
+  ``class_weight`` was deprecated and replaced by the
+  ``class_weight="balanced"`` option, which has a simpler formula and
+  interpretation. By `Hanna Wallach`_ and `Andreas Müller`_.
+
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
+  `Trevor Stephens`_.
+
+- Added backlinks from the API reference pages to the user guide. By
+  `Andreas Müller`_.
+
+- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
+  :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` has been extended.
+  It is now possible to ignore one or more labels, such as where
+  a multiclass problem has a majority class to ignore. By `Joel Nothman`_.
+
+- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
+  By `Trevor Stephens`_.
+
+- Provide an option for sparse output from
+  :func:`sklearn.metrics.pairwise.cosine_similarity`. By
+  :user:`Jaidev Deshpande `.
+
+- Add :func:`minmax_scale` to provide a function interface for
+  :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `.
+
+- ``dump_svmlight_file`` now handles multi-label datasets.
+  By Chih-Wei Chang.
+
+- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
+  By `Tom Dupre la Tour`_.
+
+- The "Wisconsin Breast Cancer" classical two-class classification dataset
+  is now included in scikit-learn, available with
+  :func:`sklearn.datasets.load_breast_cancer`.
+
+- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
+  short tasks. This makes it possible for scikit-learn to benefit from
+  parallelism when many very short tasks are executed in parallel, for
+  instance by the :class:`grid_search.GridSearchCV` meta-estimator
+  with ``n_jobs > 1`` used with a large grid of parameters on a small
+  dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
+
+- For more details about changes in joblib 0.9.3 see the release notes:
+  https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093
+
+- Improved speed (by a factor of 3 per iteration) of
+  :class:`decomposition.DictionaryLearning` with the coordinate descent
+  method from :class:`linear_model.Lasso`.
+  By :user:`Arthur Mensch `.
+
+- Parallel processing (threaded) for queries of nearest neighbors
+  (using the ball-tree) by Nikolay Mayorov.
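+
+  A sketch of a parallel ball-tree query (assuming the ``n_jobs``
+  constructor parameter that accompanies this change; data is random,
+  for illustration)::
+
+    import numpy as np
+    from sklearn.neighbors import NearestNeighbors
+
+    X = np.random.RandomState(0).randn(1000, 3)
+    nn = NearestNeighbors(n_neighbors=5, algorithm="ball_tree",
+                          n_jobs=2).fit(X)
+    # Queries are executed by multiple threads over the ball-tree.
+    distances, indices = nn.kneighbors(X[:10])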
+
+- Allow :func:`datasets.make_multilabel_classification` to output
+  a sparse ``y``. By Kashif Rasul.
+
+- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed
+  distances, allowing memory-efficient distance precomputation. By
+  `Joel Nothman`_.
+
+- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method
+  for retrieving the leaf indices that samples are predicted as. By
+  :user:`Daniel Galvez ` and `Gilles Louppe`_.
+
+- Speed up decision tree regressors, random forest regressors, extra trees
+  regressors and gradient boosting estimators by computing a proxy
+  of the impurity improvement during the tree growth. The proxy quantity is
+  such that the split that maximizes this value also maximizes the impurity
+  improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber `
+  and `Gilles Louppe`_.
+
+- Speed up tree-based methods by reducing the number of computations needed
+  when computing the impurity measure, taking into account the linear
+  relationship of the computed statistics. The effect is particularly
+  visible with extra trees and on datasets with categorical or sparse
+  features. By `Arnaud Joly`_.
+
+- :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
+  method for retrieving the leaf indices each sample ends up in under
+  each tree. By :user:`Jacob Schreiber `.
+
+- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
+  By Sonny Hu. (:issue:`4881`)
+
+- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
+  the stopping criterion. By Santi Villalba. (:issue:`5186`)
+
+- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`,
+  to set the seed of the pseudo-random generator used in the ``sag``
+  solver. By `Tom Dupre la Tour`_.
+
+- Added optional parameter ``warm_start`` in
+  :class:`linear_model.LogisticRegression`. If set to ``True``, the solvers
+  ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
+  coefficients computed in the previous fit. By `Tom Dupre la Tour`_.
+
+- Added ``sample_weight`` support to
+  :class:`linear_model.LogisticRegression` for
+  the ``lbfgs``, ``newton-cg``, and ``sag`` solvers.
+  By `Valentin Stolbunov`_.
+  Support added to the ``liblinear`` solver. By `Manoj Kumar`_.
+
+- Added optional parameter ``presort`` to
+  :class:`ensemble.GradientBoostingRegressor`
+  and :class:`ensemble.GradientBoostingClassifier`, keeping default
+  behavior the same. This allows gradient boosters to turn off presorting
+  when building deep trees or using sparse data.
+  By :user:`Jacob Schreiber `.
+
+- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
+  default. By :user:`Graham Clenaghan `.
+
+- Added :class:`feature_selection.SelectFromModel` meta-transformer which
+  can be used along with estimators that have a ``coef_`` or
+  ``feature_importances_`` attribute to select important features of the
+  input data (see the sketch below). By
+  :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and
+  `Manoj Kumar`_.
+
+- Added :func:`metrics.pairwise.laplacian_kernel`.
+  By `Clyde Fare `_.
+
+- :class:`covariance.GraphLasso` allows separate control of the convergence
+  criterion for the Elastic-Net subproblem via the ``enet_tol`` parameter.
+
+- Improved verbosity in :class:`decomposition.DictionaryLearning`.
+
+- :class:`ensemble.RandomForestClassifier` and
+  :class:`ensemble.RandomForestRegressor` no longer explicitly store the
+  samples used in bagging, resulting in a much reduced memory footprint for
+  storing random forest models.
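+
+A minimal sketch of the ``SelectFromModel`` meta-transformer added above
+(the base estimator and threshold are arbitrary choices for illustration)::
+
+  from sklearn.datasets import make_classification
+  from sklearn.ensemble import RandomForestClassifier
+  from sklearn.feature_selection import SelectFromModel
+
+  X, y = make_classification(n_samples=100, n_features=20, random_state=0)
+  # Keep only the features whose importance exceeds the median importance.
+  selector = SelectFromModel(RandomForestClassifier(random_state=0),
+                             threshold="median")
+  X_reduced = selector.fit_transform(X, y)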
+
+- Added ``positive`` option to :class:`linear_model.Lars` and
+  :func:`linear_model.lars_path` to force coefficients to be positive.
+  (:issue:`5131`)
+
+- Added the ``X_norm_squared`` parameter to
+  :func:`metrics.pairwise.euclidean_distances` to provide precomputed
+  squared norms for ``X``.
+
+- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`.
+
+- Added the :func:`preprocessing.minmax_scale` function.
+
+Bug fixes
+.........
+
+- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
+  multi-label output. By `Andreas Müller`_.
+
+- Fixed the output shape of :class:`linear_model.RANSACRegressor` to
+  ``(n_samples, )``. By `Andreas Müller`_.
+
+- Fixed bug in :class:`decomposition.DictionaryLearning` when
+  ``n_jobs < 0``. By `Andreas Müller`_.
+
+- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a
+  lot of memory for large discrete grids. By `Joel Nothman`_.
+
+- Fixed bug in :class:`linear_model.LogisticRegressionCV` where ``penalty``
+  was ignored in the final fit. By `Manoj Kumar`_.
+
+- Fixed bug in :class:`ensemble.forest.ForestClassifier` when computing
+  ``oob_score`` and ``X`` is a ``sparse.csc_matrix``.
+  By :user:`Ankur Ankan `.
+
+- All regressors now consistently handle and warn when given ``y`` that is
+  of shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin.
+  (:issue:`5431`)
+
+- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by
+  `Lars Buitinck`_.
+
+- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance
+  matrices when using shrinkage. By `Martin Billinger`_.
+
+- Fixed :func:`cross_validation.cross_val_predict` for estimators with
+  sparse predictions. By Buddha Prakash.
+
+- Fixed the ``predict_proba`` method of
+  :class:`linear_model.LogisticRegression` to use soft-max instead of
+  one-vs-rest normalization. By `Manoj Kumar`_. (:issue:`5182`)
+
+- Fixed the ``partial_fit`` method of :class:`linear_model.SGDClassifier`
+  when called with ``average=True``. By :user:`Andrew Lamb `.
+  (:issue:`5282`)
+
+- Dataset fetchers use different filenames under Python 2 and Python 3 to
+  avoid pickling compatibility issues. By `Olivier Grisel`_.
+  (:issue:`5355`)
+
+- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused
+  classification results to depend on scale. By `Jake Vanderplas`_.
+
+- Temporarily fixed :class:`linear_model.Ridge`, which was incorrect
+  when fitting the intercept in the case of sparse data. The fix
+  automatically changes the solver to 'sag' in this case.
+  :issue:`5360` by `Tom Dupre la Tour`_.
+
+- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data
+  with a large number of features and fewer samples. (:issue:`4478`)
+  By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `.
+
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
+  platform-dependent output, and failed on ``fit_transform``.
+  By :user:`Arthur Mensch `.
+
+- Fixes to the ``Bunch`` class used to store datasets.
+
+- Fixed :func:`ensemble.plot_partial_dependence` ignoring the
+  ``percentiles`` parameter.
+
+- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
+  leads to inconsistent results when pickling.
+
+- Fixed the conditions on when a precomputed Gram matrix needs to
+  be recomputed in :class:`linear_model.LinearRegression`,
+  :class:`linear_model.OrthogonalMatchingPursuit`,
+  :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.
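+
+  For context, a sketch of the code path this fix concerns (parameter
+  values are arbitrary): with ``precompute=True`` the solver works from
+  the precomputed Gram matrix ``X.T.dot(X)``, and the corrected conditions
+  govern when that matrix must be recomputed::
+
+    import numpy as np
+    from sklearn.linear_model import Lasso
+
+    X = np.random.RandomState(0).randn(50, 10)
+    y = X.dot(np.arange(10.0))
+    reg = Lasso(alpha=0.1, precompute=True).fit(X, y)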
+
+- Fixed inconsistent memory layout in the coordinate descent solver
+  that affected :class:`linear_model.DictionaryLearning` and
+  :class:`covariance.GraphLasso`. (:issue:`5337`)
+  By `Olivier Grisel`_.
+
+- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
+  parameter.
+
+- Nearest neighbor estimators with custom distance metrics can now be
+  pickled. (:issue:`4362`)
+
+- Fixed a bug in :class:`pipeline.FeatureUnion` where
+  ``transformer_weights`` were not properly handled when performing
+  grid searches.
+
+- Fixed a bug in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.LogisticRegressionCV` when using
+  ``class_weight='balanced'`` or ``class_weight='auto'``.
+  By `Tom Dupre la Tour`_.
+
+- Fixed bug :issue:`5495` when
+  doing OVR(SVC(decision_function_shape="ovr")). Fixed by
+  :user:`Elvis Dohmatob `.
+
+
+API changes summary
+-------------------
+
+- The attributes ``data_min``, ``data_max`` and ``data_range`` in
+  :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
+  from 0.19. Instead, the class now exposes ``data_min_``, ``data_max_``
+  and ``data_range_``. By :user:`Giorgio Patrini `.
+
+- All scaler classes now have a ``scale_`` attribute, the feature-wise
+  rescaling applied by their ``transform`` methods. The old attribute
+  ``std_`` in :class:`preprocessing.StandardScaler` is deprecated and
+  superseded by ``scale_``; it won't be available in 0.19.
+  By :user:`Giorgio Patrini `.
+
+- :class:`svm.SVC` and :class:`svm.NuSVC` now have a
+  ``decision_function_shape`` parameter to make their decision function
+  have shape ``(n_samples, n_classes)`` by setting
+  ``decision_function_shape='ovr'`` (illustrated in the sketch at the end
+  of this list). This will be the default behavior starting in 0.19.
+  By `Andreas Müller`_.
+
+- Passing 1D data arrays as input to estimators is now deprecated as it
+  caused confusion in how the array elements should be interpreted
+  as features or as samples. All data arrays are now expected
+  to be explicitly shaped ``(n_samples, n_features)``.
+  By :user:`Vighnesh Birodkar `.
+
+- :class:`lda.LDA` and :class:`qda.QDA` have been moved to
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+
+- The ``store_covariance`` and ``tol`` parameters have been moved from
+  the fit method to the constructor in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
+  ``store_covariances`` and ``tol`` parameters have been moved from the
+  fit method to the constructor in
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+
+- Models inheriting from ``_LearntSelectorMixin`` will no longer support
+  the transform methods (i.e., random forests, gradient boosting, logistic
+  regression, decision trees, SVMs and SGD-related models). Wrap these
+  models with the meta-transformer
+  :class:`feature_selection.SelectFromModel` to remove features (according
+  to ``coef_`` or ``feature_importances_``) which are below a certain
+  threshold value instead.
+
+- :class:`cluster.KMeans` re-runs cluster assignments in case of
+  non-convergence, to ensure consistency of ``predict(X)`` and ``labels_``.
+  By :user:`Vighnesh Birodkar `.
+
+- Classifier and regressor models are now tagged as such using the
+  ``_estimator_type`` attribute.
+
+- Cross-validation iterators always provide indices into the training and
+  test sets, not boolean masks.
+
+- The ``decision_function`` on all regressors was deprecated and will be
+  removed in 0.19. Use ``predict`` instead.
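+
+A sketch of the ``decision_function_shape`` parameter mentioned earlier in
+this list (the iris data is just a convenient example)::
+
+  from sklearn.datasets import load_iris
+  from sklearn.svm import SVC
+
+  iris = load_iris()
+  clf = SVC(decision_function_shape="ovr").fit(iris.data, iris.target)
+  # One column per class: (n_samples, n_classes) == (3, 3) here.
+  print(clf.decision_function(iris.data[:3]).shape)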
+
+- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in
+  0.19. Use :func:`datasets.fetch_lfw_pairs` instead.
+
+- The deprecated ``hmm`` module was removed.
+
+- The deprecated ``Bootstrap`` cross-validation iterator was removed.
+
+- The deprecated ``Ward`` and ``WardAgglomeration`` classes have been
+  removed. Use :class:`cluster.AgglomerativeClustering` instead.
+
+- :func:`cross_validation.check_cv` is now a public function.
+
+- The property ``residues_`` of :class:`linear_model.LinearRegression` is
+  deprecated and will be removed in 0.19.
+
+- The deprecated ``n_jobs`` parameter of
+  :class:`linear_model.LinearRegression` has been moved to the constructor.
+
+- Removed the deprecated ``class_weight`` parameter from
+  :class:`linear_model.SGDClassifier`'s ``fit`` method. Use the
+  constructor parameter instead.
+
+- The deprecated support for the sequence of sequences (or list of lists)
+  multilabel format was removed. To convert to and from the supported
+  binary indicator matrix format, use
+  :class:`MultiLabelBinarizer `.
+
+- The behavior of calling the ``inverse_transform`` method of
+  ``pipeline.Pipeline`` will change in 0.19. It will no longer reshape
+  one-dimensional input to two-dimensional input.
+
+- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and
+  ``classes_`` of :class:`preprocessing.LabelBinarizer` were removed.
+
+- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to
+  automatically set the gamma to ``1. / n_features`` is deprecated and will
+  be removed in 0.19. Use ``gamma="auto"`` instead.
+
+Code Contributors
+-----------------
+Aaron Schumacher, Adithya Ganesh, akitty, Alexandre Gramfort, Alexey Grigorev,
+Ali Baharev, Allen Riddell, Ando Saabas, Andreas Mueller, Andrew Lamb, Anish
+Shah, Ankur Ankan, Anthony Erlinger, Ari Rouvinen, Arnaud Joly, Arnaud Rachez,
+Arthur Mensch, banilo, Barmaley.exe, benjaminirving, Boyuan Deng, Brett Naul,
+Brian McFee, Buddha Prakash, Chi Zhang, Chih-Wei Chang, Christof Angermueller,
+Christoph Gohlke, Christophe Bourguignat, Christopher Erick Moody, Chyi-Kwei
+Yau, Cindy Sridharan, CJ Carey, Clyde-fare, Cory Lorenz, Dan Blanchard, Daniel
+Galvez, Daniel Kronovet, Danny Sullivan, Data1010, David, David D Lowe, David
+Dotson, djipey, Dmitry Spikhalskiy, Donne Martin, Dougal J. Sutherland, Dougal
+Sutherland, edson duarte, Eduardo Caro, Eric Larson, Eric Martin, Erich
+Schubert, Fernando Carrillo, Frank C.
Eckert, Frank Zalkow, Gael Varoquaux, +Ganiev Ibraim, Gilles Louppe, Giorgio Patrini, giorgiop, Graham Clenaghan, +Gryllos Prokopis, gwulfs, Henry Lin, Hsuan-Tien Lin, Immanuel Bayer, Ishank +Gulati, Jack Martin, Jacob Schreiber, Jaidev Deshpande, Jake Vanderplas, Jan +Hendrik Metzen, Jean Kossaifi, Jeffrey04, Jeremy, jfraj, Jiali Mei, +Joe Jevnik, Joel Nothman, John Kirkham, John Wittenauer, Joseph, Joshua Loyal, +Jungkook Park, KamalakerDadi, Kashif Rasul, Keith Goodman, Kian Ho, Konstantin +Shmelkov, Kyler Brown, Lars Buitinck, Lilian Besson, Loic Esteve, Louis Tiao, +maheshakya, Maheshakya Wijewardena, Manoj Kumar, MarkTab marktab.net, Martin +Ku, Martin Spacek, MartinBpr, martinosorb, MaryanMorel, Masafumi Oyamada, +Mathieu Blondel, Matt Krump, Matti Lyra, Maxim Kolganov, mbillinger, mhg, +Michael Heilman, Michael Patterson, Miroslav Batchkarov, Nelle Varoquaux, +Nicolas, Nikolay Mayorov, Olivier Grisel, Omer Katz, Óscar Nájera, Pauli +Virtanen, Peter Fischer, Peter Prettenhofer, Phil Roth, pianomania, Preston +Parry, Raghav RV, Rob Zinkov, Robert Layton, Rohan Ramanath, Saket Choudhary, +Sam Zhang, santi, saurabh.bansod, scls19fr, Sebastian Raschka, Sebastian +Saeger, Shivan Sornarajah, SimonPL, sinhrks, Skipper Seabold, Sonny Hu, sseg, +Stephen Hoover, Steven De Gryze, Steven Seguin, Theodore Vasiloudis, Thomas +Unterthiner, Tiago Freitas Pereira, Tian Wang, Tim Head, Timothy Hopper, +tokoroten, Tom Dupré la Tour, Trevor Stephens, Valentin Stolbunov, Vighnesh +Birodkar, Vinayak Mehta, Vincent, Vincent Michel, vstolbunov, wangz10, Wei Xue, +Yucheng Low, Yury Zhauniarovich, Zac Stewart, zhai_pro, Zichen Wang + diff --git a/doc/whats_new/v0.18.rst b/doc/whats_new/v0.18.rst new file mode 100644 index 0000000000000..ad240d5782793 --- /dev/null +++ b/doc/whats_new/v0.18.rst @@ -0,0 +1,816 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_18_2: + +Version 0.18.2 +============== + +**June 20, 2017** + +.. topic:: Last release with Python 2.6 support + + Scikit-learn 0.18 is the last major release of scikit-learn to support Python 2.6. + Later versions of scikit-learn will require Python 2.7 or above. + + +Changelog +--------- + +- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by + `Loic Esteve`_. + +- Minor compatibility changes in the examples :issue:`9010` :issue:`8040` + :issue:`9149`. + +Code Contributors +----------------- +Aman Dalmia, Loic Esteve, Nate Guerin, Sergei Lebedev + + +.. _changes_0_18_1: + +Version 0.18.1 +============== + +**November 11, 2016** + +Changelog +--------- + +Enhancements +............ + +- Improved ``sample_without_replacement`` speed by utilizing + numpy.random.permutation for most cases. As a result, + samples may differ in this release for a fixed random state. + Affected estimators: + + - :class:`ensemble.BaggingClassifier` + - :class:`ensemble.BaggingRegressor` + - :class:`linear_model.RANSACRegressor` + - :class:`model_selection.RandomizedSearchCV` + - :class:`random_projection.SparseRandomProjection` + + This also affects the :meth:`datasets.make_classification` + method. + +Bug fixes +......... + +- Fix issue where ``min_grad_norm`` and ``n_iter_without_progress`` + parameters were not being utilised by :class:`manifold.TSNE`. + :issue:`6497` by :user:`Sebastian Säger ` + +- Fix bug for svm's decision values when ``decision_function_shape`` + is ``ovr`` in :class:`svm.SVC`. + :class:`svm.SVC`'s decision_function was incorrect from versions + 0.17.0 through 0.18.0. 
+  :issue:`7724` by `Bing Tian Dai`_
+
+- The ``explained_variance_ratio`` attribute of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` now has the
+  same length whether calculated with the SVD or the Eigen solver. :issue:`7632`
+  by :user:`JPFrancoia `
+
+- Fixed an issue in :ref:`univariate_feature_selection` where score
+  functions did not accept multi-label targets. :issue:`7676`
+  by :user:`Mohammed Affan `
+
+- Fixed setting parameters when calling ``fit`` multiple times on
+  :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_
+
+- Fixed an issue in the ``partial_fit`` method of
+  :class:`multiclass.OneVsRestClassifier` when the number of classes used in
+  ``partial_fit`` was less than the total number of classes in the
+  data. :issue:`7786` by `Srivatsan Ramesh`_
+
+- Fixed an issue in :class:`calibration.CalibratedClassifierCV` where
+  the sum of probabilities of each class for a sample was not 1, and
+  ``CalibratedClassifierCV`` now handles the case where the training set
+  has fewer classes than the overall data. :issue:`7799` by
+  `Srivatsan Ramesh`_
+
+- Fixed a bug where :class:`sklearn.feature_selection.SelectFdr` did not
+  exactly implement the Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng `.
+
+- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
+  integer inputs. :issue:`6282` by `Jake Vanderplas`_.
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301`
+  by :user:`Nelson Liu `.
+
+- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data
+  when ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+  :issue:`7680` by :user:`Ibraim Ganiev `.
+
+- Fixed a bug where :class:`decomposition.NMF` set its ``n_iters_``
+  attribute in ``transform()``. :issue:`7553` by :user:`Ekaterina
+  Krivich `.
+
+- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
+  string labels. :issue:`5874` by `Raghav RV`_.
+
+- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
+  an error when ``stratify`` is a list of string labels. :issue:`7593` by
+  `Raghav RV`_.
+
+- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
+  :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
+  because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
+  `Raghav RV`_.
+
+- All cross-validation utilities in :mod:`sklearn.model_selection` now
+  permit one-time cross-validation splitters for the ``cv`` parameter. Also,
+  non-deterministic cross-validation splitters (where multiple calls to
+  ``split`` produce dissimilar splits) can be used as the ``cv`` parameter.
+  :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
+  parameter setting on the split produced by the first ``split`` call
+  to the cross-validation splitter. :issue:`7660` by `Raghav RV`_.
+
+- Fixed a bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
+  returned an invalid CSR matrix.
+  :issue:`7750` by :user:`CJ Carey `.
+
+- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
+  small negative distance. :issue:`7732` by :user:`Artsion `.
+
+API changes summary
+-------------------
+
+Trees and forests
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
+  Liu `.
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+  :issue:`7680` by :user:`Ibraim Ganiev `.
+
+
+Linear, kernelized and related models
+
+- The length of ``explained_variance_ratio`` of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  changed for both the Eigen and SVD solvers. The attribute now has a length
+  of ``min(n_components, n_classes - 1)``. :issue:`7632`
+  by :user:`JPFrancoia `
+
+- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data
+  when ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
+
+.. _changes_0_18:
+
+Version 0.18
+============
+
+**September 28, 2016**
+
+.. topic:: Last release with Python 2.6 support
+
+    Scikit-learn 0.18 will be the last version of scikit-learn to support Python 2.6.
+    Later versions of scikit-learn will require Python 2.7 or above.
+
+.. _model_selection_changes:
+
+Model Selection Enhancements and API Changes
+--------------------------------------------
+
+- **The model_selection module**
+
+  The new module :mod:`sklearn.model_selection`, which groups together the
+  functionality of the former :mod:`sklearn.cross_validation`,
+  :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve` modules,
+  introduces new possibilities such as nested cross-validation and better
+  manipulation of parameter searches with Pandas.
+
+  Much will stay the same, but there are some key differences. Read
+  below to learn more about the changes.
+
+- **Data-independent CV splitters enabling nested cross-validation**
+
+  The new cross-validation splitters, defined in
+  :mod:`sklearn.model_selection`, are no longer initialized with any
+  data-dependent parameters such as ``y``. Instead they expose a
+  :func:`split` method that takes in the data and yields a generator for the
+  different splits.
+
+  This change makes it possible to use the cross-validation splitters to
+  perform nested cross-validation, facilitated by the
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` utilities.
+
+- **The enhanced cv_results_ attribute**
+
+  The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
+  ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
+  array corresponding to the parameter settings (i.e., search candidates).
+
+  The ``cv_results_`` dict can be easily imported into ``pandas`` as a
+  ``DataFrame`` for exploring the search results.
+
+  The ``cv_results_`` arrays include scores for each cross-validation split
+  (with keys such as ``'split0_test_score'``), as well as their mean
+  (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
+
+  The ranks for the search candidates (based on their mean
+  cross-validation score) are available at ``cv_results_['rank_test_score']``.
+
+  The values for each parameter are stored separately as numpy
+  masked object arrays; the value for a given search candidate is masked if
+  the corresponding parameter is not applicable. Additionally, a list of all
+  the parameter dicts is stored at ``cv_results_['params']``.
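+
+  A minimal sketch of exploring ``cv_results_`` with pandas (the estimator
+  and parameter grid are illustrative)::
+
+      import pandas as pd
+      from sklearn.datasets import load_iris
+      from sklearn.model_selection import GridSearchCV
+      from sklearn.svm import SVC
+
+      X, y = load_iris(return_X_y=True)
+      search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}).fit(X, y)
+      # one row per search candidate, with per-split and aggregate scores
+      df = pd.DataFrame(search.cv_results_)
+      print(df[['param_C', 'mean_test_score', 'rank_test_score']])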
+
+- **Parameters n_folds and n_iter renamed to n_splits**
+
+  Some parameter names have changed:
+  The ``n_folds`` parameter in the new :class:`model_selection.KFold`,
+  :class:`model_selection.GroupKFold` (see below for the name change),
+  and :class:`model_selection.StratifiedKFold` is now renamed to
+  ``n_splits``. The ``n_iter`` parameter in
+  :class:`model_selection.ShuffleSplit`, the new class
+  :class:`model_selection.GroupShuffleSplit` and
+  :class:`model_selection.StratifiedShuffleSplit` is now renamed to
+  ``n_splits``.
+
+- **Rename of splitter classes which accept group labels along with data**
+
+  The cross-validation splitters ``LabelKFold``,
+  ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
+  been renamed to :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` respectively.
+
+  Note the change from singular to plural form in
+  :class:`model_selection.LeavePGroupsOut`.
+
+- **Fit parameter labels renamed to groups**
+
+  The ``labels`` parameter in the :func:`split` method of the newly renamed
+  splitters :class:`model_selection.GroupKFold`,
+  :class:`model_selection.LeaveOneGroupOut`,
+  :class:`model_selection.LeavePGroupsOut` and
+  :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
+  following the new nomenclature of their class names.
+
+- **Parameter n_labels renamed to n_groups**
+
+  The parameter ``n_labels`` in the newly renamed
+  :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
+
+- **Training scores and timing information**
+
+  ``cv_results_`` also includes the training scores for each
+  cross-validation split (with keys such as ``'split0_train_score'``), as
+  well as their mean (``'mean_train_score'``) and standard deviation
+  (``'std_train_score'``). To avoid the cost of evaluating training score,
+  set ``return_train_score=False``.
+
+  Additionally, the mean and standard deviation of the times taken to split,
+  train and score the model across all the cross-validation splits are
+  available at the keys ``'mean_time'`` and ``'std_time'`` respectively.
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and Regressors
+
+- The Gaussian Process module has been reimplemented and now offers classification
+  and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
+  and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
+  implementation supports kernel engineering, gradient-based hyperparameter optimization and
+  sampling of functions from the GP prior and posterior. Extensive documentation and
+  examples are provided. By `Jan Hendrik Metzen`_.
+
+- Added a new supervised learning algorithm: :ref:`Multi-layer Perceptron `
+  :issue:`3204` by :user:`Issam H. Laradji `
+
+- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
+  :issue:`5291` by `Manoj Kumar`_.
+
+- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
+  converts single-output regressors to multi-output regressors by fitting
+  one regressor per output. By :user:`Tim Head `.
+
+Other estimators
+
+- The new :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
+  replace the former mixture models, employing faster inference
+  for sounder results. :issue:`7295` by :user:`Wei Xue ` and
+  :user:`Thierry Guillemot `.
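+
+A minimal usage sketch of the new mixture API (the data and parameters are
+illustrative)::
+
+    from sklearn.datasets import make_blobs
+    from sklearn.mixture import GaussianMixture
+
+    X, _ = make_blobs(n_samples=100, centers=3, random_state=0)
+    # fit a 3-component Gaussian mixture and get hard cluster assignments
+    gm = GaussianMixture(n_components=3, random_state=0).fit(X)
+    labels = gm.predict(X)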
+
+- :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
+  and is available by calling it with the parameter ``svd_solver='randomized'``.
+  The default value of ``n_iter`` for ``'randomized'`` has changed to 4. The old
+  behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
+  calls ``arpack`` and performs a truncated (non-randomized) SVD. By default,
+  the best solver is selected depending on the size of the input and the
+  number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Added two functions for mutual information estimation:
+  :func:`feature_selection.mutual_info_classif` and
+  :func:`feature_selection.mutual_info_regression`. These functions can be
+  used in :class:`feature_selection.SelectKBest` and
+  :class:`feature_selection.SelectPercentile` as score functions.
+  By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.
+
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+  random forests. By `Nicolas Goix`_.
+
+- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
+  Elkan's fast K-Means algorithm. By `Andreas Müller`_.
+
+Model selection and evaluation
+
+- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes-Mallows
+  Index, which measures the similarity of two clusterings of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+  and Harabaz score to evaluate the resulting clustering of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added the new cross-validation splitter
+  :class:`model_selection.TimeSeriesSplit` to handle time series data.
+  :issue:`6586` by :user:`YenChen Lin `
+
+- The cross-validation iterators are replaced by cross-validation splitters
+  available from :mod:`sklearn.model_selection`, allowing for nested
+  cross-validation. See :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
+  the mean absolute error. This criterion can also be used in
+  :class:`ensemble.ExtraTreesRegressor`,
+  :class:`ensemble.RandomForestRegressor`, and the gradient boosting
+  estimators. :issue:`6667` by :user:`Nelson Liu `.
+
+- Added a weighted impurity-based early stopping criterion for decision tree
+  growth. :issue:`6954` by :user:`Nelson Liu `
+
+- The random forest, extra tree and decision tree estimators now have a
+  method ``decision_path`` which returns the decision path of samples in
+  the tree. By `Arnaud Joly`_.
+
+- A new example has been added unveiling the decision tree structure.
+  By `Arnaud Joly`_.
+
+- Random forest, extra tree, decision tree and gradient boosting estimators
+  accept the parameters ``min_samples_split`` and ``min_samples_leaf``
+  provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.
+
+- Gradient boosting estimators accept the parameter ``criterion`` to specify
+  the splitting criterion used when building decision trees.
+  :issue:`6667` by :user:`Nelson Liu `.
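+
+A minimal sketch of the fractional ``min_samples_split`` described above
+(the value 0.1 is illustrative)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.ensemble import RandomForestClassifier
+
+    X, y = make_classification(random_state=0)
+    # a float in (0, 1] is interpreted as a fraction of the training samples
+    clf = RandomForestClassifier(min_samples_split=0.1,
+                                 random_state=0).fit(X, y)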
+
+- The memory footprint is reduced (sometimes greatly) for
+  :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
+  i.e., :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
+  by dynamically generating the attribute ``estimators_samples_`` only when it is
+  needed. By :user:`David Staub `.
+
+- Added ``n_jobs`` and ``sample_weight`` parameters for
+  :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
+  :issue:`5805` by :user:`Ibraim Ganiev `.
+
+Linear, kernelized and related models
+
+- In :class:`linear_model.LogisticRegression`, the SAG solver is now
+  available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
+
+- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
+  :class:`svm.LinearSVR` now support ``sample_weight``.
+  By :user:`Imaculate `.
+
+- Added the parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
+  error on the samples for every trial. By `Manoj Kumar`_.
+
+- Prediction of out-of-sample events with Isotonic Regression
+  (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
+  data). By :user:`Jonathan Arfa `.
+
+- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
+  `O(n^2)` behavior in pathological cases, and is also generally faster
+  (:issue:`6691`). By `Antony Lee`_.
+
+- :class:`naive_bayes.GaussianNB` now accepts data-independent class priors
+  through the parameter ``priors``. By :user:`Guillaume Lemaitre `.
+
+- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+  now work with ``np.float32`` input data without converting it
+  into ``np.float64``, reducing memory
+  consumption. :issue:`6913` by :user:`YenChen Lin `.
+
+- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
+  now accept arbitrary kernel functions in addition to the strings ``knn`` and ``rbf``.
+  :issue:`5762` by :user:`Utkarsh Upadhyay `.
+
+Decomposition, manifold learning and clustering
+
+- Added an ``inverse_transform`` function to :class:`decomposition.NMF` to compute
+  the data matrix of the original shape. By :user:`Anish Shah `.
+
+- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now work
+  with ``np.float32`` and ``np.float64`` input data without converting it,
+  so memory consumption can be reduced by using ``np.float32``.
+  :issue:`6846` by :user:`Sebastian Säger ` and
+  :user:`YenChen Lin `.
+
+Preprocessing and feature selection
+
+- :class:`preprocessing.RobustScaler` now accepts a ``quantile_range`` parameter.
+  :issue:`5929` by :user:`Konstantin Podshumok `.
+
+- :class:`feature_extraction.FeatureHasher` now accepts string values.
+  :issue:`6173` by :user:`Ryad Zenine ` and
+  :user:`Devashish Deshpande `.
+
+- Keyword arguments can now be supplied to ``func`` in
+  :class:`preprocessing.FunctionTransformer` by means of the ``kw_args``
+  parameter. By `Brian McFee`_.
+
+- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile`
+  now accept score functions that take ``X``, ``y`` as input and return only the scores.
+  By :user:`Nikolay Mayorov `.
+
+Model evaluation and meta-estimators
+
+- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier`
+  now support ``partial_fit``. By :user:`Asish Panda ` and
+  :user:`Philipp Dowling `.
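+
+A minimal sketch of the new ``partial_fit`` support (the data and base
+estimator are illustrative)::
+
+    import numpy as np
+    from sklearn.linear_model import SGDClassifier
+    from sklearn.multiclass import OneVsRestClassifier
+
+    X = np.array([[0.], [1.], [2.], [3.]])
+    y = np.array([0, 1, 2, 0])
+    ovr = OneVsRestClassifier(SGDClassifier(random_state=0))
+    # all classes must be announced on the first call to partial_fit
+    ovr.partial_fit(X[:2], y[:2], classes=np.arange(3))
+    ovr.partial_fit(X[2:], y[2:])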
+
+- Added support for substituting or disabling :class:`pipeline.Pipeline`
+  and :class:`pipeline.FeatureUnion` components using the ``set_params``
+  interface that powers :mod:`sklearn.grid_search`.
+  See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`.
+  By `Joel Nothman`_ and :user:`Robert McGibbon `.
+
+- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
+  (and :class:`model_selection.RandomizedSearchCV`) can be easily imported
+  into pandas as a ``DataFrame``. See :ref:`model_selection_changes` for
+  more information. :issue:`6697` by `Raghav RV`_.
+
+- Generalization of :func:`model_selection.cross_val_predict`:
+  one can pass method names such as ``predict_proba`` to be used in the cross
+  validation framework instead of the default ``predict``.
+  By :user:`Ori Ziv ` and :user:`Sears Merritt `.
+
+- The training scores and the time taken for training followed by scoring for
+  each search candidate are now available in the ``cv_results_`` dict.
+  See :ref:`model_selection_changes` for more information.
+  :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_.
+
+Metrics
+
+- Added a ``labels`` flag to :func:`metrics.log_loss` to explicitly provide
+  the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+  :issue:`7239` by :user:`Hong Guangguo ` with help from
+  :user:`Mads Jensen ` and :user:`Nelson Liu `.
+
+- Support sparse contingency matrices in cluster evaluation
+  (:mod:`metrics.cluster.supervised`) to scale to a large number of
+  clusters.
+  :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_.
+
+- Added a ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`.
+  By :user:`Jatin Shah ` and `Raghav RV`_.
+
+- Sped up :func:`metrics.silhouette_score` by using vectorized operations.
+  By `Manoj Kumar`_.
+
+- Added a ``sample_weight`` parameter to :func:`metrics.confusion_matrix`.
+  By :user:`Bernardo Stein `.
+
+Miscellaneous
+
+- Added an ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute
+  the score on the test folds in parallel. By `Manoj Kumar`_.
+
+- The codebase no longer contains C/C++ files generated by Cython: they are
+  generated during the build. Distribution packages will still contain the
+  generated C/C++ files. By :user:`Arthur Mensch `.
+
+- Reduced the memory usage for 32-bit float input arrays of
+  :func:`utils.sparse_func.mean_variance_axis` and
+  :func:`utils.sparse_func.incr_mean_variance_axis` by supporting Cython
+  fused types. By :user:`YenChen Lin `.
+
+- :func:`ignore_warnings` now accepts a ``category`` argument to ignore only
+  warnings of a specified type. By :user:`Thierry Guillemot `.
+
+- Added a ``return_X_y`` parameter, which makes the loader return a
+  ``(data, target)`` tuple, to
+  :func:`load_iris` (:issue:`7049`),
+  :func:`load_breast_cancer` (:issue:`7152`),
+  :func:`load_digits`,
+  :func:`load_diabetes`,
+  :func:`load_linnerud` and
+  :func:`load_boston` (:issue:`7154`) by
+  :user:`Manvendra Singh`.
+
+- Simplification of the ``clone`` function, deprecating support for estimators
+  that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.
+
+- When unpickling a scikit-learn estimator in a different version than the one
+  the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
+  on model persistence ` for more details. (:issue:`7248`)
+  By `Andreas Müller`_.
+
+Bug fixes
+.........
+
+Trees and ensembles
+
+- Random forest, extra trees, decision trees and gradient boosting
+  no longer accept ``min_samples_split=1``, as at least 2 samples
+  are required to split a decision tree node. By `Arnaud Joly`_
+
+- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
+  ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
+  By `Sebastian Raschka`_.
+
+- Fix bug where :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` would perform poorly if the
+  ``random_state`` was fixed
+  (:issue:`7411`). By `Joel Nothman`_.
+
+- Fix bug in ensembles with randomization where the ensemble would not
+  set ``random_state`` on base estimators in a pipeline or similar nesting.
+  (:issue:`7411`). Note that results for :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
+  and :class:`ensemble.AdaBoostRegressor` will now differ from previous
+  versions. By `Joel Nothman`_.
+
+Linear, kernelized and related models
+
+- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
+  :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
+  (:issue:`6764`). By :user:`Wenhua Yang `.
+
+- Fix bug in :class:`linear_model.LogisticRegressionCV` where
+  ``solver='liblinear'`` did not accept ``class_weight='balanced'``.
+  (:issue:`6817`). By `Tom Dupre la Tour`_.
+
+- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
+  occurred when there were outliers being labelled and a weight function
+  specified (:issue:`6902`). By
+  `LeonieBorne `_.
+
+- Fix :class:`linear_model.ElasticNet` sparse decision function to match
+  output with dense in the multioutput case.
+
+Decomposition, manifold learning and clustering
+
+- The default number of ``iterated_power`` iterations in
+  :class:`decomposition.RandomizedPCA` is now 4 instead of 3.
+  :issue:`5141` by :user:`Giorgio Patrini `.
+
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0.
+  In practice this is enough for obtaining a good approximation of the
+  true eigenvalues/vectors in the presence of noise. When `n_components` is
+  small (``< .1 * min(X.shape)``), `n_iter` is set to 7, unless the user specifies
+  a higher number. This improves precision with few components.
+  :issue:`5299` by :user:`Giorgio Patrini`.
+
+- The whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
+  and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
+  New features) is fixed. ``components_`` are stored with no whitening.
+  :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Fixed bug in :func:`manifold.spectral_embedding` where the diagonal of the unnormalized
+  Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `.
+
+- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
+  occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
+  :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
+  and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
+  :user:`Peter Fischer `.
+
+- The attribute ``explained_variance_ratio_`` calculated with the SVD solver
+  of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
+  correct results. By :user:`JPFrancoia `
+
+Preprocessing and feature selection
+
+- :func:`preprocessing.data._transform_selected` now always passes a copy
+  of ``X`` to the transform function when ``copy=True`` (:issue:`7194`). By `Caio
+  Oliveira `_.
+
+Model evaluation and meta-estimators
+
+- :class:`model_selection.StratifiedKFold` now raises an error if the number
+  of labels for any individual class is less than ``n_folds``.
+  :issue:`6182` by :user:`Devashish Deshpande `.
+
+- Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
+  where train and test samples could overlap in some edge cases,
+  see :issue:`6121` for
+  more details. By `Loic Esteve`_.
+
+- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
+  return splits of size ``train_size`` and ``test_size`` in all cases
+  (:issue:`6472`). By `Andreas Müller`_.
+
+- Cross-validation of :class:`OneVsOneClassifier` and
+  :class:`OneVsRestClassifier` now works with precomputed kernels.
+  :issue:`7350` by :user:`Russell Smith `.
+
+- Fix incomplete ``predict_proba`` method delegation from
+  :class:`model_selection.GridSearchCV` to
+  :class:`linear_model.SGDClassifier` (:issue:`7159`)
+  by `Yichuan Liu `_.
+
+Metrics
+
+- Fix bug in :func:`metrics.silhouette_score` in which clusters of
+  size 1 were incorrectly scored. They should get a score of 0.
+  By `Joel Nothman`_.
+
+- Fix bug in :func:`metrics.silhouette_samples` so that it now works with
+  arbitrary labels, not just those ranging from 0 to n_clusters - 1.
+
+- Fix bug where expected and adjusted mutual information were incorrect if
+  cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
+
+- :func:`metrics.pairwise.pairwise_distances` now converts arrays to
+  boolean arrays when required in ``scipy.spatial.distance``.
+  :issue:`5460` by `Tom Dupre la Tour`_.
+
+- Fix sparse input support in :func:`metrics.silhouette_score` as well as
+  the example examples/text/document_clustering.py. By :user:`YenChen Lin `.
+
+- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
+  longer round ``y_score`` values when creating ROC curves; this was causing
+  problems for users with very small differences in scores (:issue:`7353`).
+
+Miscellaneous
+
+- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
+  that extend/implement ``Sequence`` (except strings), including range (Python 3.x) and xrange
+  (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.
+
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
+  power iterations are requested, since it applies LU normalization by default.
+  If ``n_iter < 2``, numerical issues are unlikely, so no normalization is applied.
+  Other normalization options are available: ``'none'``, ``'LU'`` and ``'QR'``.
+  :issue:`5141` by :user:`Giorgio Patrini `.
+
+- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
+  with them as parameters, could not be passed to :func:`base.clone`.
+  By `Loic Esteve`_.
+
+- :func:`datasets.load_svmlight_file` is now able to read long int QID values.
+  :issue:`7101` by :user:`Ibraim Ganiev `.
+
+
+API changes summary
+-------------------
+
+Linear, kernelized and related models
+
+- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
+  Use ``loss`` instead. By `Manoj Kumar`_.
+
+- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
+  :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.
+
+Decomposition, manifold learning and clustering
+
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_process'``).
+  The new class solves the computational
+  problems of the old class and computes the Gaussian mixture with a
+  Dirichlet process prior faster than before.
+  :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.VBGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_distribution'``).
+  The new class solves the computational
+  problems of the old class and computes the Variational Bayesian Gaussian
+  mixture faster than before.
+  :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.GMM` is deprecated in favor of the new
+  :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
+  faster than before, and some of the computational problems have been solved.
+  :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+Model evaluation and meta-estimators
+
+- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
+  :mod:`sklearn.learning_curve` modules have been deprecated and the classes and
+  functions have been reorganized into the :mod:`sklearn.model_selection`
+  module. See :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
+
+- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
+  the attribute ``cv_results_``.
+  See :ref:`model_selection_changes` for more information.
+  :issue:`6697` by `Raghav RV`_.
+
+- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
+  by the new parameter ``n_splits`` since it can provide a consistent
+  and unambiguous interface to represent the number of train-test splits.
+  :issue:`7187` by :user:`YenChen Lin `.
+
+- The ``classes`` parameter was renamed to ``labels`` in
+  :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `.
+
+- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
+  ``LeaveOneLabelOut`` and ``LeavePLabelOut`` are renamed to
+  :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut`
+  and :class:`model_selection.LeavePGroupsOut` respectively.
+  Also the parameter ``labels`` in the :func:`split` method of the newly
+  renamed splitters :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` is renamed to
+  ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`,
+  the parameter ``n_labels`` is renamed to ``n_groups``.
+  :issue:`6660` by `Raghav RV`_.
+
+- Error and loss names for ``scoring`` parameters are now prefixed by
+  ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions
+  are deprecated and will be removed in version 0.20.
+  :issue:`7261` by :user:`Tim Head `.
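+
+A minimal sketch of the new scorer names (the estimator and dataset are
+illustrative)::
+
+    from sklearn.datasets import load_boston
+    from sklearn.linear_model import Ridge
+    from sklearn.model_selection import cross_val_score
+
+    X, y = load_boston(return_X_y=True)
+    # greater (less negative) is better, so all scorers can be maximized
+    scores = cross_val_score(Ridge(), X, y,
+                             scoring='neg_mean_squared_error')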
+ +Code Contributors +----------------- +Aditya Joshi, Alejandro, Alexander Fabisch, Alexander Loginov, Alexander +Minyushkin, Alexander Rudy, Alexandre Abadie, Alexandre Abraham, Alexandre +Gramfort, Alexandre Saint, alexfields, Alvaro Ulloa, alyssaq, Amlan Kar, +Andreas Mueller, andrew giessel, Andrew Jackson, Andrew McCulloh, Andrew +Murray, Anish Shah, Arafat, Archit Sharma, Ariel Rokem, Arnaud Joly, Arnaud +Rachez, Arthur Mensch, Ash Hoover, asnt, b0noI, Behzad Tabibian, Bernardo, +Bernhard Kratzwald, Bhargav Mangipudi, blakeflei, Boyuan Deng, Brandon Carter, +Brett Naul, Brian McFee, Caio Oliveira, Camilo Lamus, Carol Willing, Cass, +CeShine Lee, Charles Truong, Chyi-Kwei Yau, CJ Carey, codevig, Colin Ni, Dan +Shiebler, Daniel, Daniel Hnyk, David Ellis, David Nicholson, David Staub, David +Thaler, David Warshaw, Davide Lasagna, Deborah, definitelyuncertain, Didi +Bar-Zev, djipey, dsquareindia, edwinENSAE, Elias Kuthe, Elvis DOHMATOB, Ethan +White, Fabian Pedregosa, Fabio Ticconi, fisache, Florian Wilhelm, Francis, +Francis O'Donovan, Gael Varoquaux, Ganiev Ibraim, ghg, Gilles Louppe, Giorgio +Patrini, Giovanni Cherubin, Giovanni Lanzani, Glenn Qian, Gordon +Mohr, govin-vatsan, Graham Clenaghan, Greg Reda, Greg Stupp, Guillaume +Lemaitre, Gustav Mörtberg, halwai, Harizo Rajaona, Harry Mavroforakis, +hashcode55, hdmetor, Henry Lin, Hobson Lane, Hugo Bowne-Anderson, +Igor Andriushchenko, Imaculate, Inki Hwang, Isaac Sijaranamual, +Ishank Gulati, Issam Laradji, Iver Jordal, jackmartin, Jacob Schreiber, Jake +Vanderplas, James Fiedler, James Routley, Jan Zikes, Janna Brettingen, jarfa, Jason +Laska, jblackburne, jeff levesque, Jeffrey Blackburne, Jeffrey04, Jeremy Hintz, +jeremynixon, Jeroen, Jessica Yung, Jill-Jênn Vie, Jimmy Jia, Jiyuan Qian, Joel +Nothman, johannah, John, John Boersma, John Kirkham, John Moeller, +jonathan.striebel, joncrall, Jordi, Joseph Munoz, Joshua Cook, JPFrancoia, +jrfiedler, JulianKahnert, juliathebrave, kaichogami, KamalakerDadi, Kenneth +Lyons, Kevin Wang, kingjr, kjell, Konstantin Podshumok, Kornel Kielczewski, +Krishna Kalyan, krishnakalyan3, Kvle Putnam, Kyle Jackson, Lars Buitinck, +ldavid, LeiG, LeightonZhang, Leland McInnes, Liang-Chi Hsieh, Lilian Besson, +lizsz, Loic Esteve, Louis Tiao, Léonie Borne, Mads Jensen, Maniteja Nandana, +Manoj Kumar, Manvendra Singh, Marco, Mario Krell, Mark Bao, Mark Szepieniec, +Martin Madsen, MartinBpr, MaryanMorel, Massil, Matheus, Mathieu Blondel, +Mathieu Dubois, Matteo, Matthias Ekman, Max Moroz, Michael Scherer, michiaki +ariga, Mikhail Korobov, Moussa Taifi, mrandrewandrade, Mridul Seth, nadya-p, +Naoya Kanai, Nate George, Nelle Varoquaux, Nelson Liu, Nick James, +NickleDave, Nico, Nicolas Goix, Nikolay Mayorov, ningchi, nlathia, +okbalefthanded, Okhlopkov, Olivier Grisel, Panos Louridas, Paul Strickland, +Perrine Letellier, pestrickland, Peter Fischer, Pieter, Ping-Yao, Chang, +practicalswift, Preston Parry, Qimu Zheng, Rachit Kansal, Raghav RV, +Ralf Gommers, Ramana.S, Rammig, Randy Olson, Rob Alexander, Robert Lutz, +Robin Schucker, Rohan Jain, Ruifeng Zheng, Ryan Yu, Rémy Léone, saihttam, +Saiwing Yeung, Sam Shleifer, Samuel St-Jean, Sartaj Singh, Sasank Chilamkurthy, +saurabh.bansod, Scott Andrews, Scott Lowe, seales, Sebastian Raschka, Sebastian +Saeger, Sebastián Vanrell, Sergei Lebedev, shagun Sodhani, shanmuga cv, +Shashank Shekhar, shawpan, shengxiduan, Shota, shuckle16, Skipper Seabold, +sklearn-ci, SmedbergM, srvanrell, Sébastien Lerique, Taranjeet, themrmax, +Thierry, Thierry Guillemot, Thomas, Thomas 
Hallock, Thomas Moreau, Tim Head, +tKammy, toastedcornflakes, Tom, TomDLT, Toshihiro Kamishima, tracer0tong, Trent +Hauck, trevorstephens, Tue Vo, Varun, Varun Jewalikar, Viacheslav, Vighnesh +Birodkar, Vikram, Villu Ruusmann, Vinayak Mehta, walter, waterponey, Wenhua +Yang, Wenjian Huang, Will Welch, wyseguy7, xyguo, yanlend, Yaroslav Halchenko, +yelite, Yen, YenChenLin, Yichuan Liu, Yoav Ram, Yoshiki, Zheng RuiFeng, zivori, Óscar Nájera + diff --git a/doc/whats_new/v0.19.rst b/doc/whats_new/v0.19.rst new file mode 100644 index 0000000000000..eb29ab1599b31 --- /dev/null +++ b/doc/whats_new/v0.19.rst @@ -0,0 +1,923 @@ +.. include:: _contributors.rst + +.. currentmodule:: sklearn + +.. _changes_0_19: + +Version 0.19 +============ + +**Release Candidate (0.19b2) July 17, 2017** + +Highlights +---------- + +We are excited to release a number of great new features including +:class:`neighbors.LocalOutlierFactor` for anomaly detection, +:class:`preprocessing.QuantileTransformer` for robust feature transformation, +and the :class:`multioutput.ClassifierChain` meta-estimator to simply account +for dependencies between classes in multilabel problems. We have some new +algorithms in existing estimators, such as multiplicative update in +:class:`decomposition.NMF` and multinomial +:class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). + +Cross validation is now able to return the results from multiple metric +evaluations. The new :func:`model_selection.cross_validate` can return many +scores on the test data as well as training set performance and timings, and we +have extended the ``scoring`` and ``refit`` parameters for grid/randomized +search :ref:`to handle multiple metrics `. + +You can also learn faster. For instance, the :ref:`new option to cache +transformations ` in :class:`pipeline.Pipeline` makes grid +search over pipelines including slow transformations much more efficient. And +you can predict faster: if you're sure you know what you're doing, you can turn +off validating that the input is finite using :func:`config_context`. + +We've made some important fixes too. We've fixed a longstanding implementation +error in :func:`metrics.average_precision_score`, so please be cautious with +prior results reported from that function. A number of errors in the +:class:`manifold.TSNE` implementation have been fixed, particularly in the +default Barnes-Hut approximation. :class:`semi_supervised.LabelSpreading` and +:class:`semi_supervised.LabelPropagation` have had substantial fixes. +LabelPropagation was previously broken. LabelSpreading should now correctly +respect its alpha parameter. + +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. 
+
+- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
+- :class:`cross_decomposition.PLSRegression`
+  with ``scale=True`` (bug fix)
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
+- gradient boosting ``loss='quantile'`` (bug fix)
+- :class:`ensemble.IsolationForest` (bug fix)
+- :class:`feature_selection.SelectFdr` (bug fix)
+- :class:`linear_model.RANSACRegressor` (bug fix)
+- :class:`linear_model.LassoLars` (bug fix)
+- :class:`linear_model.LassoLarsIC` (bug fix)
+- :class:`manifold.TSNE` (bug fix)
+- :class:`neighbors.NearestCentroid` (bug fix)
+- :class:`semi_supervised.LabelSpreading` (bug fix)
+- :class:`semi_supervised.LabelPropagation` (bug fix)
+- tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
+
+Details are listed in the changelog below.
+
+(While we are trying to better inform users by providing this information, we
+cannot assure that this list is complete.)
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and regressors
+
+- Added :class:`multioutput.ClassifierChain` for multi-label
+  classification. By `Adam Kleczewski `_.
+
+- Added solver ``'saga'``, which implements an improved version of Stochastic
+  Average Gradient, in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.Ridge`. It allows the use of the L1 penalty with
+  multinomial logistic loss, and behaves marginally better than 'sag'
+  during the first epochs of ridge and logistic regression.
+  :issue:`8446` by `Arthur Mensch`_.
+
+Other estimators
+
+- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
+  detection based on nearest neighbors.
+  :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
+
+- Added the :class:`preprocessing.QuantileTransformer` class and
+  :func:`preprocessing.quantile_transform` function for feature
+  normalization based on quantiles.
+  :issue:`8363` by :user:`Denis Engemann `,
+  :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_,
+  :user:`Thierry Guillemot `, and `Gael Varoquaux`_.
+
+- The new solver ``'mu'`` implements a Multiplicative Update in
+  :class:`decomposition.NMF`, allowing the optimization of all
+  beta-divergences, including the Frobenius norm, the generalized
+  Kullback-Leibler divergence and the Itakura-Saito divergence.
+  :issue:`5295` by `Tom Dupre la Tour`_.
+
+Model selection and evaluation
+
+- :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` now support simultaneous
+  evaluation of multiple metrics. Refer to the
+  :ref:`multimetric_grid_search` section of the user guide for more
+  information. :issue:`7388` by `Raghav RV`_
+
+- Added :func:`model_selection.cross_validate`, which allows evaluation
+  of multiple metrics. This function returns a dict with more useful
+  information from cross-validation such as the train scores, fit times and
+  score times.
+  Refer to the :ref:`multimetric_cross_validation` section of the user guide
+  for more information. :issue:`7388` by `Raghav RV`_
+
+- Added :func:`metrics.mean_squared_log_error`, which computes
+  the mean squared error of the logarithmic transformation of targets,
+  particularly useful for targets with an exponential trend.
+  :issue:`7655` by :user:`Karan Desai `.
+
+- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
+  compute Discounted cumulative gain (DCG) and Normalized discounted
+  cumulative gain (NDCG).
+  :issue:`7739` by :user:`David Gasquez `.
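+
+A minimal sketch of multiple-metric evaluation with the new
+:func:`model_selection.cross_validate` (the metric choice is illustrative)::
+
+    from sklearn.datasets import load_iris
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import cross_validate
+
+    X, y = load_iris(return_X_y=True)
+    res = cross_validate(LogisticRegression(), X, y,
+                         scoring=['accuracy', 'neg_log_loss'])
+    # a dict of arrays: test scores per metric, plus fit and score times
+    print(sorted(res.keys()))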
+
+- Added the :class:`model_selection.RepeatedKFold` and
+  :class:`model_selection.RepeatedStratifiedKFold`.
+  :issue:`8120` by `Neeraj Gangwar`_.
+
+Miscellaneous
+
+- Validation that input data contains no NaN or inf can now be suppressed
+  using :func:`config_context`, at your own risk. This will save on runtime,
+  and may be particularly useful for prediction time. :issue:`7548` by
+  `Joel Nothman`_.
+
+- Added a test to ensure parameter listings in docstrings match the
+  function/class signatures. :issue:`9206` by `Alexandre Gramfort`_ and
+  `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- The ``min_weight_fraction_leaf`` constraint in tree construction is now
+  more efficient, taking a fast path to declare a node a leaf if its weight
+  is less than 2 * the minimum. Note that the constructed tree will be
+  different from previous versions where ``min_weight_fraction_leaf`` is
+  used. :issue:`7441` by :user:`Nelson Liu `.
+
+- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
+  now support sparse input for prediction.
+  :issue:`6101` by :user:`Ibraim Ganiev `.
+
+- :class:`ensemble.VotingClassifier` now allows changing estimators by using
+  :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
+  removed by setting it to ``None``.
+  :issue:`7674` by :user:`Yichuan Liu `.
+
+- :func:`tree.export_graphviz` now shows a configurable number of decimal
+  places. :issue:`8698` by :user:`Guillaume Lemaitre `.
+
+- Added a ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
+  to change the output shape of the ``transform`` method to 2-dimensional.
+  :issue:`7794` by :user:`Ibraim Ganiev ` and
+  :user:`Herilalaina Rakotoarison `.
+
+Linear, kernelized and related models
+
+- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron` now expose ``max_iter`` and
+  ``tol`` parameters, to handle convergence more precisely. The
+  ``n_iter`` parameter is deprecated, and the fitted estimator exposes
+  an ``n_iter_`` attribute, with the actual number of iterations before
+  convergence. :issue:`5036` by `Tom Dupre la Tour`_.
+
+- Added an ``average`` parameter to perform weight averaging in
+  :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
+  by :user:`Andrea Esuli `.
+
+- :class:`linear_model.RANSACRegressor` no longer throws an error
+  when calling ``fit`` if no inliers are found in its first iteration.
+  Furthermore, causes of skipped iterations are tracked in newly added
+  attributes, ``n_skips_*``.
+  :issue:`7914` by :user:`Michael Horrell `.
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, the method ``predict``
+  is a lot faster with ``return_std=True``. :issue:`8591` by
+  :user:`Hadrien Bertrand `.
+
+- Added ``return_std`` to the ``predict`` method of
+  :class:`linear_model.ARDRegression` and
+  :class:`linear_model.BayesianRidge`.
+  :issue:`7838` by :user:`Sergey Feldman `.
+
+- Memory usage enhancements: Prevent cast from float32 to float64 in:
+  :class:`linear_model.MultiTaskElasticNet`;
+  :class:`linear_model.LogisticRegression` when using the newton-cg solver; and
+  :class:`linear_model.Ridge` when using the svd, sparse_cg, cholesky or lsqr
+  solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas
+  Cordier ` and :user:`Thierry Guillemot `.
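+
+A minimal sketch of the ``max_iter`` / ``tol`` convergence controls noted
+above (the values are illustrative)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.linear_model import SGDClassifier
+
+    X, y = make_classification(random_state=0)
+    clf = SGDClassifier(max_iter=1000, tol=1e-3,
+                        random_state=0).fit(X, y)
+    # the fitted estimator records the number of epochs actually run
+    print(clf.n_iter_)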
+
+Other predictors
+
+- Custom metrics for the :mod:`neighbors` binary trees now have
+  fewer constraints: they must take two 1d-arrays and return a float.
+  :issue:`6288` by `Jake Vanderplas`_.
+
+- ``algorithm='auto'`` in :mod:`neighbors` estimators now chooses the most
+  appropriate algorithm for all input types and metrics. :issue:`9145` by
+  :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala
+  `.
+
+Decomposition, manifold learning and clustering
+
+- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans`
+  now use significantly less memory when assigning data points to their
+  nearest cluster center. :issue:`7721` by :user:`Jon Crall `.
+
+- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and
+  :class:`decomposition.TruncatedSVD` now expose the singular values
+  from the underlying SVD. They are stored in the attribute
+  ``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
+  :issue:`7685` by :user:`Tommy Löfstedt `
+
+- :class:`decomposition.NMF` is now faster when ``beta_loss=0``.
+  :issue:`9277` by :user:`hongkahjun`.
+
+- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE`.
+  :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_.
+
+- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE`,
+  so the results are closer to those of the reference implementation
+  `lvdmaaten/bhtsne `_ by :user:`Thomas
+  Moreau ` and `Olivier Grisel`_.
+
+- Memory usage enhancements: Prevent cast from float32 to float64 in
+  :class:`decomposition.PCA` and
+  :func:`decomposition.randomized_svd_low_rank`.
+  :issue:`9067` by `Raghav RV`_.
+
+Preprocessing and feature selection
+
+- Added a ``norm_order`` parameter to :class:`feature_selection.SelectFromModel`
+  to enable selection of the norm order when ``coef_`` is more than 1D.
+  :issue:`6181` by :user:`Antoine Wendlinger `.
+
+- Added the ability to use sparse matrices in :func:`feature_selection.f_regression`
+  with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `.
+
+- Small performance improvement to n-gram creation in
+  :mod:`feature_extraction.text` by binding methods for loops and
+  special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke `
+
+- Relaxed the assumption on the data for the
+  :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2
+  kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`,
+  the transform function should not check whether ``X < 0`` but whether ``X <
+  -self.skewedness``. :issue:`7573` by :user:`Romain Brault `.
+
+- Made default kernel parameters kernel-dependent in
+  :class:`kernel_approximation.Nystroem`.
+  :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_.
+
+Model evaluation and meta-estimators
+
+- :class:`pipeline.Pipeline` is now able to cache transformers
+  within a pipeline by using the ``memory`` constructor parameter.
+  :issue:`7990` by :user:`Guillaume Lemaitre `.
+
+- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its
+  ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina
+  Rakotoarison `.
+
+- Added a ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`.
+  :issue:`7723` by :user:`Mikhail Korobov `.
+
+- Added the ability to set the ``n_jobs`` parameter on :func:`pipeline.make_union`.
+  A ``TypeError`` will be raised for any other kwargs. :issue:`8028`
+  by :user:`Alexander Booth `.
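+
+A minimal sketch of transformer caching with the ``memory`` parameter noted
+above (the pipeline steps are illustrative)::
+
+    from tempfile import mkdtemp
+    from sklearn.decomposition import PCA
+    from sklearn.pipeline import Pipeline
+    from sklearn.svm import SVC
+
+    # fitted transformers are cached in this directory and reused on refit,
+    # which speeds up grid search over the downstream estimator
+    pipe = Pipeline([('reduce', PCA()), ('clf', SVC())], memory=mkdtemp())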
+
+- :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV` and
+  :func:`model_selection.cross_val_score` now allow estimators with callable
+  kernels, which were previously prohibited.
+  :issue:`8005` by `Andreas Müller`_.
+
+- :func:`model_selection.cross_val_predict` now returns output of the
+  correct shape for all values of the argument ``method``.
+  :issue:`7863` by :user:`Aman Dalmia `.
+
+- Added ``shuffle`` and ``random_state`` parameters to shuffle training
+  data before taking prefixes of it based on training sizes in
+  :func:`model_selection.learning_curve`.
+  :issue:`7506` by :user:`Narine Kokhlikyan `.
+
+- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput
+  multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_.
+
+- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`.
+  :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_.
+
+- Added a ``shuffle`` parameter to :func:`model_selection.train_test_split`.
+  :issue:`8845` by :user:`themrmax `.
+
+- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier`
+  now support online learning using ``partial_fit``.
+  :issue:`8053` by :user:`Peng Yu `.
+
+- Added a ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`.
+  :issue:`8282` by :user:`Aman Dalmia `.
+
+- More clustering metrics are now available through :func:`metrics.get_scorer`
+  and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_.
+
+- A scorer based on :func:`metrics.explained_variance_score` is also available.
+  :issue:`9259` by :user:`Hanmin Qin `.
+
+Metrics
+
+- :func:`metrics.matthews_corrcoef` now supports multiclass classification.
+  :issue:`8094` by :user:`Jon Crall `.
+
+- Added a ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`.
+  :issue:`8335` by :user:`Victor Poughon `.
+
+Miscellaneous
+
+- :func:`utils.check_estimator` now attempts to ensure that methods
+  transform, predict, etc. do not set attributes on the estimator.
+  :issue:`7533` by :user:`Ekaterina Krivich `.
+
+- Added type checking to the ``accept_sparse`` parameter in
+  :mod:`utils.validation` methods. This parameter now accepts only booleans,
+  strings, or lists/tuples of strings. ``accept_sparse=None`` is deprecated and
+  should be replaced by ``accept_sparse=False``.
+  :issue:`7880` by :user:`Josh Karnofsky `.
+
+- Make it possible to load a chunk of an svmlight formatted file by
+  passing a range of bytes to :func:`datasets.load_svmlight_file`.
+  :issue:`935` by :user:`Olivier Grisel `.
+
+- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`
+  now accept non-finite features. :issue:`8931` by :user:`Attractadore`.
+
+Bug fixes
+.........
+
+Trees and ensembles
+
+- Fixed a memory leak in trees when using trees with ``criterion='mae'``.
+  :issue:`8002` by `Raghav RV`_.
+
+- Fixed a bug where :class:`ensemble.IsolationForest` used an
+  incorrect formula for the average path length.
+  :issue:`8549` by `Peter Wang `_.
+
+- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws
+  ``ZeroDivisionError`` while fitting data with single class labels.
+  :issue:`7501` by :user:`Dominik Krzeminski `.
+
+- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where a float being compared
+  to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by
+  :user:`He Chen `.
+
+- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` ignored the
+  ``min_impurity_split`` parameter.
+  :issue:`8006` by :user:`Sebastian Pölsterl `.
+
+- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
+  :issue:`8936` by :user:`Michael Lewis `.
+
+- Fixed excessive memory usage in prediction for random forest estimators.
+  :issue:`8672` by :user:`Mike Benfield `.
+
+- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2.
+  :issue:`8068` by :user:`xor`.
+
+- Fixed a bug where :class:`ensemble.IsolationForest` fails when
+  ``max_features`` is less than 1.
+  :issue:`5732` by :user:`Ishank Gulati `.
+
+- Fix a bug where gradient boosting with ``loss='quantile'`` computed
+  negative errors for negative values of ``ytrue - ypred`` leading to wrong
+  values when calling ``__call__``.
+  :issue:`8087` by :user:`Alexis Mignon `
+
+- Fix a bug where :class:`ensemble.VotingClassifier` raises an error
+  when a numpy array is passed in for weights. :issue:`7983` by
+  :user:`Vincent Pham `.
+
+- Fixed a bug where :func:`tree.export_graphviz` raised an error
+  when the length of ``feature_names`` does not match ``n_features`` in the
+  decision tree. :issue:`8512` by :user:`Li Li `.
+
+Linear, kernelized and related models
+
+- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
+  ``max_iter`` if it finds a large inlier group early. :issue:`8251` by
+  :user:`aivision2020`.
+
+- Fixed a bug where :class:`naive_bayes.MultinomialNB` and
+  :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by
+  :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison
+  `.
+
+- Fixed a bug where :class:`linear_model.LassoLars` does not give
+  the same result as the LassoLars implementation available
+  in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `.
+
+- Fixed a bug in :class:`linear_model.RandomizedLasso`,
+  :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
+  :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
+  where the parameter ``precompute`` was not used consistently across
+  classes, and some values proposed in the docstring could raise errors.
+  :issue:`5359` by `Tom Dupre la Tour`_.
+
+- Fix inconsistent results between :class:`linear_model.RidgeCV` and
+  :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
+  by `Alexandre Gramfort`_.
+
+- Fix a bug where :func:`linear_model.LassoLars.fit` sometimes
+  left ``coef_`` as a list, rather than an ndarray.
+  :issue:`8160` by :user:`CJ Carey `.
+
+- Fix :func:`linear_model.BayesianRidge.fit` to return the
+  ridge parameters ``alpha_`` and ``lambda_`` consistent with the calculated
+  coefficients ``coef_`` and ``intercept_``.
+  :issue:`8224` by :user:`Peter Gedeck `.
+
+- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
+  integer classes. :issue:`8676` by :user:`Vathsala Achar `.
+
+- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
+  :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `.
+
+- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
+  :user:`Sergei Lebedev `.
+
+- Fix a bug where stratified CV splitters did not work with
+  :class:`linear_model.LassoCV`. :issue:`8973` by
+  :user:`Paulo Haddad `.
+
+- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
+  where predicting the standard deviation or covariance without fitting
+  would fail with an uninformative error by default.
+
+Other predictors
+
+- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement
+  ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
+  papers. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.
+
+Decomposition, manifold learning and clustering
+
+- Fixed the implementation of :class:`manifold.TSNE`:
+
+  - The ``early_exaggeration`` parameter had no effect and is now used for
+    the first 250 optimization iterations.
+
+  - Fixed the ``AssertionError: Tree consistency failed`` exception
+    reported in :issue:`8992`.
+
+  - Improved the learning schedule to match the one from the reference
+    implementation `lvdmaaten/bhtsne `_.
+
+  By :user:`Thomas Moreau ` and `Olivier Grisel`_.
+
+- Fix a bug in :class:`decomposition.LatentDirichletAllocation`
+  where the ``perplexity`` method was returning incorrect results because
+  the ``transform`` method returns normalized document topic distributions
+  as of version 0.18. :issue:`7954` by :user:`Gary Foreman `.
+
+- Fix output shape and bugs with ``n_jobs > 1`` in
+  :class:`decomposition.SparseCoder` transform and
+  :func:`decomposition.sparse_encode`
+  for one-dimensional data and one component.
+  This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed the implementation of ``explained_variance_``
+  in :class:`decomposition.PCA`,
+  :class:`decomposition.RandomizedPCA` and
+  :class:`decomposition.IncrementalPCA`.
+  :issue:`9105` by `Hanmin Qin `_.
+
+- Fixed the implementation of ``noise_variance_`` in :class:`decomposition.PCA`.
+  :issue:`9108` by `Hanmin Qin `_.
+
+- Fixed a bug where :class:`cluster.DBSCAN` gave an incorrect
+  result when the input was a precomputed sparse matrix with initial
+  rows all zero. :issue:`8306` by :user:`Akshay Gupta `.
+
+- Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse
+  array X and initial centroids, where X's means were unnecessarily being
+  subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `.
+
+- Fixes to the input validation in :class:`covariance.EllipticEnvelope`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed a bug in :class:`covariance.MinCovDet` where inputting data
+  that produced a singular covariance matrix would cause the helper method
+  ``_c_step`` to throw an exception.
+  :issue:`3367` by :user:`Jeremy Steward `.
+
+- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the
+  gradient descent. :issue:`8768` by :user:`David DeTomaso `.
+
+- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect
+  ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `.
+
+- Fixed improper scaling in :class:`cross_decomposition.PLSRegression`
+  with ``scale=True``. :issue:`7819` by :user:`jayzed82 `.
+
+- The ``fit`` methods of :class:`cluster.bicluster.SpectralCoclustering` and
+  :class:`cluster.bicluster.SpectralBiclustering` now conform to the API by
+  accepting ``y`` and returning the object. :issue:`6126`,
+  :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja
+  Nandana `.
+
+- Fix bug where :mod:`mixture` ``sample`` methods did not return as many
+  samples as requested (see the sketch below). :issue:`7702` by
+  :user:`Levi John Wolf `.
+
+- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`.
+  :issue:`9219` by `Hanmin Qin `_.
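+
+A minimal sketch of the corrected :mod:`mixture` sampling behaviour
+referenced above (toy data invented for this illustration)::
+
+    import numpy as np
+    from sklearn.mixture import GaussianMixture
+
+    X = np.random.RandomState(0).randn(100, 2)
+    gm = GaussianMixture(n_components=2, random_state=0).fit(X)
+
+    # sample() now returns exactly as many samples as requested.
+    X_new, y_new = gm.sample(n_samples=7)
+    assert X_new.shape == (7, 2)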
+
+Preprocessing and feature selection
+
+- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True``
+  will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm, and with
+  norm 'max' the norms returned will be the same as for dense matrices.
+  :issue:`7771` by `Ang Lu `_.
+
+- Fix a bug where :class:`feature_selection.SelectFdr` did not
+  exactly implement the Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng `.
+
+- Fixed a bug where :class:`linear_model.RandomizedLasso` and
+  :class:`linear_model.RandomizedLogisticRegression` broke for
+  sparse input. :issue:`8259` by :user:`Aman Dalmia `.
+
+- Fix a bug where :class:`feature_extraction.FeatureHasher`
+  mandatorily applied a sparse random projection to the hashed features,
+  preventing the use of
+  :class:`feature_extraction.text.HashingVectorizer` in a
+  pipeline with :class:`feature_extraction.text.TfidfTransformer`.
+  :issue:`7565` by :user:`Roman Yurchak `.
+
+- Fix a bug where :func:`feature_selection.mutual_info_regression` did not
+  correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre
+  `.
+
+Model evaluation and meta-estimators
+
+- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform`
+  returned ``self.best_estimator_.transform()`` instead of
+  ``self.best_estimator_.inverse_transform()``.
+  :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `.
+
+- Added a ``classes_`` attribute to :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`,
+  and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_``
+  attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295`
+  by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `,
+  and :user:`Stephen Hoover `.
+
+- Fixed a bug where :func:`model_selection.validation_curve`
+  reused the same estimator for each parameter value.
+  :issue:`7365` by :user:`Aleksandr Sandrovskii `.
+
+- :func:`model_selection.permutation_test_score` now works with Pandas
+  types. :issue:`5697` by :user:`Stijn Tonk `.
+
+- Several fixes to input validation in
+  :class:`multiclass.OutputCodeClassifier`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all
+  classes are provided up-front. :issue:`6250` by
+  :user:`Asish Panda `.
+
+- Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a
+  list of 2d arrays, rather than a 3d array. In the case where different
+  target columns had different numbers of classes, a ``ValueError`` would be
+  raised on trying to stack matrices with different dimensions.
+  :issue:`8093` by :user:`Peter Bull `.
+
+- Cross validation now works with Pandas datatypes that have a
+  read-only index. :issue:`9507` by `Loic Esteve`_.
+
+Metrics
+
+- :func:`metrics.average_precision_score` no longer linearly
+  interpolates between operating points, and instead weighs precisions
+  by the change in recall since the last operating point, as per the
+  `Wikipedia entry `_ (see the sketch below).
+  (`#7356 `_). By
+  :user:`Nick Dingwall ` and `Gael Varoquaux`_.
+
+- Fix a bug in :func:`metrics.classification._check_targets`
+  which would return ``'binary'`` if ``y_true`` and ``y_pred`` were
+  both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was
+  ``'multiclass'``. :issue:`8377` by `Loic Esteve`_.
+
+- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and
+  hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929`
+  by `Joel Nothman`_ and :user:`Jon Crall `.
+
+- Fixed passing of the ``gamma`` parameter to the ``chi2`` kernel in
+  :func:`metrics.pairwise.pairwise_kernels`. :issue:`5211` by
+  :user:`Nick Rhinehart `,
+  :user:`Saurabh Bansod ` and `Andreas Müller`_.
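+
+A minimal sketch of the revised, non-interpolated average precision (toy
+scores invented for this illustration)::
+
+    import numpy as np
+    from sklearn.metrics import average_precision_score
+
+    y_true = np.array([0, 0, 1, 1])
+    y_score = np.array([0.1, 0.4, 0.35, 0.8])
+
+    # AP is now the sum over thresholds of
+    # (recall_n - recall_{n-1}) * precision_n, with no interpolation.
+    average_precision_score(y_true, y_score)  # ~0.83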
+
+Miscellaneous
+
+- Fixed a bug where :func:`datasets.make_classification` failed
+  when generating more than 30 features. :issue:`8159` by
+  :user:`Herilalaina Rakotoarison `.
+
+- Fixed a bug where :func:`datasets.make_moons` gave an
+  incorrect result when ``n_samples`` is odd.
+  :issue:`8198` by :user:`Josh Levy `.
+
+- Some ``fetch_`` functions in :mod:`datasets` were ignoring the
+  ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `.
+
+- Fix estimators to accept a ``sample_weight`` parameter of type
+  ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
+  `Kathleen Chen`_.
+
+- Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable,
+  raising an exception if instability is identified. :issue:`7376` and
+  :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.
+
+- Fix a bug where :meth:`base.BaseEstimator.__getstate__`
+  obstructed pickling customizations of child classes, when used in a
+  multiple inheritance context.
+  :issue:`8316` by :user:`Holger Peters `.
+
+- Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
+  documentation build with Sphinx > 1.5. :issue:`8010`, :issue:`7986` by
+  :user:`Oscar Najera `.
+
+- Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
+  :issue:`9289` by `Loic Esteve`_.
+
+- Fix dataset loaders using the Python 3 version of makedirs to also work in
+  Python 2. :issue:`9284` by :user:`Sebastin Santy `.
+
+- Several minor issues were fixed with thanks to the alerts of
+  `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie `,
+  among others.
+
+API changes summary
+-------------------
+
+Trees and ensembles
+
+- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.
+
+- All tree based estimators now accept a ``min_impurity_decrease``
+  parameter in lieu of ``min_impurity_split``, which is now deprecated.
+  A node is split only when the split decreases the weighted impurity of
+  the node by at least ``min_impurity_decrease`` (see the sketch below).
+  :issue:`8449` by `Raghav RV`_.
+
+Linear, kernelized and related models
+
+- The ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.
+
+Other predictors
+
+- :class:`neighbors.LSHForest` has been deprecated and will be
+  removed in 0.21 due to poor performance.
+  :issue:`9078` by :user:`Laurent Direr `.
+
+- :class:`neighbors.NearestCentroid` no longer purports to support
+  ``metric='precomputed'``, which now raises an error. :issue:`8515` by
+  :user:`Sergul Aydore `.
+
+- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
+  has no effect and is deprecated, to be removed in 0.21. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.
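+
+A minimal sketch of the replacement parameter (illustrative only; any
+tree-based estimator accepts it)::
+
+    from sklearn.datasets import load_iris
+    from sklearn.tree import DecisionTreeClassifier
+
+    X, y = load_iris(return_X_y=True)
+
+    # Deprecated spelling: DecisionTreeClassifier(min_impurity_split=...).
+    # A node is now split only if doing so decreases the weighted impurity
+    # by at least the given amount.
+    tree = DecisionTreeClassifier(min_impurity_decrease=0.01).fit(X, y)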
+
+Decomposition, manifold learning and clustering
+
+- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
+  in :class:`decomposition.LatentDirichletAllocation` because the
+  user no longer has access to the unnormalized document topic distribution
+  needed for the perplexity calculation. :issue:`7954` by
+  :user:`Gary Foreman `.
+
+- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
+  has been renamed to ``n_components`` and will be removed in version 0.21.
+  :issue:`8922` by :user:`Attractadore`.
+
+- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
+  deprecated in preference to the class parameter of the same name.
+  :issue:`8137` by :user:`Naoya Kanai `.
+
+- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
+  :issue:`8139` by :user:`Naoya Kanai `.
+
+Preprocessing and feature selection
+
+- :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
+  method only if the underlying estimator does. By `Andreas Müller`_.
+
+- :class:`feature_selection.SelectFromModel` now validates the ``threshold``
+  parameter and sets the ``threshold_`` attribute during the call to
+  ``fit``, and no longer during the call to ``transform``. By `Andreas
+  Müller`_.
+
+- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
+  has been deprecated, and replaced with a more principled alternative,
+  ``alternate_sign``.
+  :issue:`7565` by :user:`Roman Yurchak `.
+
+- :class:`linear_model.RandomizedLogisticRegression`
+  and :class:`linear_model.RandomizedLasso` have been deprecated and will
+  be removed in version 0.21.
+  :issue:`8995` by :user:`Ramana.S `.
+
+Model evaluation and meta-estimators
+
+- Deprecate the ``fit_params`` constructor input to
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` in favor
+  of passing keyword parameters to the ``fit`` methods
+  of those classes. Data-dependent parameters needed for model
+  training should be passed as keyword arguments to ``fit``,
+  and conforming to this convention will allow the hyperparameter
+  selection classes to be used with tools such as
+  :func:`model_selection.cross_val_predict`.
+  :issue:`2879` by :user:`Stephen Hoover `.
+
+- In version 0.21, the default behavior of splitters that use the
+  ``test_size`` and ``train_size`` parameters will change, such that
+  specifying ``train_size`` alone will cause ``test_size`` to be the
+  remainder. :issue:`7459` by :user:`Nelson Liu `.
+
+- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
+  ``decision_function`` and ``predict_proba`` methods only when the
+  underlying estimator does. :issue:`7812` by `Andreas Müller`_ and
+  :user:`Mikhail Korobov `.
+
+- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
+  only if the underlying estimator does. By `Andreas Müller`_.
+
+- The ``decision_function`` output shape for binary classification in
+  :class:`multiclass.OneVsRestClassifier` and
+  :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
+  to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
+
+- The :func:`multioutput.MultiOutputClassifier.predict_proba`
+  function used to return a 3d array (``n_samples``, ``n_classes``,
+  ``n_outputs``). In the case where different target columns had different
+  numbers of classes, a ``ValueError`` would be raised on trying to stack
+  matrices with different dimensions. This function now returns a list of
+  arrays where the length of the list is ``n_outputs``, and each array is
+  (``n_samples``, ``n_classes``) for that particular output (see the sketch
+  below). :issue:`8093` by :user:`Peter Bull `.
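+
+A minimal sketch of the new return type (toy multi-output targets invented
+for this illustration)::
+
+    import numpy as np
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.multioutput import MultiOutputClassifier
+
+    rng = np.random.RandomState(0)
+    X = rng.randn(20, 3)
+    Y = np.column_stack([rng.randint(0, 2, 20),   # binary output
+                         rng.randint(0, 3, 20)])  # three-class output
+
+    clf = MultiOutputClassifier(LogisticRegression()).fit(X, Y)
+    probas = clf.predict_proba(X)
+    # A list of length n_outputs; shapes (20, 2) and (20, 3) here.
+    [p.shape for p in probas]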
+
+- The ``named_steps`` attribute of :class:`pipeline.Pipeline` is now a
+  :class:`utils.Bunch` rather than a plain ``dict``, to enable tab
+  completion in interactive environments. In the case of a conflict between
+  a step name and an existing ``dict`` attribute, the ``dict`` behavior
+  is prioritized.
+  :issue:`8481` by :user:`Herilalaina Rakotoarison `.
+
+Miscellaneous
+
+- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
+  These methods should not accept a ``y`` parameter, as they are used at
+  prediction time.
+  :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_
+  and `Raghav RV`_.
+
+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in
+  :mod:`utils` have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai `.
+
+  Removed in 0.19:
+
+  - ``utils.fixes.argpartition``
+  - ``utils.fixes.array_equal``
+  - ``utils.fixes.astype``
+  - ``utils.fixes.bincount``
+  - ``utils.fixes.expit``
+  - ``utils.fixes.frombuffer_empty``
+  - ``utils.fixes.in1d``
+  - ``utils.fixes.norm``
+  - ``utils.fixes.rankdata``
+  - ``utils.fixes.safe_copy``
+
+  Deprecated in 0.19, to be removed in 0.21:
+
+  - ``utils.arpack.eigs``
+  - ``utils.arpack.eigsh``
+  - ``utils.arpack.svds``
+  - ``utils.extmath.fast_dot``
+  - ``utils.extmath.logsumexp``
+  - ``utils.extmath.norm``
+  - ``utils.extmath.pinvh``
+  - ``utils.graph.graph_laplacian``
+  - ``utils.random.choice``
+  - ``utils.sparsetools.connected_components``
+  - ``utils.stats.rankdata``
+
+- The ``store_covariances`` parameter and ``covariances_`` attribute of
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`
+  have been renamed to ``store_covariance`` and ``covariance_`` to be
+  consistent with the corresponding names in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be
+  removed in version 0.21. :issue:`7998` by :user:`Jiacheng `.
+
+- Estimators with both methods ``decision_function`` and ``predict_proba``
+  are now required to have a monotonic relation between them. The
+  method ``check_decision_proba_consistency`` has been added in
+  ``utils.estimator_checks`` to check their consistency.
+  :issue:`7578` by :user:`Shubham Bhardwaj `.
+
+- All checks in ``utils.estimator_checks``, in particular
+  :func:`utils.estimator_checks.check_estimator`, now accept estimator
+  instances (see the sketch below). Most other checks do not accept
+  estimator classes any more. :issue:`9019` by `Andreas Müller`_.
+
+- Ensure that estimators' attributes ending with ``_`` are not set
+  in the constructor but only in the ``fit`` method. Most notably,
+  ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`)
+  now only have ``self.estimators_`` available after ``fit``.
+  :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_.
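+
+A minimal sketch of the instance-based checks (``LogisticRegression`` is an
+arbitrary stand-in; any estimator instance works)::
+
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.utils.estimator_checks import check_estimator
+
+    # Previously only classes were accepted; passing an instance now works
+    # too, letting non-default constructor arguments be exercised.
+    check_estimator(LogisticRegression())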
+ + +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of the +project since version 0.18, including: + +Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, +Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael +Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, +Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman +Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol +Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, +Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake +VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, +Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David +Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland +McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, +akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf +Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, +Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. +Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, +Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, +Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, +Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, +Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. Bednar, +Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan +LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, +Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik +Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, +Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li +Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, +Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie +Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem +Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, +Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus +Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, +Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul +Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter +Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, +Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar +Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert +Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin +Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian +Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap +Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth +Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, +Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, +Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, +Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, +Warut 
Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi
+Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus,
+Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck,
+guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber,
+jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel,
+leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112,
+mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas,
+Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton
+Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen,
+Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk,
+Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David
+Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges,
+Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed
+Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian
+Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo
+Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor
+Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia,
+Jacob Schreiber, Asish Mahapatra
+
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
new file mode 100644
index 0000000000000..06bcc9a4e6cf8
--- /dev/null
+++ b/doc/whats_new/v0.20.rst
@@ -0,0 +1,113 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_20:
+
+Version 0.20 (under development)
+================================
+
+Changed models
+--------------
+
+The following estimators and functions, when fit with the same data and
+parameters, may produce different models from the previous version. This often
+occurs due to changes in the modelling logic (bug fixes or enhancements), or in
+random sampling procedures.
+
+- :class:`decomposition.IncrementalPCA` in Python 2 (bug fix)
+- :class:`isotonic.IsotonicRegression` (bug fix)
+
+Details are listed in the changelog below.
+
+(While we are trying to better inform users by providing this information, we
+cannot assure that this list is complete.)
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and regressors
+
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` now support early stopping
+  via ``n_iter_no_change``, ``validation_fraction`` and ``tol`` (see the
+  sketch below). :issue:`7071` by `Raghav RV`_.
+
+- Added :class:`naive_bayes.ComplementNB`, which implements the Complement
+  Naive Bayes classifier described in Rennie et al. (2003).
+  By :user:`Michael A. Alcorn `.
+
+Enhancements
+............
+
+Classifiers and regressors
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, the ``predict``
+  method is faster when using ``return_std=True``, in particular when called
+  several times in a row. :issue:`9234` by :user:`andrewww `
+  and :user:`Minghui Liu `.
+
+- Added the ``named_estimators_`` attribute in
+  :class:`ensemble.VotingClassifier` to access fitted
+  estimators. :issue:`9157` by :user:`Herilalaina Rakotoarison `.
+
+Model evaluation and meta-estimators
+
+- A scorer based on :func:`metrics.brier_score_loss` is also available.
+  :issue:`9521` by :user:`Hanmin Qin `.
+
+Linear, kernelized and related models
+
+- Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the
+  underlying implementation is not random.
+  :issue:`9497` by :user:`Albert Thomas `.
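+
+A minimal sketch of the early-stopping configuration referenced above (toy
+data invented for this illustration)::
+
+    from sklearn.datasets import make_classification
+    from sklearn.ensemble import GradientBoostingClassifier
+
+    X, y = make_classification(n_samples=1000, random_state=0)
+
+    # Training stops once the score on a held-out validation_fraction of
+    # the training set fails to improve by more than tol for
+    # n_iter_no_change consecutive iterations.
+    gbc = GradientBoostingClassifier(n_estimators=1000,
+                                     validation_fraction=0.1,
+                                     n_iter_no_change=5, tol=1e-4,
+                                     random_state=0).fit(X, y)
+    gbc.n_estimators_  # typically far fewer than 1000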
+
+Bug fixes
+.........
+
+Classifiers and regressors
+
+- Fixed a bug in :class:`isotonic.IsotonicRegression` which incorrectly
+  combined weights when fitting a model to data involving points with
+  identical X values.
+  :issue:`9432` by :user:`Dallas Card `.
+
+Decomposition, manifold learning and clustering
+
+- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
+  now an error is raised if the number of components is larger than the
+  chosen batch size. The ``n_components=None`` case was adapted accordingly.
+  :issue:`6452`. By :user:`Wally Gauze `.
+
+- Fixed a bug where the ``partial_fit`` method of
+  :class:`decomposition.IncrementalPCA` used integer division instead of float
+  division on Python 2 versions. :issue:`9492` by
+  :user:`James Bourbeau `.
+
+- Fixed a bug where the ``fit`` method of
+  :class:`cluster.affinity_propagation_.AffinityPropagation` stored cluster
+  centers as a 3d array instead of a 2d array in case of non-convergence. For
+  the same class, fixed undefined and arbitrary behavior in case of training
+  data where all samples had equal similarity (see the sketch below).
+  :issue:`9612`. By :user:`Jonatan Samoocha `.
+
+- In :class:`decomposition.PCA`, selecting an ``n_components`` parameter
+  greater than the number of samples now raises an error.
+  Similarly, the ``n_components=None`` case now selects the minimum of
+  ``n_samples`` and ``n_features``. :issue:`8484`. By :user:`Wally Gauze `.
+
+- Fixed a bug in :func:`datasets.fetch_kddcup99`, where data were not properly
+  shuffled. :issue:`9731` by `Nicolas Goix`_.
+
+API changes summary
+-------------------
+
+Linear, kernelized and related models
+
+- Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the
+  underlying implementation is not random.
+  :issue:`9497` by :user:`Albert Thomas `.
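+
+A minimal sketch of the now-defined non-convergence behaviour of
+:class:`cluster.AffinityPropagation` (mirroring the tests added in this
+patch; the data and the ``max_iter=1`` forcing are invented for
+illustration)::
+
+    import numpy as np
+    from sklearn.cluster import AffinityPropagation
+
+    X = np.array([[0, 0], [1, 1], [-2, -2]])
+
+    # Force non-convergence by allowing only a single iteration; a
+    # ConvergenceWarning is emitted, cluster_centers_ is empty, and every
+    # sample (including at predict time) is labelled -1.
+    af = AffinityPropagation(preference=-10, max_iter=1).fit(X)
+    af.cluster_centers_.shape       # (0, 2)
+    af.labels_                      # array([-1, -1, -1])
+    af.predict(np.array([[2, 2]]))  # array([-1])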
""" -print(__doc__) +from __future__ import print_function # Author: Gael Varoquaux gael.varoquaux@normalesup.org # License: BSD 3 clause +import sys from datetime import datetime import numpy as np @@ -73,9 +74,8 @@ from six.moves.urllib.parse import urlencode from sklearn import cluster, covariance, manifold +print(__doc__) -# ############################################################################# -# Retrieve the data from Internet def retry(f, n_attempts=3): "Wrapper function to retry function calls in case of exceptions" @@ -83,7 +83,7 @@ def wrapper(*args, **kwargs): for i in range(n_attempts): try: return f(*args, **kwargs) - except Exception as e: + except Exception: if i == n_attempts - 1: raise return wrapper @@ -120,15 +120,33 @@ def quotes_historical_google(symbol, date1, date2): 'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4'] } converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')} - return np.genfromtxt(response, delimiter=',', skip_header=1, + data = np.genfromtxt(response, delimiter=',', skip_header=1, dtype=dtype, converters=converters, missing_values='-', filling_values=-1) + expected_len_data = 1258 + len_data = len(data) + min_date = data['date'].min() + max_date = data['date'].max() + if (len_data != expected_len_data or min_date != d1 or max_date != d2): + message = ( + 'Got wrong data for symbol {}, url {}\n' + ' - min_date should be {}, got {}\n' + ' - max_date should be {}, got {}\n' + ' - len(data) should be {}, got {}'.format( + symbol, url, + d1.date(), min_date.date(), + d2.date(), max_date.date(), + expected_len_data, len_data)) + raise ValueError(message) + return data +# ############################################################################# +# Retrieve the data from Internet # Choose a time period reasonably calm (not too long ago so that we get # high-tech firms, and before the 2008 crash) -d1 = datetime(2003, 1, 1) -d2 = datetime(2008, 1, 1) +d1 = datetime(2003, 1, 2) +d2 = datetime(2007, 12, 31) symbol_dict = { 'TOT': 'Total', @@ -170,7 +188,7 @@ def quotes_historical_google(symbol, date1, date2): 'BAC': 'Bank of America', 'GS': 'Goldman Sachs', 'AAPL': 'Apple', - 'SAP': 'SAP', + 'NYSE:SAP': 'SAP', 'CSCO': 'Cisco', 'TXN': 'Texas Instruments', 'XRX': 'Xerox', @@ -188,13 +206,15 @@ def quotes_historical_google(symbol, date1, date2): 'CAT': 'Caterpillar', 'DD': 'DuPont de Nemours'} -symbols, names = np.array(list(symbol_dict.items())).T +symbols, names = np.array(sorted(symbol_dict.items())).T # retry is used because quotes_historical_google can temporarily fail # for various reasons (e.g. empty result from Google API). 
-quotes = [ - retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols -] +quotes = [] + +for symbol in symbols: + print('Fetching quote history for %r' % symbol, file=sys.stderr) + quotes.append(retry(quotes_historical_google)(symbol, d1, d2)) close_prices = np.vstack([q['close'] for q in quotes]) open_prices = np.vstack([q['open'] for q in quotes]) diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py index 0bda5c66ce4a3..7ef4ad6353654 100644 --- a/examples/cluster/plot_color_quantization.py +++ b/examples/cluster/plot_color_quantization.py @@ -84,21 +84,18 @@ def recreate_image(codebook, labels, w, h): # Display all results, alongside original image plt.figure(1) plt.clf() -ax = plt.axes([0, 0, 1, 1]) plt.axis('off') plt.title('Original image (96,615 colors)') plt.imshow(china) plt.figure(2) plt.clf() -ax = plt.axes([0, 0, 1, 1]) plt.axis('off') plt.title('Quantized image (64 colors, K-Means)') plt.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) plt.figure(3) plt.clf() -ax = plt.axes([0, 0, 1, 1]) plt.axis('off') plt.title('Quantized image (64 colors, Random)') plt.imshow(recreate_image(codebook_random, labels_random, w, h)) diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py index ac2fde3e2cc6a..6d33f01e6a7cb 100644 --- a/examples/cluster/plot_dict_face_patches.py +++ b/examples/cluster/plot_dict_face_patches.py @@ -41,7 +41,6 @@ patch_size = (20, 20) buffer = [] -index = 1 t0 = time.time() # The online learning part: cycle over the whole dataset 6 times diff --git a/examples/cluster/plot_kmeans_stability_low_dim_dense.py b/examples/cluster/plot_kmeans_stability_low_dim_dense.py index b5d4326c5c713..109d2097b6be9 100644 --- a/examples/cluster/plot_kmeans_stability_low_dim_dense.py +++ b/examples/cluster/plot_kmeans_stability_low_dim_dense.py @@ -69,7 +69,7 @@ def make_data(random_state, n_samples_per_center, grid_size, scale): # Part 1: Quantitative evaluation of various init methods -fig = plt.figure() +plt.figure() plots = [] legends = [] @@ -105,7 +105,7 @@ def make_data(random_state, n_samples_per_center, grid_size, scale): km = MiniBatchKMeans(n_clusters=n_clusters, init='random', n_init=1, random_state=random_state).fit(X) -fig = plt.figure() +plt.figure() for k in range(n_clusters): my_members = km.labels_ == k color = cm.spectral(float(k) / n_clusters, 1) diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py index d9db17ffaec39..58494f7ef816d 100644 --- a/examples/decomposition/plot_pca_3d.py +++ b/examples/decomposition/plot_pca_3d.py @@ -73,8 +73,6 @@ def plot_figs(fig_num, elev, azim): pca_score = pca.explained_variance_ratio_ V = pca.components_ - x_pca_axis, y_pca_axis, z_pca_axis = V.T * pca_score / pca_score.min() - x_pca_axis, y_pca_axis, z_pca_axis = 3 * V.T x_pca_plane = np.r_[x_pca_axis[:2], - x_pca_axis[1::-1]] y_pca_plane = np.r_[y_pca_axis[:2], - y_pca_axis[1::-1]] diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index 8d88f99df1668..0f0a2478472c3 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -88,12 +88,14 @@ n_estimators = len(estimators) + # Generate data def f(x): x = x.ravel() return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2) + def generate(n_samples, noise, n_repeat=1): X = np.random.rand(n_samples) * 10 - 5 X = np.sort(X) @@ -110,6 +112,7 @@ def generate(n_samples, noise, n_repeat=1): return X, y 
+ X_train = [] y_train = [] @@ -120,6 +123,8 @@ def generate(n_samples, noise, n_repeat=1): X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat) +plt.figure(figsize=(10, 8)) + # Loop over estimators to compare for n, (name, estimator) in enumerate(estimators): # Compute predictions @@ -166,8 +171,8 @@ def generate(n_samples, noise, n_repeat=1): plt.xlim([-5, 5]) plt.title(name) - if n == 0: - plt.legend(loc="upper left", prop={"size": 11}) + if n == n_estimators - 1: + plt.legend(loc=(1.1, .5)) plt.subplot(2, n_estimators, n_estimators + n + 1) plt.plot(X_test, y_error, "r", label="$error(x)$") @@ -178,7 +183,9 @@ def generate(n_samples, noise, n_repeat=1): plt.xlim([-5, 5]) plt.ylim([0, 0.1]) - if n == 0: - plt.legend(loc="upper left", prop={"size": 11}) + if n == n_estimators - 1: + + plt.legend(loc=(1.1, .5)) +plt.subplots_adjust(right=.75) plt.show() diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py index f0fd5dc7d003e..73db88d829b1f 100644 --- a/examples/ensemble/plot_forest_iris.py +++ b/examples/ensemble/plot_forest_iris.py @@ -46,7 +46,6 @@ import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap -from sklearn import clone from sklearn.datasets import load_iris from sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier) @@ -90,10 +89,9 @@ X = (X - mean) / std # Train - clf = clone(model) - clf = model.fit(X, y) + model.fit(X, y) - scores = clf.score(X, y) + scores = model.score(X, y) # Create a title for each column and the console by using str() and # slicing away useless parts of the string model_title = str(type(model)).split( diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py index 323aa67bd5040..366d9e0b148d6 100644 --- a/examples/ensemble/plot_gradient_boosting_early_stopping.py +++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py @@ -102,8 +102,6 @@ bar2 = plt.bar(index + bar_width, score_gbes, bar_width, label='With early stopping', color='coral') -max_y = np.amax(np.maximum(score_gb, score_gbes)) - plt.xticks(index + bar_width, names) plt.yticks(np.arange(0, 1.3, 0.1)) diff --git a/examples/gaussian_process/plot_gpc_isoprobability.py b/examples/gaussian_process/plot_gpc_isoprobability.py index 2a27434cf148f..0639a65a384a4 100644 --- a/examples/gaussian_process/plot_gpc_isoprobability.py +++ b/examples/gaussian_process/plot_gpc_isoprobability.py @@ -85,7 +85,7 @@ def g(x): plt.plot(X[y > 0, 0], X[y > 0, 1], 'b.', markersize=12) -cs = plt.contour(x1, x2, y_true, [0.], colors='k', linestyles='dashdot') +plt.contour(x1, x2, y_true, [0.], colors='k', linestyles='dashdot') cs = plt.contour(x1, x2, y_prob, [0.666], colors='b', linestyles='solid') diff --git a/examples/gaussian_process/plot_gpr_noisy_targets.py b/examples/gaussian_process/plot_gpr_noisy_targets.py index e90b5e57ad257..8841f04a3987f 100644 --- a/examples/gaussian_process/plot_gpr_noisy_targets.py +++ b/examples/gaussian_process/plot_gpr_noisy_targets.py @@ -61,7 +61,7 @@ def f(x): # Plot the function, the prediction and the 95% confidence interval based on # the MSE -fig = plt.figure() +plt.figure() plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$') plt.plot(X, y, 'r.', markersize=10, label=u'Observations') plt.plot(x, y_pred, 'b-', label=u'Prediction') @@ -97,7 +97,7 @@ def f(x): # Plot the function, the prediction and the 95% confidence interval based on # the MSE -fig = plt.figure() +plt.figure() 
plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$') plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations') plt.plot(x, y_pred, 'b-', label=u'Prediction') diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py index 7b6d2a52cae87..3cd96d6692e8d 100644 --- a/examples/linear_model/plot_lasso_coordinate_descent_path.py +++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py @@ -47,8 +47,6 @@ # Display results plt.figure(1) -ax = plt.gca() - colors = cycle(['b', 'r', 'g', 'c', 'k']) neg_log_alphas_lasso = -np.log10(alphas_lasso) neg_log_alphas_enet = -np.log10(alphas_enet) @@ -64,7 +62,6 @@ plt.figure(2) -ax = plt.gca() neg_log_alphas_positive_lasso = -np.log10(alphas_positive_lasso) for coef_l, coef_pl, c in zip(coefs_lasso, coefs_positive_lasso, colors): l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c) @@ -78,7 +75,6 @@ plt.figure(3) -ax = plt.gca() neg_log_alphas_positive_enet = -np.log10(alphas_positive_enet) for (coef_e, coef_pe, c) in zip(coefs_enet, coefs_positive_enet, colors): l1 = plt.plot(neg_log_alphas_enet, coef_e, c=c) diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index ba59fb5ece537..8367d16b955fe 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -20,7 +20,6 @@ # load the data digits = load_digits() -data = digits.data # project the 64-dimensional data to a lower dimension pca = PCA(n_components=15, whiten=False) diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py index 5c8543937beba..f46b7ece7cd78 100644 --- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py +++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py @@ -65,7 +65,8 @@ print("Iteration %i %s" % (i, 70 * "_")) print("Label Spreading model: %d labeled & %d unlabeled (%d total)" - % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)) + % (n_labeled_points, n_total_samples - n_labeled_points, + n_total_samples)) print(classification_report(true_labels, predicted_labels)) @@ -95,7 +96,7 @@ # for more than 5 iterations, visualize the gain only on the first 5 if i < 5: sub = f.add_subplot(5, 5, index + 1 + (5 * i)) - sub.imshow(image, cmap=plt.cm.gray_r) + sub.imshow(image, cmap=plt.cm.gray_r, interpolation='none') sub.set_title("predict: %i\ntrue: %i" % ( lp_model.transduction_[image_index], y[image_index]), size=10) sub.axis('off') @@ -108,6 +109,7 @@ n_labeled_points += len(uncertainty_index) f.suptitle("Active learning with Label Propagation.\nRows show 5 most " - "uncertain labels to learn with the next model.") -plt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45) + "uncertain labels to learn with the next model.", y=1.15) +plt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2, + hspace=0.85) plt.show() diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py index 005f73683921b..b47bfcd80e49a 100644 --- a/examples/tree/plot_tree_regression_multioutput.py +++ b/examples/tree/plot_tree_regression_multioutput.py @@ -42,7 +42,6 @@ # Plot the results plt.figure() -s = 50 s = 25 plt.scatter(y[:, 0], y[:, 1], c="navy", s=s, edgecolor="black", label="data") diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx index 
1cec075fc6fc7..ff18e3cad7312 100644 --- a/sklearn/_isotonic.pyx +++ b/sklearn/_isotonic.pyx @@ -100,7 +100,7 @@ def _make_unique(np.ndarray[dtype=np.float64_t] X, if x != current_x: # next unique value x_out[i] = current_x - weights_out[i] = current_weight / current_count + weights_out[i] = current_weight y_out[i] = current_y / current_weight i += 1 current_x = x @@ -113,6 +113,6 @@ def _make_unique(np.ndarray[dtype=np.float64_t] X, current_count += 1 x_out[i] = current_x - weights_out[i] = current_weight / current_count + weights_out[i] = current_weight y_out[i] = current_y / current_weight return x_out, y_out, weights_out diff --git a/sklearn/base.py b/sklearn/base.py index aa4f9f9ce17c1..d97fe92ccdd47 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -225,21 +225,7 @@ def get_params(self, deep=True): """ out = dict() for key in self._get_param_names(): - # We need deprecation warnings to always be on in order to - # catch deprecated param values. - # This is set in utils/__init__.py but it gets overwritten - # when running under python3 somehow. - warnings.simplefilter("always", DeprecationWarning) - try: - with warnings.catch_warnings(record=True) as w: - value = getattr(self, key, None) - if len(w) and w[0].category == DeprecationWarning: - # if the parameter is deprecated, don't show it - continue - finally: - warnings.filters.pop(0) - - # XXX: should we rather test if instance of estimator? + value = getattr(self, key, None) if deep and hasattr(value, 'get_params'): deep_items = value.get_params().items() out.update((key + '__' + k, val) for k, val in deep_items) @@ -316,7 +302,6 @@ def __setstate__(self, state): self.__dict__.update(state) - ############################################################################### class ClassifierMixin(object): """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 47ed14f826f33..d3bbe529b7c25 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -6,7 +6,9 @@ # License: BSD 3 clause import numpy as np +import warnings +from sklearn.exceptions import ConvergenceWarning from ..base import BaseEstimator, ClusterMixin from ..utils import as_float_array, check_array from ..utils.validation import check_is_fitted @@ -14,6 +16,20 @@ from ..metrics import pairwise_distances_argmin +def _equal_similarities_and_preferences(S, preference): + def all_equal_preferences(): + return np.all(preference == preference.flat[0]) + + def all_equal_similarities(): + # Create mask to ignore diagonal of S + mask = np.ones(S.shape, dtype=bool) + np.fill_diagonal(mask, 0) + + return np.all(S[mask].flat == S[mask].flat[0]) + + return all_equal_preferences() and all_equal_similarities() + + def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200, damping=0.5, copy=True, verbose=False, return_n_iter=False): @@ -74,6 +90,16 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200, For an example, see :ref:`examples/cluster/plot_affinity_propagation.py `. + When the algorithm does not converge, it returns an empty array as + ``cluster_center_indices`` and ``-1`` as label for each training sample. + + When all training samples have equal similarities and equal preferences, + the assignment of cluster centers and labels depends on the preference. + If the preference is smaller than the similarities, a single cluster center + and label ``0`` for every sample will be returned. 
Otherwise, every + training sample becomes its own cluster center and is assigned a unique + label. + References ---------- Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages @@ -90,6 +116,23 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200, if damping < 0.5 or damping >= 1: raise ValueError('damping must be >= 0.5 and < 1') + preference = np.array(preference) + + if (n_samples == 1 or + _equal_similarities_and_preferences(S, preference)): + # It makes no sense to run the algorithm in this case, so return 1 or + # n_samples clusters, depending on preferences + warnings.warn("All samples have mutually equal similarities. " + "Returning arbitrary cluster center(s).") + if preference.flat[0] >= S.flat[n_samples - 1]: + return ((np.arange(n_samples), np.arange(n_samples), 0) + if return_n_iter + else (np.arange(n_samples), np.arange(n_samples))) + else: + return ((np.array([0]), np.array([0] * n_samples), 0) + if return_n_iter + else (np.array([0]), np.array([0] * n_samples))) + random_state = np.random.RandomState(0) # Place preference on the diagonal of S @@ -177,9 +220,10 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200, cluster_centers_indices = np.unique(labels) labels = np.searchsorted(cluster_centers_indices, labels) else: - labels = np.empty((n_samples, 1)) - cluster_centers_indices = None - labels.fill(np.nan) + warnings.warn("Affinity propagation did not converge, this model " + "will not have any cluster centers.", ConvergenceWarning) + labels = np.array([-1] * n_samples) + cluster_centers_indices = [] if return_n_iter: return cluster_centers_indices, labels, it + 1 @@ -254,6 +298,17 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): The algorithmic complexity of affinity propagation is quadratic in the number of points. + When ``fit`` does not converge, ``cluster_centers_`` becomes an empty + array and all training samples will be labelled as ``-1``. In addition, + ``predict`` will then label every sample as ``-1``. + + When all training samples have equal similarities and equal preferences, + the assignment of cluster centers and labels depends on the preference. + If the preference is smaller than the similarities, ``fit`` will result in + a single cluster center and label ``0`` for every sample. Otherwise, every + training sample becomes its own cluster center and is assigned a unique + label. + References ---------- @@ -287,6 +342,9 @@ def fit(self, X, y=None): X : array-like, shape (n_samples, n_features) or (n_samples, n_samples) Data matrix or, if affinity is ``precomputed``, matrix of similarities / affinities. + + y : Ignored + """ X = check_array(X, accept_sparse='csr') if self.affinity == "precomputed": @@ -327,4 +385,10 @@ def predict(self, X): raise ValueError("Predict method is not supported when " "affinity='precomputed'.") - return pairwise_distances_argmin(X, self.cluster_centers_) + if self.cluster_centers_.size > 0: + return pairwise_distances_argmin(X, self.cluster_centers_) + else: + warnings.warn("This model does not have any cluster centers " + "because affinity propagation did not converge. 
" + "Labeling every sample as '-1'.") + return np.array([-1] * X.shape[0]) diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py index 38319a5d8c88b..6c61d6b983bbe 100644 --- a/sklearn/cluster/bicluster.py +++ b/sklearn/cluster/bicluster.py @@ -117,6 +117,8 @@ def fit(self, X, y=None): ---------- X : array-like, shape (n_samples, n_features) + y : Ignored + """ X = check_array(X, accept_sparse='csr', dtype=np.float64) self._check_parameters() diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 04d7726743b06..d2dcd8d9a016f 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -441,6 +441,9 @@ def fit(self, X, y=None): ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) Input data. + + y : Ignored + """ self.fit_, self.partial_fit_ = True, False return self._fit(X) @@ -521,6 +524,9 @@ def partial_fit(self, X=None, y=None): X : {array-like, sparse matrix}, shape (n_samples, n_features), None Input data. If X is not provided, only the global clustering step is done. + + y : Ignored + """ self.partial_fit_, self.fit_ = True, False if X is None: diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py index 115e534b448cb..45bedb26e76b1 100644 --- a/sklearn/cluster/dbscan_.py +++ b/sklearn/cluster/dbscan_.py @@ -275,6 +275,9 @@ def fit(self, X, y=None, sample_weight=None): ``min_samples`` is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. + + y : Ignored + """ X = check_array(X, accept_sparse='csr') clust = dbscan(X, sample_weight=sample_weight, @@ -303,6 +306,8 @@ def fit_predict(self, X, y=None, sample_weight=None): weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. + y : Ignored + Returns ------- y : ndarray, shape (n_samples,) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 966ed5e2cc121..c8ead243192b0 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -685,7 +685,10 @@ def fit(self, X, y=None): Parameters ---------- X : array-like, shape = [n_samples, n_features] - The samples a.k.a. observations. + Training data. Shape [n_samples, n_features], or [n_samples, + n_samples] if affinity=='precomputed'. + + y : Ignored Returns ------- @@ -834,6 +837,8 @@ def fit(self, X, y=None, **params): X : array-like, shape = [n_samples, n_features] The data + y : Ignored + Returns ------- self diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index af2fc67e083db..06f26b52aa0e6 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -879,6 +879,9 @@ def fit(self, X, y=None): ---------- X : array-like or sparse matrix, shape=(n_samples, n_features) Training instances to cluster. + + y : Ignored + """ random_state = check_random_state(self.random_state) X = self._check_fit_data(X) @@ -904,6 +907,8 @@ def fit_predict(self, X, y=None): X : {array-like, sparse matrix}, shape = [n_samples, n_features] New data to transform. + u : Ignored + Returns ------- labels : array, shape [n_samples,] @@ -921,6 +926,8 @@ def fit_transform(self, X, y=None): X : {array-like, sparse matrix}, shape = [n_samples, n_features] New data to transform. + y : Ignored + Returns ------- X_new : array, shape [n_samples, k] @@ -990,6 +997,8 @@ def score(self, X, y=None): X : {array-like, sparse matrix}, shape = [n_samples, n_features] New data. 
+ y : Ignored + Returns ------- score : float @@ -1336,6 +1345,9 @@ def fit(self, X, y=None): ---------- X : array-like or sparse matrix, shape=(n_samples, n_features) Training instances to cluster. + + y : Ignored + """ random_state = check_random_state(self.random_state) X = check_array(X, accept_sparse="csr", order='C', @@ -1498,6 +1510,9 @@ def partial_fit(self, X, y=None): ---------- X : array-like, shape = [n_samples, n_features] Coordinates of the data points to cluster. + + y : Ignored + """ X = check_array(X, accept_sparse="csr") diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index b1680fea3f2e7..37c31777a5a1f 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -389,6 +389,9 @@ def fit(self, X, y=None): ----------- X : array-like, shape=[n_samples, n_features] Samples to cluster. + + y : Ignored + """ X = check_array(X) self.cluster_centers_, self.labels_ = \ diff --git a/sklearn/cluster/spectral.py b/sklearn/cluster/spectral.py index 5f5f0a4e9d452..8532110acb6c4 100644 --- a/sklearn/cluster/spectral.py +++ b/sklearn/cluster/spectral.py @@ -432,6 +432,9 @@ def fit(self, X, y=None): X : array-like or sparse matrix, shape (n_samples, n_features) OR, if affinity==`precomputed`, a precomputed affinity matrix of shape (n_samples, n_samples) + + y : Ignored + """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index e0e4091d4d2de..408783cd98ff0 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -5,11 +5,15 @@ import numpy as np -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises +from sklearn.exceptions import ConvergenceWarning +from sklearn.utils.testing import ( + assert_equal, assert_false, assert_true, assert_array_equal, assert_raises, + assert_warns, assert_warns_message, assert_no_warnings) from sklearn.cluster.affinity_propagation_ import AffinityPropagation +from sklearn.cluster.affinity_propagation_ import ( + _equal_similarities_and_preferences +) from sklearn.cluster.affinity_propagation_ import affinity_propagation from sklearn.datasets.samples_generator import make_blobs from sklearn.metrics import euclidean_distances @@ -78,3 +82,81 @@ def test_affinity_propagation_predict_error(): af = AffinityPropagation(affinity="precomputed") af.fit(S) assert_raises(ValueError, af.predict, X) + + +def test_affinity_propagation_fit_non_convergence(): + # In case of non-convergence of affinity_propagation(), the cluster + # centers should be an empty array and training samples should be labelled + # as noise (-1) + X = np.array([[0, 0], [1, 1], [-2, -2]]) + + # Force non-convergence by allowing only a single iteration + af = AffinityPropagation(preference=-10, max_iter=1) + + assert_warns(ConvergenceWarning, af.fit, X) + assert_array_equal(np.empty((0, 2)), af.cluster_centers_) + assert_array_equal(np.array([-1, -1, -1]), af.labels_) + + +def test_affinity_propagation_equal_mutual_similarities(): + X = np.array([[-1, 1], [1, -1]]) + S = -euclidean_distances(X, squared=True) + + # setting preference > similarity + cluster_center_indices, labels = assert_warns_message( + UserWarning, "mutually equal", affinity_propagation, S, preference=0) + + # expect every sample to become an exemplar + assert_array_equal([0, 1], 
cluster_center_indices) + assert_array_equal([0, 1], labels) + + # setting preference < similarity + cluster_center_indices, labels = assert_warns_message( + UserWarning, "mutually equal", affinity_propagation, S, preference=-10) + + # expect one cluster, with arbitrary (first) sample as exemplar + assert_array_equal([0], cluster_center_indices) + assert_array_equal([0, 0], labels) + + # setting different preferences + cluster_center_indices, labels = assert_no_warnings( + affinity_propagation, S, preference=[-20, -10]) + + # expect one cluster, with highest-preference sample as exemplar + assert_array_equal([1], cluster_center_indices) + assert_array_equal([0, 0], labels) + + +def test_affinity_propagation_predict_non_convergence(): + # In case of non-convergence of affinity_propagation(), the cluster + # centers should be an empty array + X = np.array([[0, 0], [1, 1], [-2, -2]]) + + # Force non-convergence by allowing only a single iteration + af = AffinityPropagation(preference=-10, max_iter=1).fit(X) + + # At prediction time, consider new samples as noise since there are no + # clusters + assert_array_equal(np.array([-1, -1, -1]), + af.predict(np.array([[2, 2], [3, 3], [4, 4]]))) + + +def test_equal_similarities_and_preferences(): + # Unequal distances + X = np.array([[0, 0], [1, 1], [-2, -2]]) + S = -euclidean_distances(X, squared=True) + + assert_false(_equal_similarities_and_preferences(S, np.array(0))) + assert_false(_equal_similarities_and_preferences(S, np.array([0, 0]))) + assert_false(_equal_similarities_and_preferences(S, np.array([0, 1]))) + + # Equal distances + X = np.array([[0, 0], [1, 1]]) + S = -euclidean_distances(X, squared=True) + + # Different preferences + assert_false(_equal_similarities_and_preferences(S, np.array([0, 1]))) + + # Same preferences + assert_true(_equal_similarities_and_preferences(S, np.array([0, 0]))) + assert_true(_equal_similarities_and_preferences(S, np.array(0))) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 5bef7255e37da..4b7b769d7017d 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -140,7 +140,9 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False, Whether to shuffle dataset. random_state : int, RandomState instance or None, optional (default=None) - Random state for shuffling the dataset. + Random state for shuffling the dataset. If subset='SA', this random + state is also used to randomly select the small proportion of abnormal + samples. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used @@ -175,7 +177,7 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False, """ data_home = get_data_home(data_home=data_home) - kddcup99 = _fetch_brute_kddcup99(data_home=data_home, shuffle=shuffle, + kddcup99 = _fetch_brute_kddcup99(data_home=data_home, percent10=percent10, download_if_missing=download_if_missing) @@ -225,12 +227,15 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False, if subset == 'SF': data = np.c_[data[:, 0], data[:, 2], data[:, 4], data[:, 5]] + if shuffle: + data, target = shuffle_method(data, target, random_state=random_state) + return Bunch(data=data, target=target) def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, random_state=None, - shuffle=False, percent10=True): + percent10=True): """Load the kddcup99 dataset, downloading it if necessary. 
@@ -251,9 +256,6 @@ def _fetch_brute_kddcup99(data_home=None, If None, the random number generator is the RandomState instance used by `np.random`. - shuffle : bool, default=False - Whether to shuffle dataset. - percent10 : bool, default=True Whether to load only 10 percent of the data. @@ -372,9 +374,6 @@ def _fetch_brute_kddcup99(data_home=None, X = joblib.load(samples_path) y = joblib.load(targets_path) - if shuffle: - X, y = shuffle_method(X, y, random_state=random_state) - return Bunch(data=X, target=y, DESCR=__doc__) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index a7cf278e37e44..04fa79f4160f4 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -27,7 +27,6 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import with_setup DATA_HOME = tempfile.mkdtemp(prefix="scikit_learn_data_home_test_") @@ -85,33 +84,42 @@ def test_default_empty_load_files(): assert_equal(res.DESCR, None) -@with_setup(setup_load_files, teardown_load_files) def test_default_load_files(): - res = load_files(LOAD_FILES_ROOT) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.data, [b("Hello World!\n")]) + try: + setup_load_files() + res = load_files(LOAD_FILES_ROOT) + assert_equal(len(res.filenames), 1) + assert_equal(len(res.target_names), 2) + assert_equal(res.DESCR, None) + assert_equal(res.data, [b("Hello World!\n")]) + finally: + teardown_load_files() -@with_setup(setup_load_files, teardown_load_files) def test_load_files_w_categories_desc_and_encoding(): - category = os.path.abspath(TEST_CATEGORY_DIR1).split('/').pop() - res = load_files(LOAD_FILES_ROOT, description="test", - categories=category, encoding="utf-8") - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 1) - assert_equal(res.DESCR, "test") - assert_equal(res.data, [u("Hello World!\n")]) + try: + setup_load_files() + category = os.path.abspath(TEST_CATEGORY_DIR1).split('/').pop() + res = load_files(LOAD_FILES_ROOT, description="test", + categories=category, encoding="utf-8") + assert_equal(len(res.filenames), 1) + assert_equal(len(res.target_names), 1) + assert_equal(res.DESCR, "test") + assert_equal(res.data, [u("Hello World!\n")]) + finally: + teardown_load_files() -@with_setup(setup_load_files, teardown_load_files) def test_load_files_wo_load_content(): - res = load_files(LOAD_FILES_ROOT, load_content=False) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.get('data'), None) + try: + setup_load_files() + res = load_files(LOAD_FILES_ROOT, load_content=False) + assert_equal(len(res.filenames), 1) + assert_equal(len(res.target_names), 2) + assert_equal(res.DESCR, None) + assert_equal(res.get('data'), None) + finally: + teardown_load_files() def test_load_sample_images(): diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 498b98f4e67ed..77dc2be185b02 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -37,3 +37,13 @@ def test_percent10(): data = fetch_kddcup99('smtp') assert_equal(data.data.shape, (9571, 3)) assert_equal(data.target.shape, (9571,)) + + +def test_shuffle(): + try: + dataset = fetch_kddcup99(random_state=0, subset='SA', shuffle=True, + 
percent10=True, download_if_missing=False) + except IOError: + raise SkipTest("kddcup99 dataset can not be loaded.") + + assert(any(dataset.target[-100:] == b'normal.')) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 3e5875a060be1..ac6395c4958be 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -28,7 +28,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import raises +from sklearn.utils.testing import assert_raises SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_") @@ -110,10 +110,9 @@ def teardown_module(): shutil.rmtree(SCIKIT_LEARN_EMPTY_DATA) -@raises(IOError) def test_load_empty_lfw_people(): - fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA, - download_if_missing=False) + assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA, + download_if_missing=False) def test_load_fake_lfw_people(): @@ -148,16 +147,15 @@ def test_load_fake_lfw_people(): 'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez']) -@raises(ValueError) def test_load_fake_lfw_people_too_restrictive(): - fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100, - download_if_missing=False) + assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA, + min_faces_per_person=100, download_if_missing=False) -@raises(IOError) def test_load_empty_lfw_pairs(): - fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA, - download_if_missing=False) + assert_raises(IOError, fetch_lfw_pairs, + data_home=SCIKIT_LEARN_EMPTY_DATA, + download_if_missing=False) def test_load_fake_lfw_pairs(): diff --git a/sklearn/datasets/tests/test_mldata.py b/sklearn/datasets/tests/test_mldata.py index 1ce22079bdd11..7405b8e025c0f 100644 --- a/sklearn/datasets/tests/test_mldata.py +++ b/sklearn/datasets/tests/test_mldata.py @@ -13,7 +13,6 @@ from sklearn.utils.testing import mock_mldata_urlopen from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import with_setup from sklearn.utils.testing import assert_array_equal @@ -43,10 +42,9 @@ def test_mldata_filename(): assert_equal(mldata_filename(name), desired) -@with_setup(setup_tmpdata, teardown_tmpdata) def test_download(): """Test that fetch_mldata is able to download and cache a data set.""" - + setup_tmpdata() _urlopen_ref = datasets.mldata.urlopen datasets.mldata.urlopen = mock_mldata_urlopen({ 'mock': { @@ -66,10 +64,11 @@ def test_download(): fetch_mldata, 'not_existing_name') finally: datasets.mldata.urlopen = _urlopen_ref + teardown_tmpdata() -@with_setup(setup_tmpdata, teardown_tmpdata) def test_fetch_one_column(): + setup_tmpdata() _urlopen_ref = datasets.mldata.urlopen try: dataname = 'onecol' @@ -90,10 +89,11 @@ def test_fetch_one_column(): assert_equal(dset.data.shape, (3, 2)) finally: datasets.mldata.urlopen = _urlopen_ref + teardown_tmpdata() -@with_setup(setup_tmpdata, teardown_tmpdata) def test_fetch_multiple_column(): + setup_tmpdata() _urlopen_ref = datasets.mldata.urlopen try: # create fake data set in cache @@ -167,3 +167,4 @@ def test_fetch_multiple_column(): finally: datasets.mldata.urlopen = _urlopen_ref + teardown_tmpdata() diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index d688dc798237b..2e3b7982476b0 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ 
b/sklearn/datasets/tests/test_svmlight_format.py @@ -15,7 +15,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import raises from sklearn.utils.testing import assert_in from sklearn.utils.fixes import sp_version @@ -138,20 +137,17 @@ def test_load_compressed(): assert_array_equal(y, ybz) -@raises(ValueError) def test_load_invalid_file(): - load_svmlight_file(invalidfile) + assert_raises(ValueError, load_svmlight_file, invalidfile) -@raises(ValueError) def test_load_invalid_order_file(): - load_svmlight_file(invalidfile2) + assert_raises(ValueError, load_svmlight_file, invalidfile2) -@raises(ValueError) def test_load_zero_based(): f = BytesIO(b("-1 4:1.\n1 0:1\n")) - load_svmlight_file(f, zero_based=False) + assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): @@ -186,21 +182,19 @@ def test_load_with_qid(): assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) -@raises(ValueError) def test_load_invalid_file2(): - load_svmlight_files([datafile, invalidfile, datafile]) + assert_raises(ValueError, load_svmlight_files, + [datafile, invalidfile, datafile]) -@raises(TypeError) def test_not_a_filename(): # in python 3 integers are valid file opening arguments (taken as unix # file descriptors) - load_svmlight_file(.42) + assert_raises(TypeError, load_svmlight_file, .42) -@raises(IOError) def test_invalid_filename(): - load_svmlight_file("trou pic nic douille") + assert_raises(IOError, load_svmlight_file, "trou pic nic douille") def test_dump(): diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 62cd2cd2aa101..e4b36d120773a 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -824,7 +824,6 @@ def transform(self, X): check_is_fitted(self, 'components_') X = check_array(X) - n_samples, n_features = X.shape code = sparse_encode( X, self.components_, algorithm=self.transform_algorithm, @@ -927,9 +926,9 @@ def fit(self, X, y=None): Parameters ---------- - X : array-like, shape (n_samples, n_features) - Training vector, where n_samples in the number of samples - and n_features is the number of features. + X : Ignored + + y : Ignored Returns ------- @@ -1081,6 +1080,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self : object @@ -1251,6 +1252,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self : object @@ -1284,6 +1287,8 @@ def partial_fit(self, X, y=None, iter_offset=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + iter_offset : integer, optional The number of iteration on data batches that has been performed before this call to partial_fit. This is optional: diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index 4440ee90bd84a..481a5e2322e3f 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -149,6 +149,8 @@ def fit(self, X, y=None): X : array-like, shape (n_samples, n_features) Training data. 
+ y : Ignored + Returns ------- self @@ -324,7 +326,6 @@ def score_samples(self, X): Xr = X - self.mean_ precision = self.get_precision() n_features = X.shape[1] - log_like = np.zeros(X.shape[0]) log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1) log_like -= .5 * (n_features * log(2. * np.pi) - fast_logdet(precision)) @@ -338,6 +339,8 @@ def score(self, X, y=None): X : array, shape (n_samples, n_features) The data + y : Ignored + Returns ------- ll : float diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index fcc11ff643a5e..6cb58a250be78 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -509,6 +509,8 @@ def fit_transform(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. + y : Ignored + Returns ------- X_new : array-like, shape (n_samples, n_components) @@ -524,6 +526,8 @@ def fit(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index f0604001fab53..13e51090dd82e 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -158,7 +158,7 @@ def fit(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. - y : Passthrough for ``Pipeline`` compatibility. + y : Ignored Returns ------- @@ -199,6 +199,8 @@ def partial_fit(self, X, y=None, check_input=True): check_input : bool Run check_array on X. + y : Ignored + Returns ------- self : object diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index 153731cb83651..8b3830470921b 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -1211,6 +1211,8 @@ def fit_transform(self, X, y=None, W=None, H=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Data matrix to be decomposed + y : Ignored + W : array-like, shape (n_samples, n_components) If init='custom', it is used as initial guess for the solution. @@ -1249,6 +1251,8 @@ def fit(self, X, y=None, **params): X : {array-like, sparse matrix}, shape (n_samples, n_features) Data matrix to be decomposed + y : Ignored + Returns ------- self diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index e9743c69422fb..01b521cb7a76f 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -473,6 +473,8 @@ def partial_fit(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. + y : Ignored + Returns ------- self @@ -515,6 +517,8 @@ def fit(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. + y : Ignored + Returns ------- self @@ -714,6 +718,8 @@ def score(self, X, y=None): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. 
+ y : Ignored + Returns ------- score : float diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 171774321cec0..cbd688f3d748d 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -134,8 +134,12 @@ class PCA(_BasePCA): to guess the dimension if ``0 < n_components < 1`` and svd_solver == 'full', select the number of components such that the amount of variance that needs to be - explained is greater than the percentage specified by n_components - n_components cannot be equal to n_features for svd_solver == 'arpack'. + explained is greater than the percentage specified by n_components. + If svd_solver == 'arpack', the number of components must be strictly + less than the minimum of n_features and n_samples. + Hence, the None case results in: + + n_components == min(n_samples, n_features) - 1 copy : bool (default True) If False, data passed to fit are overwritten and running @@ -166,7 +170,7 @@ class PCA(_BasePCA): arpack : run SVD truncated to n_components calling ARPACK solver via `scipy.sparse.linalg.svds`. It requires strictly - 0 < n_components < X.shape[1] + 0 < n_components < min(X.shape) randomized : run randomized SVD by the method of Halko et al. @@ -210,7 +214,7 @@ class PCA(_BasePCA): Percentage of variance explained by each of the selected components. If ``n_components`` is not set then all components are stored and the - sum of explained variances is equal to 1.0. + sum of the ratios is equal to 1.0. singular_values_ : array, shape (n_components,) The singular values corresponding to each of the selected components. @@ -226,7 +230,8 @@ class PCA(_BasePCA): The estimated number of components. When n_components is set to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this number is estimated from input data. Otherwise it equals the parameter - n_components, or n_features if n_components is None. + n_components, or the lesser value of n_features and n_samples + if n_components is None. noise_variance_ : float The estimated noise covariance following the Probabilistic PCA model @@ -319,6 +324,8 @@ def fit(self, X, y=None): Training data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self : object @@ -336,6 +343,8 @@ def fit_transform(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. 
+ y : Ignored + Returns ------- X_new : array-like, shape (n_samples, n_components) @@ -367,7 +376,10 @@ def _fit(self, X): # Handle n_components==None if self.n_components is None: - n_components = X.shape[1] + if self.svd_solver != 'arpack': + n_components = min(X.shape) + else: + n_components = min(X.shape) - 1 else: n_components = self.n_components @@ -400,10 +412,11 @@ def _fit_full(self, X, n_components): if n_samples < n_features: raise ValueError("n_components='mle' is only supported " "if n_samples >= n_features") - elif not 0 <= n_components <= n_features: + elif not 0 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 0 and " - "n_features=%r with svd_solver='full'" - % (n_components, n_features)) + "min(n_samples, n_features)=%r with " + "svd_solver='full'" + % (n_components, min(n_samples, n_features))) # Center data self.mean_ = np.mean(X, axis=0) @@ -458,14 +471,19 @@ def _fit_truncated(self, X, n_components, svd_solver): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) - elif not 1 <= n_components <= n_features: + elif not 1 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 1 and " - "n_features=%r with svd_solver='%s'" - % (n_components, n_features, svd_solver)) - elif svd_solver == 'arpack' and n_components == n_features: - raise ValueError("n_components=%r must be stricly less than " - "n_features=%r with svd_solver='%s'" - % (n_components, n_features, svd_solver)) + "min(n_samples, n_features)=%r with " + "svd_solver='%s'" + % (n_components, min(n_samples, n_features), + svd_solver)) + elif svd_solver == 'arpack' and n_components == min(n_samples, + n_features): + raise ValueError("n_components=%r must be strictly less than " + "min(n_samples, n_features)=%r with " + "svd_solver='%s'" + % (n_components, min(n_samples, n_features), + svd_solver)) random_state = check_random_state(self.random_state) @@ -500,6 +518,7 @@ def _fit_truncated(self, X, n_components, svd_solver): self.explained_variance_ratio_ = \ self.explained_variance_ / total_var.sum() self.singular_values_ = S.copy() # Store the singular values. + if self.n_components_ < min(n_features, n_samples): self.noise_variance_ = (total_var.sum() - self.explained_variance_.sum()) @@ -531,7 +550,6 @@ def score_samples(self, X): X = check_array(X) Xr = X - self.mean_ n_features = X.shape[1] - log_like = np.zeros(X.shape[0]) precision = self.get_precision() log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1) log_like -= .5 * (n_features * log(2. * np.pi) - @@ -550,6 +568,8 @@ def score(self, X, y=None): X : array, shape(n_samples, n_features) The data. + y : Ignored + Returns ------- ll : float @@ -676,6 +696,8 @@ def fit(self, X, y=None): Training data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self : object @@ -762,6 +784,8 @@ def fit_transform(self, X, y=None): New data, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py index 47c03a80278b9..68db09b5d277c 100644 --- a/sklearn/decomposition/sparse_pca.py +++ b/sklearn/decomposition/sparse_pca.py @@ -107,6 +107,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. 
+ y : Ignored + Returns ------- self : object @@ -275,6 +277,8 @@ def fit(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y : Ignored + Returns ------- self : object diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index 6795013b0790a..aa67189407296 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -8,6 +8,7 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import ignore_warnings @@ -349,11 +350,58 @@ def test_pca_inverse(): def test_pca_validation(): - X = [[0, 1], [1, 0]] + # Ensures that solver-specific extreme inputs for the n_components + # parameter raise errors + X = np.array([[0, 1, 0], [1, 0, 0]]) + smallest_d = 2 # The smallest dimension + lower_limit = {'randomized': 1, 'arpack': 1, 'full': 0, 'auto': 0} + for solver in solver_list: - for n_components in [-1, 3]: - assert_raises(ValueError, - PCA(n_components, svd_solver=solver).fit, X) + # We conduct the same test on X.T so that it is invariant to axis. + for data in [X, X.T]: + for n_components in [-1, 3]: + + if solver == 'auto': + solver_reported = 'full' + else: + solver_reported = solver + + assert_raises_regex(ValueError, + "n_components={}L? must be between " + "{}L? and min\(n_samples, n_features\)=" + "{}L? with svd_solver=\'{}\'" + .format(n_components, + lower_limit[solver], + smallest_d, + solver_reported), + PCA(n_components, + svd_solver=solver).fit, data) + if solver == 'arpack': + + n_components = smallest_d + + assert_raises_regex(ValueError, + "n_components={}L? must be " + "strictly less than " + "min\(n_samples, n_features\)={}L?" + " with svd_solver=\'arpack\'" + .format(n_components, smallest_d), + PCA(n_components, svd_solver=solver) + .fit, data) + + +def test_n_components_none(): + # Ensures that n_components == None is handled correctly + X = iris.data + # We conduct the same test on X.T so that it is invariant to axis. + for data in [X, X.T]: + for solver in solver_list: + pca = PCA(svd_solver=solver) + pca.fit(data) + if solver == 'arpack': + assert_equal(pca.n_components_, min(data.shape) - 1) + else: + assert_equal(pca.n_components_, min(data.shape)) def test_randomized_pca_check_projection(): diff --git a/sklearn/decomposition/truncated_svd.py b/sklearn/decomposition/truncated_svd.py index 87b8b45e1543a..028304672e4da 100644 --- a/sklearn/decomposition/truncated_svd.py +++ b/sklearn/decomposition/truncated_svd.py @@ -132,6 +132,8 @@ def fit(self, X, y=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. + y : Ignored + Returns ------- self : object @@ -148,6 +150,8 @@ def fit_transform(self, X, y=None): X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. + y : Ignored + Returns ------- X_new : array, shape (n_samples, n_components) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index a72f25a5f7b9b..854f728c5638a 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -153,7 +153,7 @@ class ZeroEstimator(object): """An estimator that simply predicts zero. 
""" def fit(self, X, y, sample_weight=None): - if np.issubdtype(y.dtype, int): + if np.issubdtype(y.dtype, np.signedinteger): # classification self.n_classes = np.unique(y).shape[0] if self.n_classes == 2: diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index 023be79912d12..22665384ed7ce 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -296,7 +296,14 @@ def test_set_params(): clf3 = GaussianNB() eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft', weights=[1, 2]) + assert_true('lr' in eclf1.named_estimators) + assert_true(eclf1.named_estimators.lr is eclf1.estimators[0][1]) + assert_true(eclf1.named_estimators.lr is eclf1.named_estimators['lr']) eclf1.fit(X, y) + assert_true('lr' in eclf1.named_estimators_) + assert_true(eclf1.named_estimators_.lr is eclf1.estimators_[0]) + assert_true(eclf1.named_estimators_.lr is eclf1.named_estimators_['lr']) + eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft', weights=[1, 2]) eclf2.set_params(nb=clf2).fit(X, y) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index ad6c0125dd664..26bc8e66df01a 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -21,6 +21,7 @@ from ..externals.joblib import Parallel, delayed from ..utils.validation import has_fit_parameter, check_is_fitted from ..utils.metaestimators import _BaseComposition +from ..utils import Bunch def _parallel_fit_estimator(estimator, X, y, sample_weight=None): @@ -75,6 +76,11 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): The collection of fitted sub-estimators as defined in ``estimators`` that are not `None`. + named_estimators_ : Bunch object, a dictionary with attribute access + Attribute to access any fitted sub-estimators by name. + + .. versionadded:: 0.20 + classes_ : array-like, shape = [n_predictions] The classes labels. @@ -94,6 +100,9 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): >>> eclf1 = eclf1.fit(X, y) >>> print(eclf1.predict(X)) [1 1 1 2 2 2] + >>> np.array_equal(eclf1.named_estimators_.lr.predict(X), + ... eclf1.named_estimators_['lr'].predict(X)) + True >>> eclf2 = VotingClassifier(estimators=[ ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], ... voting='soft') @@ -122,7 +131,7 @@ def __init__(self, estimators, voting='hard', weights=None, n_jobs=1, @property def named_estimators(self): - return dict(self.estimators) + return Bunch(**dict(self.estimators)) def fit(self, X, y, sample_weight=None): """ Fit the estimators. 
@@ -188,6 +197,9 @@ def fit(self, X, y, sample_weight=None): sample_weight=sample_weight) for clf in clfs if clf is not None) + self.named_estimators_ = Bunch(**dict()) + for k, e in zip(self.estimators, self.estimators_): + self.named_estimators_[k[0]] = e return self @property diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index d258625897e27..6f0d6b0214953 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -112,23 +112,19 @@ def test_hasher_zeros(): @ignore_warnings(category=DeprecationWarning) def test_hasher_alternate_sign(): - # the last two tokens produce a hash collision that sums as 0 - X = [["foo", "bar", "baz", "investigation need", "records"]] + X = [list("Thequickbrownfoxjumped")] Xt = FeatureHasher(alternate_sign=True, non_negative=False, input_type='string').fit_transform(X) - assert_true(Xt.data.min() < 0 and Xt.data.max() > 0) - # check that we have a collision that produces a 0 count - assert_true(len(Xt.data) < len(X[0])) - assert_true((Xt.data == 0.).any()) + assert Xt.data.min() < 0 and Xt.data.max() > 0 Xt = FeatureHasher(alternate_sign=True, non_negative=True, input_type='string').fit_transform(X) - assert_true((Xt.data >= 0).all()) # all counts are positive - assert_true((Xt.data == 0.).any()) # we still have a collision + assert Xt.data.min() > 0 + Xt = FeatureHasher(alternate_sign=False, non_negative=True, input_type='string').fit_transform(X) - assert_true((Xt.data > 0).all()) # strictly positive counts + assert Xt.data.min() > 0 Xt_2 = FeatureHasher(alternate_sign=False, non_negative=False, input_type='string').fit_transform(X) # With initially positive features, the non_negative option should @@ -136,6 +132,25 @@ def test_hasher_alternate_sign(): assert_array_equal(Xt.data, Xt_2.data) +@ignore_warnings(category=DeprecationWarning) +def test_hash_collisions(): + X = [list("Thequickbrownfoxjumped")] + + Xt = FeatureHasher(alternate_sign=True, non_negative=False, + n_features=1, input_type='string').fit_transform(X) + # check that some of the hashed tokens are added + # with an opposite sign and cancel out + assert abs(Xt.data[0]) < len(X[0]) + + Xt = FeatureHasher(alternate_sign=True, non_negative=True, + n_features=1, input_type='string').fit_transform(X) + assert abs(Xt.data[0]) < len(X[0]) + + Xt = FeatureHasher(alternate_sign=False, non_negative=True, + n_features=1, input_type='string').fit_transform(X) + assert Xt.data[0] == len(X[0]) + + @ignore_warnings(category=DeprecationWarning) def test_hasher_negative(): X = [{"foo": 2, "bar": -4, "baz": -1}.items()] diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 276835c10caf1..5e1b53040f438 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -7,12 +7,10 @@ from scipy import ndimage from scipy.sparse.csgraph import connected_components -from numpy.testing import assert_raises - from sklearn.feature_extraction.image import ( img_to_graph, grid_to_graph, extract_patches_2d, reconstruct_from_patches_2d, PatchExtractor, extract_patches) -from sklearn.utils.testing import assert_equal, assert_true +from sklearn.utils.testing import assert_equal, assert_true, assert_raises def test_img_to_graph(): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 
9e613b1bca8c1..ff13cd6e00179 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -23,13 +23,12 @@
 import numpy as np
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
-from numpy.testing import assert_raises
 from sklearn.utils.testing import (assert_equal, assert_false, assert_true,
                                    assert_not_equal, assert_almost_equal,
                                    assert_in, assert_less, assert_greater,
                                    assert_warns_message, assert_raise_message,
                                    clean_warning_registry, ignore_warnings,
-                                   SkipTest)
+                                   SkipTest, assert_raises)
 
 from collections import defaultdict, Mapping
 from functools import partial
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index fa7306ab9def5..417aeef2f8bc2 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1086,7 +1086,7 @@ def transform(self, X, copy=True):
         -------
         vectors : sparse matrix, [n_samples, n_features]
         """
-        if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
+        if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.floating):
             # preserve float family dtype
             X = sp.csr_matrix(X, copy=copy)
         else:
diff --git a/sklearn/feature_selection/variance_threshold.py b/sklearn/feature_selection/variance_threshold.py
index c9e018d94a84e..13e1aa7078310 100644
--- a/sklearn/feature_selection/variance_threshold.py
+++ b/sklearn/feature_selection/variance_threshold.py
@@ -54,7 +54,7 @@ def fit(self, X, y=None):
             Sample vectors from which to compute variances.
 
         y : any
-            Ignored. This parameter exists only for compatibility with
+            Ignored. This parameter exists only for compatibility with
             sklearn.pipeline.Pipeline.
 
         Returns
diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py
index 53c519e5d5ac8..5bc89d28df6b6 100644
--- a/sklearn/gaussian_process/gaussian_process.py
+++ b/sklearn/gaussian_process/gaussian_process.py
@@ -444,11 +444,6 @@ def predict(self, X, eval_MSE=False, batch_size=None):
         # Normalize input
         X = (X - self.X_mean) / self.X_std
 
-        # Initialize output
-        y = np.zeros(n_eval)
-        if eval_MSE:
-            MSE = np.zeros(n_eval)
-
         # Get pairwise componentwise L1-distances to the input training set
         dx = manhattan_distances(X, Y=self.X, sum_over_features=False)
         # Get regression function and correlation
diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py
index 4f9ff9cee7911..c92ca7f68f368 100644
--- a/sklearn/gaussian_process/gpr.py
+++ b/sklearn/gaussian_process/gpr.py
@@ -245,6 +245,8 @@ def obj_func(theta, eval_gradient=True):
         K[np.diag_indices_from(K)] += self.alpha
         try:
             self.L_ = cholesky(K, lower=True)  # Line 2
+            # self.L_ changed, self._K_inv needs to be recomputed
+            self._K_inv = None
         except np.linalg.LinAlgError as exc:
             exc.args = ("The kernel, %s, is not returning a "
                         "positive definite matrix.
Try gradually " @@ -320,13 +322,18 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 return y_mean, y_cov elif return_std: - # compute inverse K_inv of K based on its Cholesky - # decomposition L and its inverse L_inv - L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0])) - K_inv = L_inv.dot(L_inv.T) + # cache result of K_inv computation + if self._K_inv is None: + # compute inverse K_inv of K based on its Cholesky + # decomposition L and its inverse L_inv + L_inv = solve_triangular(self.L_.T, + np.eye(self.L_.shape[0])) + self._K_inv = L_inv.dot(L_inv.T) + # Compute variance of predictive distribution y_var = self.kernel_.diag(X) - y_var -= np.einsum("ij,ij->i", np.dot(K_trans, K_inv), K_trans) + y_var -= np.einsum("ij,ij->i", + np.dot(K_trans, self._K_inv), K_trans) # Check if any of the variances is negative because of # numerical issues. If yes: set the variance to 0. diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py index 860e3f290f3ea..37d872fc99fb5 100644 --- a/sklearn/gaussian_process/tests/test_gaussian_process.py +++ b/sklearn/gaussian_process/tests/test_gaussian_process.py @@ -11,7 +11,7 @@ from sklearn.gaussian_process import regression_models as regression from sklearn.gaussian_process import correlation_models as correlation from sklearn.datasets import make_regression -from sklearn.utils.testing import assert_greater, assert_true, raises +from sklearn.utils.testing import assert_greater, assert_true, assert_raises f = lambda x: x * np.sin(x) @@ -95,10 +95,9 @@ def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential, assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.)) -@raises(ValueError) def test_wrong_number_of_outputs(): gp = GaussianProcess() - gp.fit([[1, 2, 3], [4, 5, 6]], [1, 2, 3]) + assert_raises(ValueError, gp.fit, [[1, 2, 3], [4, 5, 6]], [1, 2, 3]) def test_more_builtin_correlation_models(random_start=1): diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index b645a6be18e22..602b2b88ae9c9 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -15,11 +15,13 @@ from sklearn.utils.testing \ import (assert_true, assert_greater, assert_array_less, assert_almost_equal, assert_equal, assert_raise_message, - assert_array_almost_equal) + assert_array_almost_equal, assert_array_equal) def f(x): return x * np.sin(x) + + X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T y = f(X).ravel() @@ -344,3 +346,21 @@ def test_no_fit_default_predict(): assert_array_almost_equal(y_std1, y_std2) assert_array_almost_equal(y_cov1, y_cov2) + + +def test_K_inv_reset(): + y2 = f(X2).ravel() + for kernel in kernels: + # Test that self._K_inv is reset after a new fit + gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) + assert_true(hasattr(gpr, '_K_inv')) + assert_true(gpr._K_inv is None) + gpr.predict(X, return_std=True) + assert_true(gpr._K_inv is not None) + gpr.fit(X2, y2) + assert_true(gpr._K_inv is None) + gpr.predict(X2, return_std=True) + gpr2 = GaussianProcessRegressor(kernel=kernel).fit(X2, y2) + gpr2.predict(X2, return_std=True) + # the value of K_inv should be independent of the first fit + assert_array_equal(gpr._K_inv, gpr2._K_inv) diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py index cfe1aba4ea178..5571138d68d83 100644 --- 
a/sklearn/learning_curve.py +++ b/sklearn/learning_curve.py @@ -206,7 +206,7 @@ def _translate_train_sizes(train_sizes, n_max_training_samples): n_ticks = train_sizes_abs.shape[0] n_min_required_samples = np.min(train_sizes_abs) n_max_required_samples = np.max(train_sizes_abs) - if np.issubdtype(train_sizes_abs.dtype, np.float): + if np.issubdtype(train_sizes_abs.dtype, np.floating): if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0: raise ValueError("train_sizes has been interpreted as fractions " "of the maximum number of training samples and " diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 17b988b08e6c7..bb7c12ab601a2 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -414,8 +414,6 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, alphas[-add_features:] = 0 coef = coefs[n_iter] prev_coef = coefs[n_iter - 1] - alpha = alphas[n_iter, np.newaxis] - prev_alpha = alphas[n_iter - 1, np.newaxis] else: # mimic the effect of incrementing n_iter on the array references prev_coef = coef diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 94eb3ea3d2dcb..ea4300df01100 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -17,7 +17,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import raises from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model.logistic import ( @@ -249,13 +248,13 @@ def test_write_parameters(): assert_array_almost_equal(clf.decision_function(X), 0) -@raises(ValueError) def test_nan(): # Test proper NaN handling. # Regression test for Issue #252: fit used to go into an infinite loop. 
Xnan = np.array(X, dtype=np.float64) Xnan[0, 1] = np.nan - LogisticRegression(random_state=0).fit(Xnan, Y1) + logistic = LogisticRegression(random_state=0) + assert_raises(ValueError, logistic.fit, Xnan, Y1) def test_consistency_path(): diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 7146ed1a129b2..6f8e716f9ad19 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,7 +1,7 @@ import numpy as np from scipy import sparse -from numpy.testing import assert_equal, assert_raises +from numpy.testing import assert_equal from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal @@ -10,6 +10,7 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises_regexp +from sklearn.utils.testing import assert_raises from sklearn.linear_model import LinearRegression, RANSACRegressor, Lasso from sklearn.linear_model.ransac import _dynamic_max_trials diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index f033a4f6021b2..d4552a9934cf1 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -9,7 +9,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_less -from sklearn.utils.testing import raises from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_false, assert_true from sklearn.utils.testing import assert_equal @@ -266,11 +265,11 @@ def test_late_onset_averaging_reached(self): decimal=16) assert_almost_equal(clf1.intercept_, average_intercept, decimal=16) - @raises(ValueError) def test_sgd_bad_alpha_for_optimal_learning_rate(self): # Check whether expected ValueError on bad alpha, i.e. 
0 # since alpha is used to compute the optimal learning rate - self.factory(alpha=0, learning_rate="optimal") + assert_raises(ValueError, self.factory, + alpha=0, learning_rate="optimal") class DenseSGDClassifierTestCase(unittest.TestCase, CommonTest): @@ -287,63 +286,56 @@ def test_sgd(self): # assert_almost_equal(clf.coef_[0], clf.coef_[1], decimal=7) assert_array_equal(clf.predict(T), true_result) - @raises(ValueError) def test_sgd_bad_l1_ratio(self): # Check whether expected ValueError on bad l1_ratio - self.factory(l1_ratio=1.1) + assert_raises(ValueError, self.factory, l1_ratio=1.1) - @raises(ValueError) def test_sgd_bad_learning_rate_schedule(self): # Check whether expected ValueError on bad learning_rate - self.factory(learning_rate="") + assert_raises(ValueError, self.factory, learning_rate="") - @raises(ValueError) def test_sgd_bad_eta0(self): # Check whether expected ValueError on bad eta0 - self.factory(eta0=0, learning_rate="constant") + assert_raises(ValueError, self.factory, eta0=0, + learning_rate="constant") - @raises(ValueError) def test_sgd_bad_alpha(self): # Check whether expected ValueError on bad alpha - self.factory(alpha=-.1) + assert_raises(ValueError, self.factory, alpha=-.1) - @raises(ValueError) def test_sgd_bad_penalty(self): # Check whether expected ValueError on bad penalty - self.factory(penalty='foobar', l1_ratio=0.85) + assert_raises(ValueError, self.factory, penalty='foobar', + l1_ratio=0.85) - @raises(ValueError) def test_sgd_bad_loss(self): # Check whether expected ValueError on bad loss - self.factory(loss="foobar") + assert_raises(ValueError, self.factory, loss="foobar") - @raises(ValueError) def test_sgd_max_iter_param(self): # Test parameter validity check - self.factory(max_iter=-10000) + assert_raises(ValueError, self.factory, max_iter=-10000) - @raises(ValueError) def test_sgd_shuffle_param(self): # Test parameter validity check - self.factory(shuffle="false") + assert_raises(ValueError, self.factory, shuffle="false") - @raises(TypeError) def test_argument_coef(self): # Checks coef_init not allowed as model argument (only fit) - # Provided coef_ does not match dataset. - self.factory(coef_init=np.zeros((3,))).fit(X, Y) + # Provided coef_ does not match dataset + assert_raises(TypeError, self.factory, coef_init=np.zeros((3,))) - @raises(ValueError) def test_provide_coef(self): # Checks coef_init shape for the warm starts # Provided coef_ does not match dataset. - self.factory().fit(X, Y, coef_init=np.zeros((3,))) + assert_raises(ValueError, self.factory().fit, + X, Y, coef_init=np.zeros((3,))) - @raises(ValueError) def test_set_intercept(self): # Checks intercept_ shape for the warm starts # Provided intercept_ does not match dataset. 
- self.factory().fit(X, Y, intercept_init=np.zeros((3,))) + assert_raises(ValueError, self.factory().fit, + X, Y, intercept_init=np.zeros((3,))) def test_set_intercept_binary(self): # Checks intercept_ shape for the warm starts in binary case @@ -386,10 +378,10 @@ def test_set_intercept_to_intercept(self): clf = self.factory().fit(X, Y) self.factory().fit(X, Y, intercept_init=clf.intercept_) - @raises(ValueError) def test_sgd_at_least_two_labels(self): # Target must have at least two labels - self.factory(alpha=0.01, max_iter=20).fit(X2, np.ones(9)) + clf = self.factory(alpha=0.01, max_iter=20) + assert_raises(ValueError, clf.fit, X2, np.ones(9)) def test_partial_fit_weight_class_balanced(self): # partial_fit with class_weight='balanced' not supported""" @@ -607,17 +599,15 @@ def test_equal_class_weight(self): # should be similar up to some epsilon due to learning rate schedule assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2) - @raises(ValueError) def test_wrong_class_weight_label(self): # ValueError due to not existing class label. clf = self.factory(alpha=0.1, max_iter=1000, class_weight={0: 0.5}) - clf.fit(X, Y) + assert_raises(ValueError, clf.fit, X, Y) - @raises(ValueError) def test_wrong_class_weight_format(self): # ValueError due to wrong class_weight argument type. clf = self.factory(alpha=0.1, max_iter=1000, class_weight=[0.5]) - clf.fit(X, Y) + assert_raises(ValueError, clf.fit, X, Y) def test_weights_multiplied(self): # Tests that class_weight and sample_weight are multiplicative @@ -700,18 +690,16 @@ def test_sample_weights(self): # the prediction on this point should shift assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1])) - @raises(ValueError) def test_wrong_sample_weights(self): # Test if ValueError is raised if sample_weight has wrong shape clf = self.factory(alpha=0.1, max_iter=1000, fit_intercept=False) # provided sample_weight too long - clf.fit(X, Y, sample_weight=np.arange(7)) + assert_raises(ValueError, clf.fit, X, Y, sample_weight=np.arange(7)) - @raises(ValueError) def test_partial_fit_exception(self): clf = self.factory(alpha=0.01) # classes was not specified - clf.partial_fit(X3, Y3) + assert_raises(ValueError, clf.partial_fit, X3, Y3) def test_partial_fit_binary(self): third = X.shape[0] // 3 @@ -851,15 +839,14 @@ def test_sgd(self): clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) assert_equal(clf.coef_[0], clf.coef_[1]) - @raises(ValueError) def test_sgd_bad_penalty(self): # Check whether expected ValueError on bad penalty - self.factory(penalty='foobar', l1_ratio=0.85) + assert_raises(ValueError, self.factory, + penalty='foobar', l1_ratio=0.85) - @raises(ValueError) def test_sgd_bad_loss(self): # Check whether expected ValueError on bad loss - self.factory(loss="foobar") + assert_raises(ValueError, self.factory, loss="foobar") def test_sgd_averaged_computed_correctly(self): # Tests the average regressor matches the naive implementation diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index 279beb8014e95..3a2b1f9dc006f 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -20,7 +20,7 @@ from sklearn.linear_model.theil_sen import _spatial_median, _breakdown_point from sklearn.linear_model.theil_sen import _modified_weiszfeld_step from sklearn.utils.testing import ( - assert_almost_equal, assert_greater, assert_less, raises, + assert_almost_equal, assert_greater, assert_less, assert_raises, ) @@ -202,31 +202,31 @@ def 
test_calc_breakdown_point(): assert_less(np.abs(bp - 1 + 1 / (np.sqrt(2))), 1.e-6) -@raises(ValueError) def test_checksubparams_negative_subpopulation(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(max_subpopulation=-1, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_too_few_subsamples(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(n_subsamples=1, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=1, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_too_many_subsamples(): X, y, w, c = gen_toy_problem_1d() - TheilSenRegressor(n_subsamples=101, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) -@raises(ValueError) def test_checksubparams_n_subsamples_if_less_samples_than_features(): random_state = np.random.RandomState(0) n_samples, n_features = 10, 20 X = random_state.normal(size=(n_samples, n_features)) y = random_state.normal(size=n_samples) - TheilSenRegressor(n_subsamples=9, random_state=0).fit(X, y) + theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0) + assert_raises(ValueError, theil_sen.fit, X, y) def test_subpopulation(): diff --git a/sklearn/manifold/isomap.py b/sklearn/manifold/isomap.py index 1f6d0ae0dc0b1..f649237448d32 100644 --- a/sklearn/manifold/isomap.py +++ b/sklearn/manifold/isomap.py @@ -157,6 +157,8 @@ def fit(self, X, y=None): numpy array, precomputed tree, or NearestNeighbors object. + y: Ignored + Returns ------- self : returns an instance of self. @@ -173,6 +175,8 @@ def fit_transform(self, X, y=None): Training vector, where n_samples in the number of samples and n_features is the number of features. + y: Ignored + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py index e8705cff359a6..8151658fe97cc 100644 --- a/sklearn/manifold/locally_linear.py +++ b/sklearn/manifold/locally_linear.py @@ -652,6 +652,8 @@ def fit(self, X, y=None): X : array-like of shape [n_samples, n_features] training set. + y: Ignored + Returns ------- self : returns an instance of self. @@ -667,6 +669,8 @@ def fit_transform(self, X, y=None): X : array-like of shape [n_samples, n_features] training set. + y: Ignored + Returns ------- X_new : array-like, shape (n_samples, n_components) diff --git a/sklearn/manifold/mds.py b/sklearn/manifold/mds.py index 5f7327ef4dc84..3890c4e40bffb 100644 --- a/sklearn/manifold/mds.py +++ b/sklearn/manifold/mds.py @@ -379,6 +379,8 @@ def fit(self, X, y=None, init=None): Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix. + y: Ignored + init : ndarray, shape (n_samples,), optional, default: None Starting configuration of the embedding to initialize the SMACOF algorithm. By default, the algorithm is initialized with a randomly @@ -397,6 +399,8 @@ def fit_transform(self, X, y=None, init=None): Input data. If ``dissimilarity=='precomputed'``, the input should be the dissimilarity matrix. + y: Ignored + init : ndarray, shape (n_samples,), optional, default: None Starting configuration of the embedding to initialize the SMACOF algorithm. 
By default, the algorithm is initialized with a randomly
diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py
index a330b7da7f856..4ae588d1ae6c0 100644
--- a/sklearn/manifold/spectral_embedding_.py
+++ b/sklearn/manifold/spectral_embedding_.py
@@ -428,6 +428,8 @@ def _get_affinity_matrix(self, X, Y=None):
             Interpret X as precomputed adjacency graph computed from
             samples.
 
+        Y : Ignored
+
         Returns
         -------
         affinity_matrix, shape (n_samples, n_samples)
@@ -474,6 +476,8 @@ def fit(self, X, y=None):
             Interpret X as precomputed adjacency graph computed from
             samples.
 
+        y : Ignored
+
         Returns
         -------
         self : object
@@ -514,6 +518,8 @@ def fit_transform(self, X, y=None):
             Interpret X as precomputed adjacency graph computed from
             samples.
 
+        y : Ignored
+
         Returns
         -------
         X_new : array-like, shape (n_samples, n_components)
diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py
index 163e8340f7b29..f7dba6dbdd78f 100644
--- a/sklearn/manifold/t_sne.py
+++ b/sklearn/manifold/t_sne.py
@@ -851,6 +851,8 @@ def fit_transform(self, X, y=None):
             If the metric is 'precomputed' X must be a square distance
             matrix. Otherwise it contains a sample per row.
 
+        y : Ignored
+
         Returns
         -------
         X_new : array, shape (n_samples, n_components)
@@ -870,6 +872,8 @@ def fit(self, X, y=None):
             matrix. Otherwise it contains a sample per row. If the method
             is 'exact', X may be a sparse matrix of type 'csr', 'csc'
             or 'coo'.
+
+        y : Ignored
         """
         self.fit_transform(X)
         return self
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 2311b48ee2eae..907f476355069 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -244,7 +244,9 @@ def test_preserve_trustworthiness_approximately():
                         method=method)
             X_embedded = tsne.fit_transform(X)
             t = trustworthiness(X, X_embedded, n_neighbors=1)
-            assert_greater(t, 0.9)
+            assert_greater(t, 0.9, msg='Trustworthiness={:0.3f} < 0.9 '
+                                       'for method={} and '
+                                       'init={}'.format(t, method, init))
 
 
 def test_optimization_minimizes_kl_divergence():
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 395725c00d7d9..3f169fe1b46de 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -167,7 +167,7 @@ def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
     2
 
     In the multilabel case with binary label indicators:
-    
+
     >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
     0.5
     """
@@ -528,9 +528,9 @@ def matthews_corrcoef(y_true, y_pred, sample_weight=None):
         y_pred = lb.transform(y_pred)
 
     C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
-    t_sum = C.sum(axis=1)
-    p_sum = C.sum(axis=0)
-    n_correct = np.trace(C)
+    t_sum = C.sum(axis=1, dtype=np.float64)
+    p_sum = C.sum(axis=0, dtype=np.float64)
+    n_correct = np.trace(C, dtype=np.float64)
     n_samples = p_sum.sum()
     cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
     cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 4d6b87f701ea4..c259036807f7f 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -483,6 +483,41 @@ def test_matthews_corrcoef_multiclass():
     assert_almost_equal(mcc, 0.)
 
 
+def test_matthews_corrcoef_overflow():
+    # https://github.com/scikit-learn/scikit-learn/issues/9622
+    rng = np.random.RandomState(20170906)
+
+    def mcc_safe(y_true, y_pred):
+        conf_matrix = confusion_matrix(y_true, y_pred)
+        true_pos = conf_matrix[1, 1]
+        false_pos = conf_matrix[0, 1]
+        false_neg = conf_matrix[1, 0]
+        n_points = len(y_true)
+        pos_rate = (true_pos + false_neg) / n_points
+        activity = (true_pos + false_pos) / n_points
+        mcc_numerator = true_pos / n_points - pos_rate * activity
+        mcc_denominator = activity * pos_rate * (1 - activity) * (1 - pos_rate)
+        return mcc_numerator / np.sqrt(mcc_denominator)
+
+    def random_ys(n_points):  # binary
+        x_true = rng.random_sample(n_points)
+        x_pred = x_true + 0.2 * (rng.random_sample(n_points) - 0.5)
+        y_true = (x_true > 0.5)
+        y_pred = (x_pred > 0.5)
+        return y_true, y_pred
+
+    for n_points in [100, 10000, 1000000]:
+        arr = np.repeat([0., 1.], n_points)  # binary
+        assert_almost_equal(matthews_corrcoef(arr, arr), 1.0)
+        arr = np.repeat([0., 1., 2.], n_points)  # multiclass
+        assert_almost_equal(matthews_corrcoef(arr, arr), 1.0)
+
+        y_true, y_pred = random_ys(n_points)
+        assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)
+        assert_almost_equal(matthews_corrcoef(y_true, y_pred),
+                            mcc_safe(y_true, y_pred))
+
+
 def test_precision_recall_f1_score_multiclass():
     # Test Precision Recall and F1 Score for multiclass classification task
     y_true, y_pred, _ = make_prediction(binary=False)
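The dtype=np.float64 sums in matthews_corrcoef above matter because cov_ytyp and cov_ypyp multiply quantities of order n_samples ** 2, which can wrap around silently in fixed-width integer arithmetic once counts reach the millions. A toy demonstration of the failure mode, independent of the sklearn code and using hypothetical class counts:

    import numpy as np

    counts = np.array([2 * 10**6, 10**6], dtype=np.int32)  # class counts
    # an integer dot product of order n**2 overflows int32 silently
    print(np.dot(counts, counts))             # wrapped, nonsense value
    print(np.dot(counts.astype(np.float64),
                 counts.astype(np.float64)))  # 5e12, as intended
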
diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py
index c2fd42ab45842..ddc861b4c19f0 100644
--- a/sklearn/mixture/dpgmm.py
+++ b/sklearn/mixture/dpgmm.py
@@ -273,7 +273,6 @@ def score_samples(self, X):
         X = check_array(X)
         if X.ndim == 1:
             X = X[:, np.newaxis]
-        z = np.zeros((X.shape[0], self.n_components))
         sd = digamma(self.gamma_.T[1] + self.gamma_.T[2])
         dgamma1 = digamma(self.gamma_.T[1]) - sd
         dgamma2 = np.zeros(self.n_components)
@@ -844,7 +843,6 @@ def _bound_proportions(self, z):
         return logprior
 
     def _bound_concentration(self):
-        logprior = 0.
         logprior = gammaln(np.sum(self.gamma_)) - gammaln(self.n_components
                                                           * self.alpha_)
         logprior -= np.sum(gammaln(self.gamma_) - gammaln(self.alpha_))
diff --git a/sklearn/mixture/tests/test_gmm.py b/sklearn/mixture/tests/test_gmm.py
index 2a2dce1fc18d1..137703adfcad4 100644
--- a/sklearn/mixture/tests/test_gmm.py
+++ b/sklearn/mixture/tests/test_gmm.py
@@ -9,14 +9,14 @@
 import sys
 
 import numpy as np
-from numpy.testing import (assert_array_equal, assert_array_almost_equal,
-                           assert_raises)
+from numpy.testing import assert_array_equal, assert_array_almost_equal
+
 from scipy import stats
 from sklearn import mixture
 from sklearn.datasets.samples_generator import make_spd_matrix
 from sklearn.utils.testing import (assert_true, assert_greater,
                                    assert_raise_message, assert_warns_message,
-                                   ignore_warnings)
+                                   ignore_warnings, assert_raises)
 from sklearn.metrics.cluster import adjusted_rand_score
 from sklearn.externals.six.moves import cStringIO as StringIO
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 773f70fb7dba2..798f771534571 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -1000,6 +1000,7 @@ def learning_curve(estimator, X, y, groups=None,
         If None, the random number generator is the RandomState instance used
         by `np.random`. Used when ``shuffle`` == 'True'.
+ Returns ------- train_sizes_abs : array, shape = (n_unique_ticks,), dtype int Numbers of training examples that has been used to generate the @@ -1097,7 +1098,7 @@ def _translate_train_sizes(train_sizes, n_max_training_samples): n_ticks = train_sizes_abs.shape[0] n_min_required_samples = np.min(train_sizes_abs) n_max_required_samples = np.max(train_sizes_abs) - if np.issubdtype(train_sizes_abs.dtype, np.float): + if np.issubdtype(train_sizes_abs.dtype, np.floating): if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0: raise ValueError("train_sizes has been interpreted as fractions " "of the maximum number of training samples and " diff --git a/sklearn/neighbors/tests/test_approximate.py b/sklearn/neighbors/tests/test_approximate.py index f8b9b45640783..5863a0bd738db 100644 --- a/sklearn/neighbors/tests/test_approximate.py +++ b/sklearn/neighbors/tests/test_approximate.py @@ -46,7 +46,7 @@ def test_neighbors_accuracy_with_n_candidates(): for i, n_candidates in enumerate(n_candidates_values): lshf = ignore_warnings(LSHForest, category=DeprecationWarning)( - n_candidates=n_candidates) + n_candidates=n_candidates, random_state=0) ignore_warnings(lshf.fit)(X) for j in range(n_iter): query = X[rng.randint(0, n_samples)].reshape(1, -1) diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index e50a2e6f07445..25fac197c3657 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -6,10 +6,10 @@ from scipy import sparse as sp from numpy.testing import assert_array_equal from numpy.testing import assert_equal -from numpy.testing import assert_raises from sklearn.neighbors import NearestCentroid from sklearn import datasets +from sklearn.utils.testing import assert_raises # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -57,9 +57,9 @@ def test_classification_toy(): def test_precomputed(): clf = NearestCentroid(metric='precomputed') - with assert_raises(ValueError) as context: + with assert_raises(ValueError): clf.fit(X, y) - assert_equal(ValueError, type(context.exception)) + def test_iris(): # Check consistency on dataset iris. 
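The np.float to np.floating swaps in this diff (here in _validation.py, and likewise in learning_curve.py and text.py above) are needed because np.float is just an alias for the Python builtin float, which under NumPy 1.14's stricter issubdtype semantics matches only float64, whereas np.floating is the abstract parent of every floating dtype. A quick check of the distinction:

    import numpy as np

    sizes = np.asarray([0.1, 0.5, 1.0], dtype=np.float32)
    assert np.issubdtype(sizes.dtype, np.floating)  # whole float family
    # the builtin float is treated as np.float64, so float32 fractions
    # would be missed by the old check:
    print(np.issubdtype(sizes.dtype, float))  # False on NumPy >= 1.14
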
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 66da9dffeb066..54d29651ac776 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -110,8 +110,7 @@ class Pipeline(_BaseComposition): # BaseEstimator interface def __init__(self, steps, memory=None): - # shallow copy of steps - self.steps = list(steps) + self.steps = steps self._validate_steps() self.memory = memory @@ -184,6 +183,8 @@ def _final_estimator(self): # Estimator interface def _fit(self, X, y=None, **fit_params): + # shallow copy of steps - this should really be steps_ + self.steps = list(self.steps) self._validate_steps() # Setup the memory memory = check_memory(self.memory) @@ -413,6 +414,7 @@ def transform(self): Xt : array-like, shape = [n_samples, n_transformed_features] """ # _final_estimator is None or has transform, otherwise attribute error + # XXX: Handling the None case means we can't use if_delegate_has_method if self._final_estimator is not None: self._final_estimator.transform return self._transform @@ -443,6 +445,7 @@ def inverse_transform(self): Xt : array-like, shape = [n_samples, n_features] """ # raise AttributeError if necessary for hasattr behaviour + # XXX: Handling the None case means we can't use if_delegate_has_method for name, transform in self.steps: if transform is not None: transform.inverse_transform @@ -613,7 +616,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin): """ def __init__(self, transformer_list, n_jobs=1, transformer_weights=None): - self.transformer_list = list(transformer_list) + self.transformer_list = transformer_list self.n_jobs = n_jobs self.transformer_weights = transformer_weights self._validate_transformers() @@ -704,6 +707,7 @@ def fit(self, X, y=None): self : FeatureUnion This estimator """ + self.transformer_list = list(self.transformer_list) self._validate_transformers() transformers = Parallel(n_jobs=self.n_jobs)( delayed(_fit_one_transformer)(trans, X, y) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 7c6642a504ad1..551451a47f5a6 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -88,10 +88,13 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, random_state : int, RandomState instance or None, optional (default=None) The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + the data for the dual coordinate descent (if ``dual=True``). When + ``dual=False`` the underlying implementation of :class:`LinearSVC` + is not random and ``random_state`` has no effect on the results. If + int, random_state is the seed used by the random number generator; If + RandomState instance, random_state is the random number generator; If + None, the random number generator is the RandomState instance used by + `np.random`. max_iter : int, (default=1000) The maximum number of iterations to be run. @@ -509,11 +512,11 @@ class SVC(BaseSVC): Deprecated *decision_function_shape='ovo' and None*. random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. 
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 7c6642a504ad1..551451a47f5a6 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -88,10 +88,13 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin,
 
     random_state : int, RandomState instance or None, optional (default=None)
         The seed of the pseudo random number generator to use when shuffling
-        the data. If int, random_state is the seed used by the random number
-        generator; If RandomState instance, random_state is the random number
-        generator; If None, the random number generator is the RandomState
-        instance used by `np.random`.
+        the data for the dual coordinate descent (if ``dual=True``). When
+        ``dual=False`` the underlying implementation of :class:`LinearSVC`
+        is not random and ``random_state`` has no effect on the results. If
+        int, random_state is the seed used by the random number generator; If
+        RandomState instance, random_state is the random number generator; If
+        None, the random number generator is the RandomState instance used by
+        `np.random`.
 
     max_iter : int, (default=1000)
         The maximum number of iterations to be run.
@@ -509,11 +512,11 @@ class SVC(BaseSVC):
         Deprecated *decision_function_shape='ovo' and None*.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        The seed of the pseudo random number generator to use when shuffling
-        the data. If int, random_state is the seed used by the random number
-        generator; If RandomState instance, random_state is the random number
-        generator; If None, the random number generator is the RandomState
-        instance used by `np.random`.
+        The seed of the pseudo random number generator used when shuffling
+        the data for probability estimates. If int, random_state is the
+        seed used by the random number generator; If RandomState instance,
+        random_state is the random number generator; If None, the random
+        number generator is the RandomState instance used by `np.random`.
 
     Attributes
     ----------
@@ -665,11 +668,11 @@ class NuSVC(BaseSVC):
         Deprecated *decision_function_shape='ovo' and None*.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        The seed of the pseudo random number generator to use when shuffling
-        the data. If int, random_state is the seed used by the random number
-        generator; If RandomState instance, random_state is the random number
-        generator; If None, the random number generator is the RandomState
-        instance used by `np.random`.
+        The seed of the pseudo random number generator used when shuffling
+        the data for probability estimates. If int, random_state is the seed
+        used by the random number generator; If RandomState instance,
+        random_state is the random number generator; If None, the random
+        number generator is the RandomState instance used by `np.random`.
 
     Attributes
     ----------
@@ -1019,11 +1022,11 @@ class OneClassSVM(BaseLibSVM):
         Hard limit on iterations within solver, or -1 for no limit.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        The seed of the pseudo random number generator to use when shuffling
-        the data. If int, random_state is the seed used by the random number
-        generator; If RandomState instance, random_state is the random number
-        generator; If None, the random number generator is the RandomState
-        instance used by `np.random`.
+        Ignored.
+
+        .. deprecated:: 0.20
+           ``random_state`` has been deprecated in 0.20 and will be removed in
+           0.22.
 
     Attributes
     ----------
@@ -1080,6 +1083,11 @@ def fit(self, X, y=None, sample_weight=None, **params):
             If X is not a C-ordered contiguous array it is copied.
 
         """
+
+        if self.random_state is not None:
+            warnings.warn("The random_state parameter is deprecated and will"
+                          " be removed in version 0.22.", DeprecationWarning)
+
         super(OneClassSVM, self).fit(X, np.ones(_num_samples(X)),
                                      sample_weight=sample_weight, **params)
         return self
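The reworded docstrings pin down when the libsvm-based estimators actually consume `random_state`: only the internal shuffle behind `probability=True` (Platt scaling). A short illustration of the documented behavior:

```python
from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, random_state=0)

# A plain SVC fit is deterministic; random_state is irrelevant here.
clf = SVC(kernel='linear').fit(X, y)

# probability=True triggers an internal shuffle for Platt scaling,
# which is the one place random_state is consumed.
clf_p = SVC(kernel='linear', probability=True, random_state=0).fit(X, y)
print(clf_p.predict_proba(X[:2]))
```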
""" + + if self.random_state is not None: + warnings.warn("The random_state parameter is deprecated and will" + " be removed in version 0.22.", DeprecationWarning) + super(OneClassSVM, self).fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight, **params) return self diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index 583c413bc5c11..e46dbb92df44a 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -5,7 +5,7 @@ from sklearn.svm import LinearSVC from sklearn.linear_model.logistic import LogisticRegression -from sklearn.utils.testing import assert_true, raises +from sklearn.utils.testing import assert_true, assert_raises from sklearn.utils.testing import assert_raise_message @@ -63,13 +63,11 @@ def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None): (np.asarray(clf.intercept_) != 0).any()) -@raises(ValueError) def test_ill_posed_min_c(): X = [[0, 0], [0, 0]] y = [0, 1] - l1_min_c(X, y) + assert_raises(ValueError, l1_min_c, X, y) -@raises(ValueError) def test_unsupported_loss(): - l1_min_c(dense_X, Y1, 'l1') + assert_raises(ValueError, l1_min_c, dense_X, Y1, 'l1') diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 948d5818b9b0e..7ad0f20382657 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -61,19 +61,6 @@ def __init__(self, a=np.array([0])): self.a = a.copy() -class DeprecatedAttributeEstimator(BaseEstimator): - def __init__(self, a=None, b=None): - self.a = a - if b is not None: - DeprecationWarning("b is deprecated and renamed 'a'") - self.a = b - - @property - @deprecated("Parameter 'b' is deprecated and renamed to 'a'") - def b(self): - return self._b - - class Buggy(BaseEstimator): " A buggy estimator that does not set its parameters right. 
" @@ -219,19 +206,6 @@ def test_get_params(): assert_raises(ValueError, test.set_params, a__a=2) -def test_get_params_deprecated(): - # deprecated attribute should not show up as params - est = DeprecatedAttributeEstimator(a=1) - - assert_true('a' in est.get_params()) - assert_true('a' in est.get_params(deep=True)) - assert_true('a' in est.get_params(deep=False)) - - assert_true('b' not in est.get_params()) - assert_true('b' not in est.get_params(deep=True)) - assert_true('b' not in est.get_params(deep=False)) - - def test_is_classifier(): svc = SVC() assert_true(is_classifier(svc)) diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index d5d0715a0fb7f..967acb2324f19 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -166,6 +166,30 @@ def test_isotonic_regression_ties_secondary_(): assert_array_almost_equal(ir.fit_transform(x, y), y_true, 4) +def test_isotonic_regression_with_ties_in_differently_sized_groups(): + """ + Non-regression test to handle issue 9432: + https://github.com/scikit-learn/scikit-learn/issues/9432 + + Compare against output in R: + > library("isotone") + > x <- c(0, 1, 1, 2, 3, 4) + > y <- c(0, 0, 1, 0, 0, 1) + > res1 <- gpava(x, y, ties="secondary") + > res1$x + + `isotone` version: 1.1-0, 2015-07-24 + R version: R version 3.3.2 (2016-10-31) + """ + x = np.array([0, 1, 1, 2, 3, 4]) + y = np.array([0, 0, 1, 0, 0, 1]) + y_true = np.array([0., 0.25, 0.25, 0.25, 0.25, 1.]) + ir = IsotonicRegression() + ir.fit(x, y) + assert_array_almost_equal(ir.transform(x), y_true) + assert_array_almost_equal(ir.fit_transform(x, y), y_true) + + def test_isotonic_regression_reversed(): y = np.array([10, 9, 10, 7, 6, 6.1, 5]) y_ = IsotonicRegression(increasing=False).fit_transform( diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 1165370885d36..d1d62f80e51a5 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -19,6 +19,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_dict_equal +from sklearn.utils.testing import assert_no_warnings from sklearn.base import clone, BaseEstimator from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union @@ -187,7 +188,7 @@ def test_pipeline_init(): assert_raises(ValueError, pipe.set_params, anova__C=0.1) # Test clone - pipe2 = clone(pipe) + pipe2 = assert_no_warnings(clone, pipe) assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc']) # Check that apart from estimators, the parameters are the same @@ -421,6 +422,10 @@ def test_feature_union(): X_sp_transformed = fs.fit_transform(X_sp, y) assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) + # Test clone + fs2 = assert_no_warnings(clone, fs) + assert_false(fs.transformer_list[0][1] is fs2.transformer_list[0][1]) + # test setting parameters fs.set_params(select__k=2) assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 97eee80ecff71..71ee8fa2bcb61 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -30,7 +30,6 @@ from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.validation import check_random_state @@ 
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 1165370885d36..d1d62f80e51a5 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -19,6 +19,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_dict_equal
+from sklearn.utils.testing import assert_no_warnings
 
 from sklearn.base import clone, BaseEstimator
 from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
@@ -187,7 +188,7 @@ def test_pipeline_init():
     assert_raises(ValueError, pipe.set_params, anova__C=0.1)
 
     # Test clone
-    pipe2 = clone(pipe)
+    pipe2 = assert_no_warnings(clone, pipe)
     assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])
 
     # Check that apart from estimators, the parameters are the same
@@ -421,6 +422,10 @@ def test_feature_union():
     X_sp_transformed = fs.fit_transform(X_sp, y)
     assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())
 
+    # Test clone
+    fs2 = assert_no_warnings(clone, fs)
+    assert_false(fs.transformer_list[0][1] is fs2.transformer_list[0][1])
+
     # test setting parameters
     fs.set_params(select__k=2)
     assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index 97eee80ecff71..71ee8fa2bcb61 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -30,7 +30,6 @@
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import assert_warns_message
-from sklearn.utils.testing import raises
 from sklearn.utils.testing import ignore_warnings
 
 from sklearn.utils.validation import check_random_state
@@ -394,11 +393,10 @@ def test_importances():
                        clf2.feature_importances_)
 
 
-@raises(ValueError)
 def test_importances_raises():
     # Check if variable importance before fit raises ValueError.
     clf = DecisionTreeClassifier()
-    clf.feature_importances_
+    assert_raises(ValueError, getattr, clf, 'feature_importances_')
 
 
 def test_importances_gini_equal_mse():
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 4b2665cdd4f77..83e8a48a6625a 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -90,7 +90,7 @@ def safe_mask(X, mask):
         mask
     """
     mask = np.asarray(mask)
-    if np.issubdtype(mask.dtype, np.int):
+    if np.issubdtype(mask.dtype, np.signedinteger):
         return mask
 
     if hasattr(X, "toarray"):
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 81f0d88e3f02b..3e7cb198a9d12 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -76,6 +76,7 @@ def _yield_non_meta_checks(name, estimator):
     yield check_sample_weights_pandas_series
     yield check_sample_weights_list
     yield check_estimators_fit_returns_self
+    yield check_complex_data
 
     # Check that all estimator yield informative messages when
     # trained on empty datasets
@@ -458,6 +459,16 @@ def check_dtype_object(name, estimator_orig):
         assert_raises_regex(TypeError, msg, estimator.fit, X, y)
 
 
+def check_complex_data(name, estimator_orig):
+    # check that estimators raise an exception on providing complex data
+    X = np.random.sample(10) + 1j * np.random.sample(10)
+    X = X.reshape(-1, 1)
+    y = np.random.sample(10) + 1j * np.random.sample(10)
+    estimator = clone(estimator_orig)
+    assert_raises_regex(ValueError, "Complex data not supported",
+                        estimator.fit, X, y)
+
+
 @ignore_warnings
 def check_dict_unchanged(name, estimator_orig):
     # this estimator raises
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 70619673bea3b..e95ceb57497ae 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -421,7 +421,6 @@ def weighted_mode(a, w, axis=0):
     else:
         a = np.asarray(a)
         w = np.asarray(w)
-        axis = axis
 
     if a.shape != w.shape:
         w = np.zeros(a.shape, dtype=w.dtype) + w
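`check_complex_data` leans on the `check_array` guard added at the end of this diff. A standalone sketch of what the common test asserts, using `LinearRegression` as an arbitrary estimator and assuming the patched validation:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.utils.testing import assert_raises_regex

rng = np.random.RandomState(0)
X = (rng.random_sample(10) + 1j * rng.random_sample(10)).reshape(-1, 1)
y = rng.random_sample(10) + 1j * rng.random_sample(10)

# Estimators routing input through check_array now fail loudly instead
# of silently discarding the imaginary part.
assert_raises_regex(ValueError, "Complex data not supported",
                    LinearRegression().fit, X, y)
```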
diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 9ff79c628a1b8..52c12ce5d5953 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -18,6 +18,9 @@
 from cython cimport floating
 
 np.import_array()
 
+ctypedef fused integral:
+    int
+    long long
 
 ctypedef np.float64_t DOUBLE
 
@@ -30,11 +33,11 @@ def csr_row_norms(X):
 
 
 def _csr_row_norms(np.ndarray[floating, ndim=1, mode="c"] X_data,
                    shape,
-                   np.ndarray[int, ndim=1, mode="c"] X_indices,
-                   np.ndarray[int, ndim=1, mode="c"] X_indptr):
+                   np.ndarray[integral, ndim=1, mode="c"] X_indices,
+                   np.ndarray[integral, ndim=1, mode="c"] X_indptr):
     cdef:
-        unsigned int n_samples = shape[0]
-        unsigned int n_features = shape[1]
+        unsigned long long n_samples = shape[0]
+        unsigned long long n_features = shape[1]
         np.ndarray[DOUBLE, ndim=1, mode="c"] norms
 
         np.npy_intp i, j
@@ -326,17 +329,16 @@ def inplace_csr_row_normalize_l1(X):
 
 
 def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data,
                                   shape,
-                                  np.ndarray[int, ndim=1] X_indices,
-                                  np.ndarray[int, ndim=1] X_indptr):
-    cdef unsigned int n_samples = shape[0]
-    cdef unsigned int n_features = shape[1]
+                                  np.ndarray[integral, ndim=1] X_indices,
+                                  np.ndarray[integral, ndim=1] X_indptr):
+    cdef unsigned long long n_samples = shape[0]
+    cdef unsigned long long n_features = shape[1]
 
     # the column indices for row i are stored in:
     #    indices[indptr[i]:indptr[i+1]]
     # and their corresponding values are stored in:
     #    data[indptr[i]:indptr[i+1]]
-    cdef unsigned int i
-    cdef unsigned int j
+    cdef np.npy_intp i, j
     cdef double sum_
 
     for i in xrange(n_samples):
@@ -361,13 +363,12 @@ def inplace_csr_row_normalize_l2(X):
 
 
 def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
                                   shape,
-                                  np.ndarray[int, ndim=1] X_indices,
-                                  np.ndarray[int, ndim=1] X_indptr):
-    cdef unsigned int n_samples = shape[0]
-    cdef unsigned int n_features = shape[1]
+                                  np.ndarray[integral, ndim=1] X_indices,
+                                  np.ndarray[integral, ndim=1] X_indptr):
+    cdef integral n_samples = shape[0]
+    cdef integral n_features = shape[1]
 
-    cdef unsigned int i
-    cdef unsigned int j
+    cdef np.npy_intp i, j
     cdef double sum_
 
     for i in xrange(n_samples):
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 4e7f7ea3e98a3..c5b6209cc5728 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -45,9 +45,31 @@
 import sklearn
 from sklearn.base import BaseEstimator
 from sklearn.externals import joblib
+from sklearn.utils import deprecated
 
-from nose.tools import raises
-from nose import with_setup
+additional_names_in_all = []
+try:
+    from nose.tools import raises as _nose_raises
+    deprecation_message = (
+        'sklearn.utils.testing.raises has been deprecated in version 0.20 '
+        'and will be removed in 0.22. Please use '
+        'sklearn.utils.testing.assert_raises instead.')
+    raises = deprecated(deprecation_message)(_nose_raises)
+    additional_names_in_all.append('raises')
+except ImportError:
+    pass
+
+try:
+    from nose.tools import with_setup as _with_setup
+    deprecation_message = (
+        'sklearn.utils.testing.with_setup has been deprecated in version 0.20 '
+        'and will be removed in 0.22.'
+        'If your code relies on with_setup, please use'
+        ' nose.tools.with_setup instead.')
+    with_setup = deprecated(deprecation_message)(_with_setup)
+    additional_names_in_all.append('with_setup')
+except ImportError:
+    pass
 
 from numpy.testing import assert_almost_equal
 from numpy.testing import assert_array_equal
@@ -61,12 +83,13 @@
 from sklearn.utils._unittest_backport import TestCase
 
 __all__ = ["assert_equal", "assert_not_equal", "assert_raises",
-           "assert_raises_regexp", "raises", "with_setup", "assert_true",
+           "assert_raises_regexp", "assert_true",
            "assert_false", "assert_almost_equal", "assert_array_equal",
            "assert_array_almost_equal", "assert_array_less",
           "assert_less", "assert_less_equal",
           "assert_greater", "assert_greater_equal",
           "assert_approx_equal", "SkipTest"]
+__all__.extend(additional_names_in_all)
 
 _dummy = TestCase('__init__')
 assert_equal = _dummy.assertEqual
@@ -745,10 +768,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         _delete_folder(self.temp_folder)
 
 
-with_network = with_setup(check_skip_network)
-with_travis = with_setup(check_skip_travis)
-
-
 class _named_check(object):
     """Wraps a check to show a useful description
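The `integral` fused type lets these Cython kernels accept the int64 index arrays SciPy switches to once `nnz` exceeds the int32 range, instead of failing with a buffer dtype mismatch. A sketch of the situation the tests below simulate, assuming the patched helpers:

```python
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2

X = sp.csr_matrix(np.random.RandomState(0).randn(10, 5))

# Force the int64 indices SciPy would use for very large matrices.
X.indices = X.indices.astype(np.int64)
X.indptr = X.indptr.astype(np.int64)

# Previously a buffer dtype mismatch; now every row is normalized to
# unit L2 norm in place.
inplace_csr_row_normalize_l2(X)
norms = np.sqrt(np.asarray(X.multiply(X).sum(axis=1)).ravel())
assert np.allclose(norms, 1.0)
```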
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 86d604ef33f66..f53b814c70084 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -206,10 +206,19 @@ def test_row_norms():
                                   precision)
         assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)
 
-        Xcsr = sparse.csr_matrix(X, dtype=dtype)
-        assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
-                                  precision)
-        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr), precision)
+        for csr_index_dtype in [np.int32, np.int64]:
+            Xcsr = sparse.csr_matrix(X, dtype=dtype)
+            # csr_matrix will use int32 indices by default,
+            # up-casting those to int64 when necessary
+            if csr_index_dtype is np.int64:
+                Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype)
+                Xcsr.indices = Xcsr.indices.astype(csr_index_dtype)
+            assert Xcsr.indices.dtype == csr_index_dtype
+            assert Xcsr.indptr.dtype == csr_index_dtype
+            assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
+                                      precision)
+            assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr),
+                                      precision)
 
 
 def test_randomized_svd_low_rank_with_noise():
diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py
index fd09267ea7b0a..f2b35e7459833 100644
--- a/sklearn/utils/tests/test_sparsefuncs.py
+++ b/sklearn/utils/tests/test_sparsefuncs.py
@@ -478,8 +478,16 @@ def test_inplace_normalize():
         for dtype in (np.float64, np.float32):
             X = rs.randn(10, 5).astype(dtype)
             X_csr = sp.csr_matrix(X)
-            inplace_csr_row_normalize(X_csr)
-            assert_equal(X_csr.dtype, dtype)
-            if inplace_csr_row_normalize is inplace_csr_row_normalize_l2:
-                X_csr.data **= 2
-            assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones)
+            for index_dtype in [np.int32, np.int64]:
+                # csr_matrix will use int32 indices by default,
+                # up-casting those to int64 when necessary
+                if index_dtype is np.int64:
+                    X_csr.indptr = X_csr.indptr.astype(index_dtype)
+                    X_csr.indices = X_csr.indices.astype(index_dtype)
+                assert X_csr.indices.dtype == index_dtype
+                assert X_csr.indptr.dtype == index_dtype
+                inplace_csr_row_normalize(X_csr)
+                assert_equal(X_csr.dtype, dtype)
+                if inplace_csr_row_normalize is inplace_csr_row_normalize_l2:
+                    X_csr.data **= 2
+                assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 6bebad884d835..37a0eb859f565 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -437,6 +437,45 @@ def test_check_array_min_samples_and_features_messages():
     assert_array_equal(y, y_checked)
 
 
+def test_check_array_complex_data_error():
+    X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # list of lists
+    X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # tuple of tuples
+    X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j))
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # list of np arrays
+    X = [np.array([1 + 2j, 3 + 4j, 5 + 7j]),
+         np.array([2 + 3j, 4 + 5j, 6 + 7j])]
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # tuple of np arrays
+    X = (np.array([1 + 2j, 3 + 4j, 5 + 7j]),
+         np.array([2 + 3j, 4 + 5j, 6 + 7j]))
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # dataframe
+    X = MockDataFrame(
+        np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]))
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+    # sparse matrix
+    X = sp.coo_matrix([[0, 1 + 2j], [0, 0]])
+    assert_raises_regex(
+        ValueError, "Complex data not supported", check_array, X)
+
+
 def test_has_fit_parameter():
     assert_false(has_fit_parameter(KNeighborsClassifier, "sample_weight"))
     assert_true(has_fit_parameter(RandomForestRegressor, "sample_weight"))
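The `check_array` change that follows hinges on NumPy raising a `ComplexWarning` when complex values are cast to a real dtype; escalating that warning to an error is what turns silent truncation of the imaginary part into the `ValueError` exercised above. A standalone sketch of the mechanism:

```python
import warnings
import numpy as np
from numpy.core.numeric import ComplexWarning

data = [[1 + 2j, 3 + 4j]]

with warnings.catch_warnings():
    warnings.simplefilter('error', ComplexWarning)
    try:
        np.array(data, dtype=np.float64)  # would silently drop 2j and 4j
    except ComplexWarning:
        print("Complex data not supported")

# With dtype=None no cast happens, so no warning is emitted: the result
# is simply complex, which is why a dtype check is still needed.
assert np.array(data).dtype.kind == 'c'
```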
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 5847b540d7b6c..080c30fcf9b2c 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 import scipy.sparse as sp
+from numpy.core.numeric import ComplexWarning
 
 from ..externals import six
 from ..utils.fixes import signature
@@ -307,6 +308,13 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,
     return spmatrix
 
 
+def _ensure_no_complex_data(array):
+    if hasattr(array, 'dtype') and array.dtype is not None \
+            and hasattr(array.dtype, 'kind') and array.dtype.kind == "c":
+        raise ValueError("Complex data not supported\n"
+                         "{}\n".format(array))
+
+
 def check_array(array, accept_sparse=False, dtype="numeric", order=None,
                 copy=False, force_all_finite=True, ensure_2d=True,
                 allow_nd=False, ensure_min_samples=1, ensure_min_features=1,
@@ -427,10 +435,28 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None,
     context = " by %s" % estimator_name if estimator is not None else ""
 
     if sp.issparse(array):
+        _ensure_no_complex_data(array)
         array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
                                       force_all_finite)
     else:
-        array = np.array(array, dtype=dtype, order=order, copy=copy)
+        # If np.array(..) gives a ComplexWarning, we convert the warning
+        # to an error. This is needed because specifying a non-complex
+        # dtype converts complex values to a real dtype, which would
+        # slip past the check made after this warnings context manager.
+        with warnings.catch_warnings():
+            try:
+                warnings.simplefilter('error', ComplexWarning)
+                array = np.array(array, dtype=dtype, order=order, copy=copy)
+            except ComplexWarning:
+                raise ValueError("Complex data not supported\n"
+                                 "{}\n".format(array))
+
+        # It is possible that np.array(..) gave no warning. This happens
+        # when no dtype conversion happened, for example dtype = None. The
+        # result is that np.array(..) produces an array of complex dtype
+        # and we need to catch and raise an exception for such cases.
+        _ensure_no_complex_data(array)
 
     if ensure_2d:
         if array.ndim == 1: