Review notes (free text ahead of the first "diff --git" header is skipped by
`git apply` and `patch`).

Purpose of the patch: let plot_pca_2d_projection plot an arbitrary pair of
principal components (`dimensions`), optionally write the class label next to
every point (`label_dots`), and keep legend entries in the order in which the
classes first occur in `y`.

Amendments made to the submitted patch:
  * `dimensions` is appended at the end of the signature instead of being
    inserted before `biplot`, so existing positional callers keep working.
  * Its default is an immutable tuple rather than a shared list.
  * `y` is converted to an ndarray once, so `y == label` is an element-wise
    mask even when a plain Python list is passed (as the new test does).
  * In the biplot, `vectors_scaled` has exactly two columns; it is indexed
    by position (0/1) again. Indexing it with `dimensions` raised IndexError
    for any pair containing an index above 1, e.g. [1, 2].
  * `ax.text(*dot, label)` and the f-strings are replaced by forms that also
    parse on Python 2, which the module's `from __future__` imports target.
  * Comment typos fixed; blank lines added at end of file dropped; a
    regression test for non-default `dimensions` added.

TODO(review): the docstring `Args:` section of plot_pca_2d_projection lies
outside these hunks and still needs entries for `dimensions` and
`label_dots`. Line structure was restored from a whitespace-collapsed copy;
confirm the context lines against the target tree. The post-image hashes on
the `index` lines predate these amendments.

diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py
index d3b28e3..d7892cb 100644
--- a/scikitplot/decomposition.py
+++ b/scikitplot/decomposition.py
@@ -5,8 +5,8 @@
 properties shared by scikit-learn estimators. The specific requirements are
 documented per function.
 """
-from __future__ import absolute_import, division, print_function, \
-    unicode_literals
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -95,9 +95,10 @@ def plot_pca_component_variance(clf, title='PCA Component Explained Variances',
 
 
 def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection',
                            biplot=False, feature_labels=None,
                            ax=None, figsize=None, cmap='Spectral',
-                           title_fontsize="large", text_fontsize="medium"):
+                           title_fontsize="large", text_fontsize="medium",
+                           dimensions=(0, 1), label_dots=False):
     """Plots the 2-dimensional projection of PCA on a given dataset.
 
     Args:
@@ -163,32 +164,45 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection',
         fig, ax = plt.subplots(1, 1, figsize=figsize)
 
     ax.set_title(title, fontsize=title_fontsize)
-    classes = np.unique(np.array(y))
+
+    # Get unique classes from y, preserving order of class occurrence in y
+    y = np.asarray(y)
+    _, class_indexes = np.unique(y, return_index=True)
+    classes = y[np.sort(class_indexes)]
 
     colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(classes)))
 
     for label, color in zip(classes, colors):
-        ax.scatter(transformed_X[y == label, 0], transformed_X[y == label, 1],
+        ax.scatter(transformed_X[y == label, dimensions[0]],
+                   transformed_X[y == label, dimensions[1]],
                    alpha=0.8, lw=2, label=label, color=color)
 
+        if label_dots:
+            for dot in transformed_X[y == label][:, list(dimensions)]:
+                ax.text(dot[0], dot[1], str(label))
+
     if biplot:
-        xs = transformed_X[:, 0]
-        ys = transformed_X[:, 1]
-        vectors = np.transpose(clf.components_[:2, :])
+        xs = transformed_X[:, dimensions[0]]
+        ys = transformed_X[:, dimensions[1]]
+        vectors = np.transpose(clf.components_[list(dimensions), :])
         vectors_scaled = vectors * [xs.max(), ys.max()]
         for i in range(vectors.shape[0]):
+            # vectors_scaled has exactly two columns (one per plotted
+            # component), so it is indexed by position, not by `dimensions`.
             ax.annotate("", xy=(vectors_scaled[i, 0], vectors_scaled[i, 1]),
                         xycoords='data', xytext=(0, 0), textcoords='data',
                         arrowprops={'arrowstyle': '-|>', 'ec': 'r'})
 
             ax.text(vectors_scaled[i, 0] * 1.05, vectors_scaled[i, 1] * 1.05,
                     feature_labels[i] if feature_labels else "Variable" + str(i),
                     color='b', fontsize=text_fontsize)
 
     ax.legend(loc='best', shadow=False, scatterpoints=1,
               fontsize=text_fontsize)
-    ax.set_xlabel('First Principal Component', fontsize=text_fontsize)
-    ax.set_ylabel('Second Principal Component', fontsize=text_fontsize)
+    ax.set_xlabel('Principal Component {}'.format(dimensions[0] + 1),
+                  fontsize=text_fontsize)
+    ax.set_ylabel('Principal Component {}'.format(dimensions[1] + 1),
+                  fontsize=text_fontsize)
     ax.tick_params(labelsize=text_fontsize)
 
     return ax
diff --git a/scikitplot/tests/test_decomposition.py b/scikitplot/tests/test_decomposition.py
index f7e555b..3c3e7af 100644
--- a/scikitplot/tests/test_decomposition.py
+++ b/scikitplot/tests/test_decomposition.py
@@ -9,6 +9,7 @@
 
 from scikitplot.decomposition import plot_pca_component_variance
 from scikitplot.decomposition import plot_pca_2d_projection
+import scikitplot
 
 
 class TestPlotPCAComponentVariance(unittest.TestCase):
@@ -81,3 +82,36 @@ def test_biplot(self):
         clf.fit(self.X)
         ax = plot_pca_2d_projection(clf, self.X, self.y, biplot=True,
                                     feature_labels=load_data().feature_names)
+
+    def test_label_order(self):
+        '''
+        Plot labels should be in the same order as the classes in the provided y-array
+        '''
+        np.random.seed(0)
+        clf = PCA()
+        clf.fit(self.X)
+
+        # define y such that the first entry is 1
+        y = np.copy(self.y)
+        y[0] = 1  # load_iris is by default ordered (i.e.: 0 0 0 ... 1 1 1 ... 2 2 2)
+
+        # test with len(y) == X.shape[0] with multiple rows belonging to the same class
+        ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral')
+        legend_labels = ax.get_legend_handles_labels()[1]
+        self.assertListEqual(['1', '0', '2'], legend_labels)
+
+        # test with len(y) == #classes with each row belonging to an individual class
+        y = list(range(len(y)))
+        np.random.shuffle(y)
+        ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral')
+        legend_labels = ax.get_legend_handles_labels()[1]
+        self.assertListEqual([str(v) for v in y], legend_labels)
+
+    def test_dimensions(self):
+        np.random.seed(0)
+        clf = PCA()
+        clf.fit(self.X)
+        ax = plot_pca_2d_projection(clf, self.X, self.y, dimensions=[1, 2],
+                                    biplot=True, label_dots=True)
+        self.assertEqual('Principal Component 2', ax.get_xlabel())
+        self.assertEqual('Principal Component 3', ax.get_ylabel())