diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py
index d3b28e3..d7892cb 100644
--- a/scikitplot/decomposition.py
+++ b/scikitplot/decomposition.py
@@ -5,8 +5,8 @@
 properties shared by scikit-learn estimators. The specific requirements are
 documented per function.
 """
-from __future__ import absolute_import, division, print_function, \
-    unicode_literals
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -95,9 +95,11 @@ def plot_pca_component_variance(clf, title='PCA Component Explained Variances',
 
 
 def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection',
+                           dimensions=[0, 1],
                            biplot=False, feature_labels=None,
                            ax=None, figsize=None, cmap='Spectral',
-                           title_fontsize="large", text_fontsize="medium"):
+                           title_fontsize="large", text_fontsize="medium",
+                           label_dots=False):
     """Plots the 2-dimensional projection of PCA on a given dataset.
 
     Args:
@@ -163,32 +165,39 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection',
         fig, ax = plt.subplots(1, 1, figsize=figsize)
 
     ax.set_title(title, fontsize=title_fontsize)
-    classes = np.unique(np.array(y))
+
+    # Get unique classes from y, preserving order of class occurrence in y
+    _, class_indexes = np.unique(np.array(y), return_index=True)
+    classes = np.array(y)[np.sort(class_indexes)]
 
     colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(classes)))
 
     for label, color in zip(classes, colors):
-        ax.scatter(transformed_X[y == label, 0], transformed_X[y == label, 1],
+        ax.scatter(transformed_X[y == label, dimensions[0]], transformed_X[y == label, dimensions[1]],
                    alpha=0.8, lw=2, label=label, color=color)
 
+        if label_dots:
+            for dot in transformed_X[y == label][:, dimensions]:
+                ax.text(*dot, label)
+
     if biplot:
-        xs = transformed_X[:, 0]
-        ys = transformed_X[:, 1]
-        vectors = np.transpose(clf.components_[:2, :])
+        xs = transformed_X[:, dimensions[0]]
+        ys = transformed_X[:, dimensions[1]]
+        vectors = np.transpose(clf.components_[dimensions, :])
         vectors_scaled = vectors * [xs.max(), ys.max()]
         for i in range(vectors.shape[0]):
-            ax.annotate("", xy=(vectors_scaled[i, 0], vectors_scaled[i, 1]),
+            ax.annotate("", xy=(vectors_scaled[i, dimensions[0]], vectors_scaled[i, dimensions[1]]),
                         xycoords='data', xytext=(0, 0), textcoords='data',
                         arrowprops={'arrowstyle': '-|>', 'ec': 'r'})
 
-            ax.text(vectors_scaled[i, 0] * 1.05, vectors_scaled[i, 1] * 1.05,
+            ax.text(vectors_scaled[i, dimensions[0]] * 1.05, vectors_scaled[i, dimensions[1]] * 1.05,
                     feature_labels[i] if feature_labels else "Variable" + str(i),
                     color='b', fontsize=text_fontsize)
 
     ax.legend(loc='best', shadow=False, scatterpoints=1,
               fontsize=text_fontsize)
-    ax.set_xlabel('First Principal Component', fontsize=text_fontsize)
-    ax.set_ylabel('Second Principal Component', fontsize=text_fontsize)
+    ax.set_xlabel(f'Principal Component {dimensions[0]+1}', fontsize=text_fontsize)
+    ax.set_ylabel(f'Principal Component {dimensions[1]+1}', fontsize=text_fontsize)
     ax.tick_params(labelsize=text_fontsize)
 
     return ax
diff --git a/scikitplot/tests/test_decomposition.py b/scikitplot/tests/test_decomposition.py
index f7e555b..3c3e7af 100644
--- a/scikitplot/tests/test_decomposition.py
+++ b/scikitplot/tests/test_decomposition.py
@@ -9,6 +9,7 @@
 
 from scikitplot.decomposition import plot_pca_component_variance
 from scikitplot.decomposition import plot_pca_2d_projection
+import scikitplot
 
 
 class TestPlotPCAComponentVariance(unittest.TestCase):
@@ -81,3 +82,29 @@ def test_biplot(self):
         clf.fit(self.X)
         ax = plot_pca_2d_projection(clf, self.X, self.y, biplot=True,
                                     feature_labels=load_data().feature_names)
+
+    def test_label_order(self):
+        '''
+        Plot labels should be in the same order as the classes in the provided y-array
+        '''
+        np.random.seed(0)
+        clf = PCA()
+        clf.fit(self.X)
+
+        # define y such that the first entry is 1
+        y = np.copy(self.y)
+        y[0] = 1  # load_iris is by default ordered (i.e.: 0 0 0 ... 1 1 1 ... 2 2 2)
+
+        # test with len(y) == X.shape[0] with multiple rows belonging to the same class
+        ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral')
+        legend_labels = ax.get_legend_handles_labels()[1]
+        self.assertListEqual(['1', '0', '2'], legend_labels)
+
+        # test with len(y) == #classes with each row belonging to an individual class
+        y = list(range(len(y)))
+        np.random.shuffle(y)
+        ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral')
+        legend_labels = ax.get_legend_handles_labels()[1]
+        self.assertListEqual([str(v) for v in y], legend_labels)
+
+