automl · skylogic004 · Sep 17, 2021 · Sep 17, 2021 · Sep 20, 2021 · Sep 21, 2021
diff --git a/doc_src/manual.rst b/doc_src/manual.rst
@@ -6,11 +6,13 @@ Manual
 
 Quick Start
 -----------
-To run the examples, just download the `data <https://github.com/automl/fanova/blob/master/fanova/example/online_lda.tar.gz>`_ and start the python console.
+To run the examples, download the data from the `GitHub repository <https://github.com/automl/fanova/tree/master/examples/example_data/online_lda>`_ and start the python console.
 We can then import fANOVA and start it by typing
 
     >>> from fanova import fANOVA
     >>> import csv
+    >>> import os
+    >>> import numpy as np
     >>> path = os.path.dirname(os.path.realpath(__file__))
     >>> X = np.loadtxt(path + '/example_data/online_lda/online_lda_features.csv', delimiter=",")
     >>> Y = np.loadtxt(path + '/example_data/online_lda/online_lda_responses.csv', delimiter=",")
@@ -21,12 +23,18 @@ This creates a new fANOVA object and fits the Random Forest on the specified dat
 To compute now the marginal of the first parameter type:
 
     >>> f.quantify_importance((0, ))
-        0.075414122571199116
+        {(0,): {'individual importance': 0.07567390839783641,
+        'total importance': 0.07567390839783641,
+        'individual std': 0.020053764191788233,
+        'total std': 0.020053764191788233}}
 
 fANOVA also allows to specify parameters by their names.
 
-    >>> f.quantify_importance(("Col0", ))
-    	0.075414122571199116
+    >>> f.quantify_importance(("x_000", ))
+        {('x_000',): {'individual importance': 0.07567390839783641,
+        'total importance': 0.07567390839783641,
+        'individual std': 0.020053764191788233,
+        'total std': 0.020053764191788233}}
 
 
 Advanced
@@ -48,9 +56,9 @@ You can also specify the number of trees in the random forest as well as the min
 More functions
 --------------
 
-    * **f.get_most_important_pairwise_marginals(n)**
+    * **f.get_most_important_pairwise_marginals(n=N)**
 
-    Returns the **n** most important pairwise marginals
+    Returns the **N** most important pairwise marginals
 
     * **f.get_most_important_pairwise_marginals(params)**
 
@@ -91,7 +99,7 @@ The same can been done for pairwise marginals
 
     >>> vis.plot_pairwise_marginal([0,1])
 
-.. image:: ../examples/example_data/online_lda/figure2.png
+.. image:: ../examples/example_data/online_lda/pairwise.png
 
 
 If you are just interested in the N most important pairwise marginals you can plot them through:
@@ -120,7 +128,13 @@ You will also find an extra directory in your specified plot directory called 'i
 How to load a CSV-file
 --------------------------
 
-import numpy as np
+    >>> import numpy as np
+    >>> X = np.loadtxt('your_file.csv', delimiter=",")
 
-data = np.loadtxt('your_file.csv', delimiter=",")
+Alternatively, pandas may be used:
 
+    >>> import pandas as pd
+    >>> df = pd.read_csv('your_file.csv')
+    >>> X = df[your_param_columns]
+    >>> Y = df[your_score_column]
+    >>> f = fANOVA(X, Y, config_space=cs)
diff --git a/fanova/visualizer.py b/fanova/visualizer.py
@@ -299,7 +299,7 @@ def generate_marginal(self, p, resolution=100):
             std = np.sqrt(v)
             return mean, std
 
-    def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None):
+    def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None, ax=None):
         """
         Creates a plot of marginal of a selected parameter
 
@@ -310,14 +310,31 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
         resolution: int
             Number of samples to generate from the parameter range as values to predict
         log_scale: boolean
-            If log scale is required or not. If no value is given, it is deduced from the ConfigSpace provided
+            Whether to plot using log scale or not. If no value is given, it is taken from the parameter's ConfigSpace definition; if that is not log-scale, log scale is still used when the sampled grid values are not evenly spaced.
         show: boolean
             whether to call plt.show() to show plot directly as interactive matplotlib-plot
         incumbents: List[Configuration]
             list of ConfigSpace.Configurations that are marked as incumbents
+        ax: AxesSubplot, optional
+            A matplotlib AxesSubplot on which to draw the plot. If None, a new figure is created. If provided, `show` is ignored and plt.show() is not called.
+
+        Returns
+        -------
+        ax: AxesSubplot
+            A matplotlib AxesSubplot containing the plot. To save it to disk use `ax.get_figure().savefig('filename.png')`.
         """
         param, param_name, param_idx = self._get_parameter(param)
 
+        # get figure AxesSubplot to plot on (or make a new one)
+        if (ax is None):
+            # create empty figure to work with
+            fig, ax = plt.subplots(1)
+        else:
+            fig = ax.get_figure()
+
+            # don't show the figure when user has provided their own figure AxesSubplot
+            show = False
+
         # check if categorical
         if isinstance(param, NumericalHyperparameter):
             # PREPROCESS
@@ -328,33 +345,39 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             lower_curve = mean - std
             upper_curve = mean + std
 
+
+            # auto-detect whether to do log-scale
             if log_scale is None:
-                log_scale = param.log or (np.diff(grid).std() > 0.000001)
+                # take log value from ConfigSpace
+                log_scale = param.log
+
+                # auto-detect if log-scale might be better
+                if not log_scale and (np.diff(grid).std() > 0.000001):
+                    self.logger.info("Plotting this parameter, %s, in log-scale because auto-detected that it might be better." % param_name)
+                    log_scale = True
 
             # PLOT
             if log_scale:
-                if np.diff(grid).std() > 0.000001:
-                    self.logger.info("It might be better to plot this parameter '%s' in log-scale.", param_name)
-                plt.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label)
+                ax.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label)
             else:
-                plt.plot(grid, mean, 'b', label='predicted %s' % self._y_label)
-            plt.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std')
+                ax.plot(grid, mean, 'b', label='predicted %s' % self._y_label)
+            ax.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std')
 
             if incumbents is not None:
                 if not isinstance(incumbents, list):
                     incumbents = [incumbents]
                 values = [inc[param_name] for inc in incumbents if param_name in inc and inc[param_name] is not None]
                 indices = [(np.abs(np.asarray(grid) - val)).argmin() for val in values]
                 if len(indices) > 0:
-                    plt.scatter(list([grid[idx] for idx in indices]),
+                    ax.scatter(list([grid[idx] for idx in indices]),
                                 list([mean[idx] for idx in indices]),
                                 label='incumbent', c='black', marker='.', zorder=999)
 
-            plt.xlabel(param_name)
-            plt.ylabel(self._y_label)
-            plt.grid(True)
-            plt.legend()
-            plt.tight_layout()
+            ax.set_xlabel(param_name)
+            ax.set_ylabel(self._y_label)
+            ax.grid(True)
+            ax.legend()
+            fig.tight_layout()
 
         else:
             # PREPROCESS
@@ -376,8 +399,8 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             max_y = mean[0]
 
             # PLOT
-            b = plt.boxplot([[x] for x in mean])
-            plt.xticks(indices, labels)
+            b = ax.boxplot([[x] for x in mean])
+            ax.set_xticks(indices, labels)
             # blow up boxes
             for box, std_ in zip(b["boxes"], std):
                 y = box.get_ydata()
@@ -388,16 +411,17 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
                 min_y = min(min_y, y[0] - std_)
                 max_y = max(max_y, y[2] + std_)
 
-            plt.ylim([min_y, max_y])
+            ax.set_ylim([min_y, max_y])
 
-            plt.ylabel(self._y_label)
-            plt.xlabel(param_name)
-            plt.tight_layout()
+            ax.set_ylabel(self._y_label)
+            ax.set_xlabel(param_name)
+            fig.tight_layout()
 
         if show:
             plt.show()
-        else:
-            return plt
+
+        # Always return the matplotlib Axes (so callers can save or further customize the figure)
+        return ax
 
     def create_most_important_pairwise_marginal_plots(self, params=None, n=20, three_d=True, resolution=20):
         """