W02 changes #38

Merged
merged 2 commits into from
Apr 28, 2021
10 changes: 5 additions & 5 deletions slides.py
Original file line number Diff line number Diff line change
@@ -60,13 +60,13 @@ def full_slides():


def compile_all():
files = (file.with_suffix(".tex") for file, _, _ in iter_all())
files = (file for file, _, _ in iter_all(ext="tex"))
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(pdflatex, files)


def compile_git():
files = (file.with_suffix(".tex") for file, _, _ in iter_all() if check_git(file.with_suffix(".tex")))
files = (file for file, _, _ in iter_all(ext="tex") if check_git(file.with_suffix(".tex")))
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(pdflatex, files)

@@ -80,7 +80,7 @@ def fits_identifier(week, slide):
return week == week_id
return week == week_id and slide == slide_id

files = (file.with_suffix(".tex") for file, week, slide in iter_all() if fits_identifier(week, slide))
files = (file for file, week, slide in iter_all(ext="tex") if fits_identifier(week, slide))
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(pdflatex, files)

@@ -92,9 +92,9 @@ def cleanup():


# Helper functions
def iter_all():
def iter_all(ext="pdf"):
folder_pattern = re.compile("w(\d{2})_")
slide_pattern = re.compile("t(\d{2,3})_[\w_]+\.pdf")
slide_pattern = re.compile("t(\d{2,3})_[\w_]+\." + ext)
for week_folder in GIT_REPO.iterdir():
week_number = folder_pattern.match(week_folder.name)
if week_number is None: # folder does not match pattern
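The slides.py change above threads the file extension through `iter_all` instead of rewriting suffixes with `.with_suffix(".tex")` at every call site. A minimal sketch of the pattern construction after the refactor (folder and slide names follow the repo's `w02_evaluation`/`t01_big_picture` convention; `re.escape` is a hardening tweak not in the original):

```python
import re

def make_patterns(ext="pdf"):
    # Week folders look like "w02_evaluation"; slides like "t01_big_picture.pdf".
    # Passing ext in lets compile_all()/compile_git() iterate .tex files directly.
    folder_pattern = re.compile(r"w(\d{2})_")
    slide_pattern = re.compile(r"t(\d{2,3})_[\w_]+\." + re.escape(ext))
    return folder_pattern, slide_pattern

folder_pat, slide_pat = make_patterns(ext="tex")
print(folder_pat.match("w02_evaluation").group(1))     # prints 02
print(slide_pat.match("t01_big_picture.tex").group(1)) # prints 01
```

With `ext="tex"` the slide pattern no longer matches compiled PDFs, which is exactly why the callers can drop the suffix rewriting.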
Binary file modified slides/w02_evaluation.pdf
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file modified w02_evaluation/t01_big_picture.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion w02_evaluation/t01_big_picture.tex
@@ -55,7 +55,7 @@
\end{itemize}
\end{itemize}
\begin{center}
\includegraphics[width=.5\textwidth]{overfitting}
\includegraphics[width=.5\textwidth]{images/overfitting1}
\end{center}
Usually model performance gets better with more data/higher model complexity
and then worse, but see \lit{\href{https://arxiv.org/pdf/1912.02292.pdf}{Nakkiran et al. 2019}}%\url{https://openai.com/blog/deep-double-descent/}.
Binary file modified w02_evaluation/t02_evaluation.pdf
Binary file not shown.
35 changes: 19 additions & 16 deletions w02_evaluation/t02_evaluation.tex
@@ -99,22 +99,24 @@
with measurement error $\epsilon$.

\begin{center}
\includegraphics[width=.7\textwidth]{poly}
\includegraphics[width=.7\textwidth]{images/poly}
\end{center}

Assume data generating process unknown. Approximate with $d$th-degree polynomial:
Assume data generating process unknown.
Approximate with $d$th-degree polynomial:
\[ f(\mathbf{x} | \mathbf{\theta}) = \theta_0 + \theta_1 x + \cdots + \theta_d x^d = \sum_{j = 0}^{d} \theta_j x^j \]

\framebreak

How should we choose $d$?

\begin{center}
\includegraphics[width=.5\textwidth]{poly-train}
\includegraphics[width=.5\textwidth]{images/poly-train}
\end{center}

d=1: MSE = 0.036 -- clear underfitting, d=3: MSE = 0.003 -- ok?, d=9: MSE =
0.001 -- clear overfitting
d=1: MSE = 0.036 -- clear underfitting,
d=3: MSE = 0.003 -- ok?,
d=9: MSE = 0.001 -- clear overfitting

Simply using the training error seems to be a bad idea.

@@ -123,30 +123,31 @@
\begin{frame}[c,allowframebreaks]{Outer Loss Example: Polynomial Regression}

\begin{center}
\includegraphics[width=.7\textwidth]{polyt}
\includegraphics[width=.7\textwidth]{images/polyt}
\end{center}

\framebreak

How should we choose $d$?

\begin{center}
\includegraphics[width=.5\textwidth]{poly-test}
\includegraphics[width=.5\textwidth]{images/poly-test}
\end{center}

d=1: MSE = 0.038 -- clear underfitting, d=3: MSE = 0.002 -- ok?, d=9: MSE =
0.046 -- clear overfitting
d=1: MSE = 0.038 -- clear underfitting,
d=3: MSE = 0.002 -- ok?,
d=9: MSE = 0.046 -- clear overfitting

\framebreak

\begin{center}
\includegraphics[width=.9\textwidth]{bias-variance}
\includegraphics[width=.9\textwidth]{images/bias-variance}
\end{center}

\end{frame}
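The train-versus-test pattern on these polynomial-regression slides can be reproduced on synthetic data. Everything below (data-generating process, noise level, sample sizes) is an illustrative assumption, so the MSE values will not match the slide's numbers, but the qualitative story does: training error only falls with degree $d$, while the held-out error picks out a middle-complexity model.

```python
import numpy as np

rng = np.random.default_rng(0)  # illustrative data, not the slides' dataset

x = np.sort(rng.uniform(0.0, 1.0, 40))
f = lambda x: np.sin(2 * np.pi * x)        # stand-in for the unknown process
y = f(x) + rng.normal(0.0, 0.15, x.size)   # measurement error epsilon

x_train, y_train = x[::2], y[::2]          # 20 training points
x_test, y_test = x[1::2], y[1::2]          # 20 held-out points

results = {}
for d in (1, 3, 9):
    theta = np.polyfit(x_train, y_train, d)  # least-squares polynomial fit
    mse_train = float(np.mean((np.polyval(theta, x_train) - y_train) ** 2))
    mse_test = float(np.mean((np.polyval(theta, x_test) - y_test) ** 2))
    results[d] = (mse_train, mse_test)
    print(f"d={d}: train MSE={mse_train:.3f}, test MSE={mse_test:.3f}")
```

Because the degree-$d$ model class is nested inside the degree-$(d+1)$ class, training MSE can never increase with $d$, which is why the slides call selecting $d$ by training error "a bad idea".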

\begin{frame}[c]{General Trade-Off Between Error and Complexity}
\includegraphics[width=\textwidth]{overfitting}
\includegraphics[width=\textwidth]{images/overfitting2}
\end{frame}

\begin{frame}[c]{Resampling}
@@ -172,7 +175,7 @@

\begin{center}
% FIGURE SOURCE: https://docs.google.com/presentation/d/1sKtnj5nIQrcOGU7rTisMsppUGOk7UX2gbjKhtQmTX7g/edit?usp=sharing
\includegraphics[height=.5\textheight]{crossvalidation.png}
\includegraphics[height=.5\textheight]{images/crossvalidation}
\end{center}
10-fold cross-validation is common.
\end{frame}
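The resampling scheme on this slide amounts to partitioning the example indices into $k$ disjoint folds and rotating the held-out fold. A minimal index-level sketch (shuffling and the helper name are assumptions, not the slides' code):

```python
import numpy as np

def kfold_indices(n, k=10, seed=0):
    """Split indices 0..n-1 into k disjoint folds after shuffling."""
    rng = np.random.default_rng(seed)
    return np.array_split(rng.permutation(n), k)

folds = kfold_indices(50, k=10)
for i, test_idx in enumerate(folds):
    train_idx = np.concatenate([f for j, f in enumerate(folds) if j != i])
    # fit on train_idx, evaluate on test_idx; average the k held-out errors
```

Every example is held out exactly once, so the averaged fold errors use all of the data for evaluation while never evaluating a model on its own training points.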
@@ -257,15 +260,15 @@
\end{itemize}

\begin{center}
\includegraphics[height=.35\textheight]{learning-curve}
\includegraphics[height=.35\textheight]{images/learning-curve}
\end{center}

\framebreak

Ideal learning curve:

\begin{center}
\includegraphics[height=.7\textheight]{learning-curve-ideal}
\includegraphics[height=.7\textheight]{images/learning-curve-ideal}
\end{center}

\framebreak
@@ -281,7 +284,7 @@
\end{itemize}

\begin{center}
\includegraphics[width=.7\textwidth]{learning-curve-underfitting}
\includegraphics[width=.7\textwidth]{images/learning-curve-underfitting}
\end{center}

\framebreak
@@ -294,7 +297,7 @@
\end{itemize}

\begin{center}
\includegraphics[width=.7\textwidth]{learning-curve-overfitting}
\includegraphics[width=.7\textwidth]{images/learning-curve-overfitting}
\end{center}

\end{enumerate}
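A learning curve like the ones on these slides is just error as a function of training-set size, with a fixed validation set. A sketch under assumed conditions (linear ground truth, Gaussian noise; none of this is the slides' data):

```python
import numpy as np

rng = np.random.default_rng(1)  # illustrative setup, not the slides' data

# Fixed validation set drawn from the same (assumed) data-generating process
x_val = rng.uniform(-1.0, 1.0, 200)
y_val = 2.0 * x_val + rng.normal(0.0, 0.3, 200)

def fit_and_eval(n):
    """Fit a line on n fresh training points; return train and validation MSE."""
    x_tr = rng.uniform(-1.0, 1.0, n)
    y_tr = 2.0 * x_tr + rng.normal(0.0, 0.3, n)
    slope, intercept = np.polyfit(x_tr, y_tr, 1)
    err_tr = float(np.mean((slope * x_tr + intercept - y_tr) ** 2))
    err_val = float(np.mean((slope * x_val + intercept - y_val) ** 2))
    return err_tr, err_val

for n in (5, 20, 80, 320):
    err_tr, err_val = fit_and_eval(n)
    print(f"n={n:3d}: train MSE={err_tr:.3f}, val MSE={err_val:.3f}")
```

As $n$ grows, the two curves should close in on each other near the noise floor, which is the "ideal" shape the slides describe; a persistent gap signals overfitting, two high plateaus signal underfitting.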
Binary file added w02_evaluation/t03_benchmarking.pdf
Binary file not shown.
14 changes: 7 additions & 7 deletions w02_evaluation/t03_benchmarking.tex
@@ -92,7 +92,7 @@
\end{itemize}

\begin{center}
\includegraphics[height=.5\textheight]{tests_overview.png}
\includegraphics[height=.5\textheight]{images/tests_overview}
\end{center}

\end{frame}
@@ -110,7 +110,7 @@

\medskip
\begin{minipage}{0.25\textwidth}
\includegraphics[width=\textwidth]{mcnemar_1.png}
\includegraphics[width=\textwidth]{images/mcnemar_1}
\end{minipage}
\begin{minipage}{0.74\textwidth}
\begin{itemize}
@@ -133,7 +133,7 @@
Even if the models have the \textbf{same} errors (indicating equal performance), cells B and C may be different because the models may misclassify different instances.
\end{minipage}
\begin{minipage}[c]{0.25\linewidth}
\includegraphics[width=\textwidth]{mcnemar_1.png}
\includegraphics[width=\textwidth]{images/mcnemar_1}
\end{minipage}

\medskip
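The disagreement cells B and C discussed above are all the McNemar statistic uses. A sketch with the common continuity correction (the slides' exact variant may differ):

```python
def mcnemar_statistic(b, c):
    """Chi-squared statistic from the two disagreement cells of the 2x2 table,
    with the standard continuity correction: (|B - C| - 1)^2 / (B + C)."""
    if b + c == 0:
        return 0.0  # models never disagree; no evidence either way
    return (abs(b - c) - 1) ** 2 / (b + c)

# Models disagree on 30 instances: model 1 wrong / model 2 right on 10, reverse on 20
print(mcnemar_statistic(10, 20))  # prints 2.7
```

Under the null hypothesis of equal performance the statistic is approximately chi-squared with one degree of freedom, so values above about 3.84 reject at the 5% level.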
@@ -290,7 +290,7 @@
\end{itemize}

\begin{center}
\includegraphics[height=.5\textheight]{crit-diff-nemenyi}
\includegraphics[height=.5\textheight]{images/crit-diff-nemenyi}
\end{center}

\framebreak
@@ -316,7 +316,7 @@
significantly different from the baseline
\end{itemize}
\begin{center}
\includegraphics[height=.6\textheight]{crit-diff-bd}
\includegraphics[height=.6\textheight]{images/crit-diff-bd}
\end{center}

\end{frame}
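The critical-difference plots referenced above rest on a single quantity: two learners differ significantly when their mean ranks differ by more than $\mathrm{CD} = q_\alpha \sqrt{k(k+1)/(6N)}$ for $k$ learners over $N$ datasets. A sketch (the $q_\alpha$ value below is an illustrative constant taken from the Studentized-range tables in Demšar 2006; verify it before real use):

```python
import math

def critical_difference(q_alpha, k, n_datasets):
    """Nemenyi critical difference for mean ranks of k learners over N datasets."""
    return q_alpha * math.sqrt(k * (k + 1) / (6.0 * n_datasets))

# e.g. q_alpha approx. 2.569 for k=4 learners at alpha=0.05 (assumed table value)
print(round(critical_difference(2.569, 4, 20), 3))
```

The Bonferroni-Dunn variant on the slide uses the same formula with a smaller $q_\alpha$, since only comparisons against the baseline are made.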
@@ -329,14 +329,14 @@
\bigskip
Boxplots
\begin{center}
\includegraphics[height=.65\textheight]{multiple-boxplots}
\includegraphics[height=.65\textheight]{images/multiple-boxplots}
\end{center}

\framebreak

Rank plots
\begin{center}
\includegraphics[height=.7\textheight]{multiple-ranks}
\includegraphics[height=.7\textheight]{images/multiple-ranks}
\end{center}

\end{frame}
Binary file modified w02_evaluation/t04_nested_evaluation.pdf
Binary file not shown.
27 changes: 12 additions & 15 deletions w02_evaluation/t04_nested_evaluation.tex
@@ -33,10 +33,8 @@
\maketitle

\begin{frame}[c]{Motivation}
Selecting the best model from a set of potential candidates (e.g.\ different
classes of learners, different hyperparameter settings, different feature
sets, different preprocessing\ldots) is an important part of most machine
learning problems. However,
Selecting the best model from a set of potential candidates (e.g.\ different classes of learners, different hyperparameter settings, different feature sets, different preprocessing\ldots) is an important part of most machine learning problems.
However,

\begin{itemize}
\item cannot evaluate selected learner on the same
@@ -66,17 +64,16 @@
\framebreak

\begin{center}
\includegraphics[height=.5\textheight]{example-nested-resampling}
\includegraphics[height=.5\textheight]{images/example-nested-resampling}
\end{center}

\begin{itemize}
\item shown is best ``tuning error'' (i.e.\ performance of
model with fixed $\conf$ in cross-validation) after $k$ tuning iterations
\item shown is best ``tuning error'' (i.e.\ performance of model with fixed $\conf$ in cross-validation) after $k$ tuning iterations
\item evaluated for different data set sizes
\end{itemize}

\begin{center}
\includegraphics[height=.6\textheight]{dist-tuning1}
\includegraphics[height=.6\textheight]{images/dist-tuning1}
\end{center}

\begin{itemize}
@@ -88,7 +85,7 @@
\framebreak

\begin{center}
\includegraphics[height=.55\textheight]{dist-tuning2}
\includegraphics[height=.55\textheight]{images/dist-tuning2}
\end{center}

\begin{itemize}
@@ -108,7 +105,7 @@
preprocessing) evaluated \textbf{on training data}
\item test set only touched once, so no way of ``cheating''
\item test dataset is only used once \emph{after} model is completely
trained (including e.g.\ deciding hyper-parameter values)
trained (including e.g.\ deciding hyperparameter values)
\item performance estimates from test set now \textbf{unbiased estimates} of the true performance

\framebreak
@@ -138,7 +135,7 @@
resampling

\begin{center}
\includegraphics[height=0.6\textheight]{Nested_Resampling.png}
\includegraphics[height=0.6\textheight]{images/Nested_Resampling}
\end{center}

\framebreak
@@ -153,7 +150,7 @@
\end{footnotesize}

\begin{center}
\includegraphics[height=0.55\textheight]{Nested_Resampling.png}
\includegraphics[height=0.55\textheight]{images/Nested_Resampling}
\end{center}

\framebreak
@@ -168,7 +165,7 @@
\end{footnotesize}

\begin{center}
\includegraphics[height=0.55\textheight]{Nested_Resampling.png}
\includegraphics[height=0.55\textheight]{images/Nested_Resampling}
\end{center}

\framebreak
@@ -179,7 +176,7 @@
\end{footnotesize}

\begin{center}
\includegraphics[height=0.6\textheight]{Nested_Resampling.png}
\includegraphics[height=0.6\textheight]{images/Nested_Resampling}
\end{center}

\end{frame}
@@ -191,7 +188,7 @@
resampling:

\begin{center}
\includegraphics[width=0.8\textwidth]{nested-resampling-example}
\includegraphics[width=0.8\textwidth]{images/nested-resampling-example}
\end{center}
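The outer/inner structure described on these slides can be sketched end to end, reusing polynomial-degree selection as the hyperparameter to tune. Data, candidate degrees, and fold counts are illustrative assumptions, not the slides' setup; the point is the structure: the inner CV sees only the outer training split, and each outer test fold is touched exactly once.

```python
import numpy as np

rng = np.random.default_rng(2)  # illustrative data, not the slides' dataset
x = rng.uniform(0.0, 1.0, 60)
y = np.sin(2 * np.pi * x) + rng.normal(0.0, 0.2, 60)

def mse(theta, xs, ys):
    return float(np.mean((np.polyval(theta, xs) - ys) ** 2))

def inner_cv_select(x_tr, y_tr, degrees=(1, 3, 9), k=5):
    """Inner loop: pick the degree with the best k-fold CV error,
    using only the outer training split."""
    folds = np.array_split(np.arange(len(x_tr)), k)
    def cv_error(d):
        errs = []
        for i, te in enumerate(folds):
            tr = np.concatenate([f for j, f in enumerate(folds) if j != i])
            theta = np.polyfit(x_tr[tr], y_tr[tr], d)
            errs.append(mse(theta, x_tr[te], y_tr[te]))
        return float(np.mean(errs))
    return min(degrees, key=cv_error)

# Outer loop: unbiased estimate of the whole selection-plus-fitting procedure
outer = np.array_split(rng.permutation(len(x)), 3)
outer_errors = []
for i, te in enumerate(outer):
    tr = np.concatenate([f for j, f in enumerate(outer) if j != i])
    best_d = inner_cv_select(x[tr], y[tr])
    theta = np.polyfit(x[tr], y[tr], best_d)  # refit on the full outer training set
    outer_errors.append(mse(theta, x[te], y[te]))
print(f"nested CV estimate: {np.mean(outer_errors):.3f}")
```

Averaging the outer-fold errors estimates the performance of the *procedure* (tune, then refit), which is exactly the quantity the slides argue a single shared CV cannot estimate without bias.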

