Skip to content

Commit 11fa2aa

Browse files
committed
Add w02_t06 on evaluation over time
1 parent 81ccf49 commit 11fa2aa

File tree

2 files changed

+208
-0
lines changed

2 files changed

+208
-0
lines changed

w02_evaluation/t06_over_time.pdf

446 KB
Binary file not shown.

w02_evaluation/t06_over_time.tex

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
2+
\input{../latex_main/main.tex}
3+
4+
5+
6+
\title[AutoML: Evaluation]{AutoML: Evaluation} % week title
7+
\subtitle{Visualizing Evaluation over Time} % video title
8+
\author[Marius Lindauer]{Bernd Bischl \and Frank Hutter \and Lars Kotthoff\newline \and \underline{Marius Lindauer}}
9+
\institute{}
10+
\date{}
11+
12+
% \reffootnote{<text>}: typeset <text> as a footnote without a visible mark.
%   #1 -- the footnote text; it is set in \tiny and followed by \vspace*{1em}.
% \thefootnote is redefined to empty inside \begingroup...\endgroup, so the
% empty mark applies only to this one footnote. \addtocounter{footnote}{-1}
% then compensates for the counter step performed by \footnote, so regular
% numbered footnotes keep their sequence.
% NOTE(review): judging by the name, this is presumably meant for literature
% references on slides -- confirm against its uses in other decks.
\newcommand\reffootnote[1]{%
13+
\begingroup
14+
\renewcommand\thefootnote{}\footnote{
15+
\tiny #1
16+
\vspace*{1em}}%
17+
\addtocounter{footnote}{-1}%
18+
\endgroup
19+
}
20+
21+
% \AtBeginSection[] % Do nothing for \section*
22+
% {
23+
% \begin{frame}{Outline}
24+
% \bigskip
25+
% \vfill
26+
% \tableofcontents[currentsection]
27+
% \end{frame}
28+
% }
29+
30+
\begin{document}
31+
32+
\maketitle
33+
34+
\begin{frame}[c]{Motivation}
35+
\begin{itemize}
36+
\item If we define AutoML as an optimization process, the incumbent solution\\ (i.e., the best configuration found so far) gradually improves over time
37+
\medskip
38+
\pause
39+
\item We don't know when users will stop the AutoML process
40+
\begin{itemize}
41+
\item Running over the coffee break (15min)
42+
\item Running during a meeting (1h)
43+
\item Running overnight (16h)
44+
\item Running over the weekend (48+h)
45+
\end{itemize}
46+
\pause
47+
\medskip
48+
\item[$\leadsto$] Anytime performance of AutoML is important
49+
\begin{itemize}
50+
\item i.e., the AutoML tool should return the best possible solution at each time point
51+
\end{itemize}
52+
\end{itemize}
53+
\end{frame}
54+
55+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
56+
57+
\begin{frame}{Observing Performance over Time}
58+
(\textit{Empty slides for drawing something live in the video.})
59+
\end{frame}
60+
61+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
62+
63+
\begin{frame}[c]{Repeated Experiments}
64+
65+
\centering
66+
\includegraphics[width=0.6\textwidth]{plots/evaluations/4_smac4hpo.png}
67+
68+
\pause
69+
$\leadsto$ Don't linearly interpolate between points!
70+
71+
\end{frame}
72+
73+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
74+
75+
\begin{frame}[c]{Step Functions}
76+
77+
\centering
78+
\includegraphics[width=0.6\textwidth]{plots/evaluations/5_smac4hpo_step.png}
79+
80+
\pause
81+
$\leadsto$ Do we care about the number of function evaluations?
82+
83+
\end{frame}
84+
85+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86+
87+
\begin{frame}[c]{CPU Time}
88+
89+
\centering
90+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/5_smac4hpo_step.png}
91+
92+
\pause
93+
$\leadsto$ We might lose information in the beginning.
94+
95+
\end{frame}
96+
97+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
98+
99+
\begin{frame}[c]{x-log scale}
100+
101+
\centering
102+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/6_1_smac4hpo_step_log_x.png}
103+
104+
\pause
105+
$\leadsto$ Small differences on the y-axis are hard to spot.
106+
107+
\end{frame}
108+
109+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
110+
111+
\begin{frame}[c]{y-log scale}
112+
113+
\centering
114+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/6_2_smac4hpo_step_log_y.png}
115+
116+
\pause
117+
$\leadsto$ Log on both?
118+
119+
\end{frame}
120+
121+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
122+
123+
\begin{frame}[c]{x-y-log scale}
124+
125+
\centering
126+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/6_3_smac4hpo_step_log_x_y.png}
127+
128+
\pause
129+
$\leadsto$ Can we summarize the individual curves?
130+
131+
\end{frame}
132+
133+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
134+
135+
\begin{frame}[c]{Mean $\pm$ Standard Deviation: $\mu \pm \sigma$}
136+
137+
\centering
138+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/8_1_smac4hpo_mean_stdev.png}
139+
140+
\pause
141+
$\leadsto$ Mean $\pm$ standard deviation works only if uncertainty is symmetric.
142+
143+
\end{frame}
144+
145+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
146+
147+
\begin{frame}[c]{Mean $\pm$ Standard Error: $\mu \pm \frac{\sigma}{\sqrt{n}}$}
148+
149+
\centering
150+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/8_2_smac4hpo_mean_stderr.png}
151+
152+
\pause
153+
$\leadsto$ Confidence of the mean estimate!
154+
155+
\end{frame}
156+
157+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
158+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
159+
160+
\begin{frame}[c]{Median + 25th/75th Percentile}
161+
162+
\centering
163+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/8_3_smac4hpo_median_percentile.png}
164+
165+
\pause
166+
$\leadsto$ Also works for asymmetric uncertainties.
167+
168+
\end{frame}
169+
170+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
171+
172+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
173+
174+
\begin{frame}[c]{Comparing 2 AutoML Optimizers}
175+
176+
\centering
177+
\includegraphics[width=0.6\textwidth]{plots/cpu_time/9_2_compare_mean_stderr.png}
178+
179+
\end{frame}
180+
181+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
182+
183+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
184+
185+
\begin{frame}[c]{Summary}
186+
187+
\begin{enumerate}
188+
\item Plotting anytime performance is helpful
189+
\medskip
190+
\item On real benchmarks, it is often better to plot CPU time instead of function evaluations
191+
\medskip
192+
\item Use step functions!
193+
\medskip
194+
\item Consider log-scales on x and/or y
195+
\medskip
196+
\item Consider different ways of plotting the uncertainty of cost observations
197+
\end{enumerate}
198+
\bigskip
199+
\includegraphics[width=0.3\textwidth]{plots/cpu_time/9_1_compare_mean_stdev.png}
200+
\includegraphics[width=0.3\textwidth]{plots/cpu_time/9_2_compare_mean_stderr.png}
201+
\includegraphics[width=0.3\textwidth]{plots/cpu_time/9_3_compare_median_percentile.png}
202+
203+
\end{frame}
204+
205+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
206+
207+
208+
\end{document}

0 commit comments

Comments
 (0)