Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

W08 multicrit #11

Merged
merged 55 commits into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
384381e
general multicrit stuff
ja-thomas Mar 23, 2020
cb9aa40
multicrit intro
ja-thomas Apr 6, 2020
96dda4e
iterate multicrit eas
ja-thomas Apr 6, 2020
6717098
cleanup
ja-thomas Apr 6, 2020
a9f70e9
...
ja-thomas Apr 6, 2020
4728c18
...
ja-thomas Apr 14, 2020
a935a9a
merge
ja-thomas Apr 14, 2020
8d421ee
iterate
ja-thomas Apr 14, 2020
3463d77
...
ja-thomas Apr 15, 2020
f6207a1
...
ja-thomas Apr 16, 2020
cf79e25
add missing code
ja-thomas Apr 17, 2020
285fcf6
comments and tiny changes
jakob-r Apr 17, 2020
9b5823e
notation
jakob-r Apr 17, 2020
a471b45
parego
ja-thomas Apr 17, 2020
4984551
multicrit bo
ja-thomas Apr 20, 2020
20a2f3d
tiny fix notation
jakob-r Apr 24, 2020
316c0b5
benchmark submit
jakob-r Apr 28, 2020
c2ce859
...
ja-thomas May 1, 2020
5c82d0c
Merge branch 'w08_multicrit' of github.com:automl/AutoMLLecture into …
ja-thomas May 1, 2020
d20897d
Merge branch 'master' into w08_multicrit
ja-thomas May 18, 2020
4f27522
iter
ja-thomas May 19, 2020
77e3cc8
iter
ja-thomas May 19, 2020
1e32b01
nsga2 adapt
jakob-r Apr 30, 2020
7d3bc63
w08 slides and example code
jakob-r May 27, 2020
858c9c8
w08 practical
jakob-r Jun 2, 2020
a95735d
imageclassif example
jakob-r Jun 2, 2020
b4199d4
Merge branch 'master' into w08_multicrit
ja-thomas Jun 3, 2020
70fc6b9
iterate
ja-thomas Jun 3, 2020
291c28e
finalize t01
ja-thomas Jun 4, 2020
e96035b
finalize t01 (hopefully)
ja-thomas Jun 4, 2020
8ea0788
ROC style, adult dataset
jakob-r Jun 5, 2020
cb783d4
...
jakob-r Jun 5, 2020
fc169bc
update figure
ja-thomas Jun 7, 2020
91f8abd
iter 01
berndbischl Jun 7, 2020
2f45baa
mosmafs slides
jakob-r Jun 8, 2020
afb6c17
parego plot new (not done)
ja-thomas Jun 8, 2020
bab71c8
Merge branch 'w08_multicrit' of github.com:automl/AutoMLLecture into …
ja-thomas Jun 8, 2020
7b9833b
mosmafs sonar example
jakob-r Jun 8, 2020
8aeefff
parego uncorrelated plot
jakob-r Jun 8, 2020
2fc4601
uncomment mspot
jakob-r Jun 8, 2020
aa1ede2
literature, using litw macro
jakob-r Jun 8, 2020
3549c80
proofread w08 t04
jakob-r Jun 8, 2020
3567243
iterate 03
ja-thomas Jun 8, 2020
72a8968
Merge branch 'w08_multicrit' of github.com:automl/AutoMLLecture into …
ja-thomas Jun 8, 2020
08bb4ec
...
ja-thomas Jun 8, 2020
ab5fcf8
image
ja-thomas Jun 8, 2020
3d6459c
fix 1000 things in t02
berndbischl Jun 8, 2020
6f835ce
fix merge conflict
berndbischl Jun 8, 2020
a197580
fix t02 1000 things
berndbischl Jun 8, 2020
f04ad48
...
ja-thomas Jun 8, 2020
67debf4
Merge branch 'w08_multicrit' of github.com:automl/AutoMLLecture into …
ja-thomas Jun 8, 2020
34e14b8
fix t02 1000 things
berndbischl Jun 8, 2020
ece53ae
Merge branch 'w08_multicrit' of github.com:automl/AutoMLLecture into …
berndbischl Jun 8, 2020
b7e3b71
fix penalty description
ja-thomas Jun 9, 2020
be08652
finish w08 multicrit
berndbischl Jun 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
*.vrb
/tmp/
/w06_hpo_bo/
.Rproj.user
115 changes: 115 additions & 0 deletions w08_hpo_multicrit/code/NSGA2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Visualization of NSGA-II

library(smoof)
library(ggplot2)
library(ecr)
library(gridExtra)
library(grid)

# viz in objective space 2 crit
# Scatter plot of the objective values reached by a large random design.
#
# smoof.fun: a smoof function returning two objective values per point.
# Returns a ggplot object with a single grey point layer (columns c1, c2).
plotObjectiveSpace2Crit = function(smoof.fun) {
  # Evaluate every row of the random design; apply() returns one column per
  # design point, hence the transpose before building the data frame.
  design = generateRandomDesign(n = 10000L, par.set = getParamSet(smoof.fun))
  objective.values = data.frame(t(apply(design, 1, smoof.fun)))
  names(objective.values) = c("c1", "c2")

  # Data and aesthetics stay on the layer (not in ggplot()) so that layers
  # added later by callers do not inherit the c1/c2 mapping.
  cloud = geom_point(data = objective.values, aes(x = c1, y = c2), size = 0.7, color = "grey", alpha = 0.2)
  ggplot() + cloud + theme_bw()
}

# Example problem: the bi-objective "Min-Ex" function on the box
# [0.1, 1] x [0, 5].
ps = makeNumericParamSet("x", lower = c(0.1, 0), upper = c(1, 5))
fn = makeMultiObjectiveFunction(
  name = "Min-Ex",
  par.set = ps,
  fn = function(x) c(x[1], (1 + x[2]) / (x[1] * 60))
)

# Box constraints, shared by the variation operators and the EA call.
upper = getUpper(ps)
lower = getLower(ps)

# Values passed to ecr() as mu and lambda.
LAMBDA = 20L
MU = 25L

recombinator = setup(recSBX, eta = 15, p = 0.7, lower = lower, upper = upper)
mutator = setup(mutPolynomial, eta = 25, p = 0.2, lower = lower, upper = upper)

# Fixed seed so that the logged populations are reproducible.
set.seed(1)
res = ecr(
  fitness.fun = fn,
  representation = "float",
  lower = lower, upper = upper,
  mu = MU, lambda = LAMBDA,
  parent.selector = selSimple,
  recombinator = recombinator,
  mutator = mutator,
  survival.strategy = "plus",
  survival.selector = selNondom,
  terminators = list(stopOnIters(max.iter = 10L)),
  log.pop = TRUE
)

# Background cloud of sampled objective vectors; later plots add layers to it.
p = plotObjectiveSpace2Crit(fn)
populations = getPopulations(res$log)

# One panel per shown iteration: the logged population of that iteration in
# blue on top of the background cloud.
shown_iterations = c(1, 3, 10)
iteration_plots = lapply(shown_iterations, function(i) {
  iter_df = data.frame(t(populations[[i]]$fitness))
  p +
    geom_point(data = iter_df, aes(x = X1, y = X2), colour = "blue") +
    ggtitle(paste("Iteration", i))
})

g = grid.arrange(grobs = iteration_plots, ncol = 3)

# Pass the arranged gtable itself as the plot. The previous form,
# ggsave(grid.draw(g), file = ...), handed ggsave the NULL return value of
# grid.draw() and only produced an image because the argument happened to be
# evaluated lazily after the device was opened.
ggsave(filename = "images/NSGA2_steps.png", plot = g, width = 8, height = 4)


# Non-dominated sorting: colour the first logged population by front.

pop = populations[[1]]$fitness
sorted = doNondominatedSorting(pop)
rank_max = max(sorted$ranks)
ranks = seq_len(rank_max)

# One row per individual; Front is the rank as an ordered factor.
popdf = data.frame(t(pop))
popdf$Front = factor(sorted$ranks, ordered = TRUE, levels = ranks)

# Points and dashed connecting lines share the same data and mapping.
ranked = popdf[popdf$Front %in% ranks, ]
front_aes = aes(x = X1, y = X2, colour = Front)
pl = p +
  geom_point(data = ranked, mapping = front_aes) +
  geom_line(data = ranked, mapping = front_aes, lty = 2)
ggsave(filename = "images/NSGA2_NDS.png", plot = pl, width = 4, height = 3)


# Crowd Sort - Example 1
# Crowding distances within the highest-ranked (last) front of the first
# logged population.
F3 = popdf[which(popdf$Front == rank_max), ]
cd = computeCrowdingDistance(t(as.matrix(F3[, c("X1", "X2")])))

# Faded version of the front as the common base of both panels.
pl = p + geom_point(data = F3, aes(x = X1, y = X2), alpha = 0.3)
pl = pl + geom_line(data = F3, aes(x = X1, y = X2), lty = 2, alpha = 0.3)

# head() caps the selection at the size of the front. Indexing with [1:5]
# produced NA indices (and therefore all-NA rows) whenever the front held
# fewer than five points.
n_highlight = 5
smallest_cd = head(order(cd, decreasing = FALSE), n_highlight)
largest_cd = head(order(cd, decreasing = TRUE), n_highlight)

# Left panel: points with the smallest crowding distance.
pl1 = pl + geom_point(data = F3[smallest_cd, ], aes(x = X1, y = X2), size = 3, shape = 17)
pl1 = pl1 + theme(legend.position = "none")
# Right panel: points with the largest crowding distance.
pl2 = pl + geom_point(data = F3[largest_cd, ], aes(x = X1, y = X2), shape = 17, size = 3)
pl2 = pl2 + theme(legend.position = "none")

g = grid.arrange(pl1, pl2, ncol = 2)

# Save the arranged gtable directly; grid.draw(g) returns NULL and must not be
# used as the plot argument.
ggsave(filename = "images/NSGA2_CS1.png", plot = g, width = 6, height = 3)


# Crowd Sort - Example 2: for two points of the front, draw the rectangle
# spanned by their two neighbours along X1.
# NOTE(review): picking positions 5 and length(cd) - 1 assumes the front has
# at least five points and that both picks have a neighbour on each side.
cdo = order(cd, decreasing = TRUE)[c(5, length(cd)-1)]
F3.oX1 = F3[order(F3$X1), c("X1", "X2")]

cuboids = F3[cdo, c("X1", "X2")]
# match() returns the position of each selected point in the X1-sorted front
# in the same order as the rows of `cuboids`. The previous which(... %in% ...)
# returned the positions in ascending order, so the neighbours could end up
# attached to the wrong point.
idx = match(cuboids$X1, F3.oX1$X1)
cuboids = cbind(cuboids, F3.oX1[idx + 1, ])
cuboids = cbind(cuboids, F3.oX1[idx - 1, ])
# Columns 3-4 come from the next point along X1, columns 5-6 from the previous.
names(cuboids) = c("x", "y", "xmin", "ymin", "xmax", "ymax")
cuboids$point = c("i", "j")

F3 = F3[!is.na(F3$X1), ]
pl1 = p + geom_point(data = F3, aes(x = X1, y = X2))
pl1 = pl1 + theme(legend.position = "none")
pl1 = pl1 + geom_line(data = F3, aes(x = X1, y = X2), lty = 2)
pl1 = pl1 + geom_rect(data = cuboids, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, colour = point, fill = point), alpha = 0.2)
pl1 = pl1 + geom_point(data = cuboids, aes(x = x, y = y, colour = point, fill = point), size = 3)

# Spell out the argument names instead of relying on partial matching of
# `file` to `filename`.
ggsave(filename = "images/NSGA2_CS2.png", plot = pl1, width = 3, height = 3)




91 changes: 91 additions & 0 deletions w08_hpo_multicrit/code/benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
library(mlr)
library(mlrMBO)
library(batchtools)
library(checkmate)

# Learner and task that are stored in every benchmark problem below.
lrn = makeLearner("classif.svm", predict.type = "response")
tsk = spam.task
# Evaluation budget; every tuner control below derives its settings from it.
n_evals = 100

# Build an mlr multi-criteria tuning control backed by mlrMBO.
#
# multi_method: method name handed to setMBOControlMultiObj(); this script
#   calls it with "parego" and "dib".
# Returns the result of makeTuneMultiCritControlMBO() for two objectives. The
# evaluation budget is read from the global n_evals.
make_mbo_multi_control = function(multi_method) {
  base_ctrl = setMBOControlMultiObj(
    setMBOControlTermination(makeMBOControl(n.objectives = 2), max.evals = n_evals),
    method = multi_method
  )
  # Only the "dib" method gets the crit.dib1 infill criterion set explicitly.
  final_ctrl = if (multi_method == "dib") {
    setMBOControlInfill(base_ctrl, crit = crit.dib1)
  } else {
    base_ctrl
  }
  makeTuneMultiCritControlMBO(n.objectives = 2, mbo.control = final_ctrl)
}

# NSGA-II settings: population of 12 and as many generations as are needed to
# reach n_evals evaluations (rounded up to whole generations).
nsga2_popsize = 12
nsga2_generations = ceiling(n_evals / nsga2_popsize)

# One tuning control per compared method, all sized from n_evals.
tune_ctrls = list(
  grid = makeTuneMultiCritControlGrid(resolution = ceiling(sqrt(n_evals))),
  random = makeTuneMultiCritControlRandom(maxit = n_evals),
  nsga2 = makeTuneMultiCritControlNSGA2(
    budget = nsga2_popsize * nsga2_generations,
    popsize = nsga2_popsize,
    max.generations = nsga2_generations
  ),
  mbo_parego = make_mbo_multi_control("parego"),
  mbo_dib = make_mbo_multi_control("dib")
)

# Both SVM hyperparameters are searched on [-3, 3] and mapped through 10^x.
log10_param = function(id) {
  makeNumericParam(id, lower = -3, upper = 3, trafo = function(x) 10^x)
}
ps = makeParamSet(log10_param("cost"), log10_param("gamma"))

# Inner resampling (used while tuning) and outer resampling (used for the
# final evaluation of the tuned configurations).
rsmp_tuning = makeResampleDesc("CV", iters = 5)
rsmp_outer = makeResampleDesc("CV", iters = 10)

# Run the benchmark once and cache the reduced results in benchmark_res.rds;
# the whole block is skipped when that file already exists.
if (!fs::file_exists("benchmark_res.rds")) {
  # Use the ~/nobackup location when it exists; an old registry is removed.
  reg_dir = if (fs::file_exists("~/nobackup/")) "~/nobackup/w08_multicrit_benchmark" else "benchmark_bt"
  unlink(reg_dir, recursive = TRUE)
  reg = makeExperimentRegistry(file.dir = reg_dir, seed = 1, packages = c("mlr", "mlrMBO", "parallelMap", "checkmate"))

  # Draw the outer CV partition exactly once and share it between all
  # problems and replications. Drawing it inside the instance generator makes
  # the partition depend on the RNG state of each job: batchtools seeds every
  # replication of a problem differently (problem seed + repl - 1), so fold k
  # would be taken from its own random partition and the test sets of the
  # folds would no longer form one proper cross-validation split.
  set.seed(1)
  rsmpl_outer_fixed = makeResampleInstance(rsmp_outer, task = tsk)

  # Returns the batchtools problem function. Replication number `repl` of a
  # job selects the outer fold; the instance holds the train/test row indices.
  make_instance_gen = function(rsmp_instance) {
    force(rsmp_instance)
    function(data, job) {
      fold = job$repl
      list(train = rsmp_instance$train.inds[[fold]], test = rsmp_instance$test.inds[[fold]], fold = fold)
    }
  }

  # One problem per tuner; the tuner's control object travels in the data.
  for (n in names(tune_ctrls)) {
    addProblem(name = n, data = list(lrn = lrn, tsk = tsk, rsmp_tuning = rsmp_tuning, ps = ps, ctrl = tune_ctrls[[n]]), fun = make_instance_gen(rsmpl_outer_fixed), seed = 1)
  }

  # Tune on the outer training part (inner resampling, measures tpr and fpr),
  # then refit every returned configuration on the outer training part and
  # score it on the outer test part.
  # Returns list(tune_res = <tuning result>, outer_res = <one row per config>).
  algo_mlr = function(job, data, instance) {
    lrn = data$lrn
    tsk = data$tsk
    tsk_train = subsetTask(tsk, instance$train)
    tsk_test = subsetTask(tsk, instance$test)
    rsmp_tuning = data$rsmp_tuning
    ps = data$ps
    ctrl = data$ctrl
    #parallelStartMulticore(cpus = rsmp_tuning$iters %??% 4, level = "mlr.resample") #does not work :(
    tune_res = tuneParamsMultiCrit(learner = lrn, task = tsk_train, resampling = rsmp_tuning, measures = list(tpr, fpr), par.set = ps, control = ctrl)
    #parallelStop()
    y_outer = lapply(tune_res$x, function(x) {
      lrn2 = setHyperPars(lrn, par.vals = x)
      mod = train(lrn2, tsk_train)
      pred = predict(mod, task = tsk_test)
      performance(pred, measures = list(tpr, fpr))
    })
    y_outer = do.call(rbind, y_outer)
    return(list(tune_res = tune_res, outer_res = y_outer))
  }

  addAlgorithm("mlr", fun = algo_mlr)

  # One replication per outer fold.
  addExperiments(repls = rsmp_outer$iters)
  submitJobs(resources = list(ncpus = 1))
  waitForJobs()
  # Keep only the configurations and their inner/outer performance values.
  res = reduceResultsList(findDone(), fun = function(res, job) {
    list(
      x = res$tune_res$x,
      inner_y = res$tune_res$y,
      outer_y = res$outer_res
    )
  })
  saveRDS(res, "benchmark_res.rds")
}



40 changes: 40 additions & 0 deletions w08_hpo_multicrit/code/census_vis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
library(mlr3)
library(mlr3oml)
#library(mlr3viz)
# Load OpenML data set 1590 as an mlr3 task.
# NOTE(review): the output files below are named dataset_adult_*, so this is
# presumably the Adult census data - confirm.
task = tsk("oml", data_id = 1590)
#autoplot(task, type = "pairs", cardinality_threshold = 16)
#autoplot(task, type = "duo", cardinality_threshold = 16)
#mlr3viz:::autoplot.TaskClassif
#

# remotes::install_github("ggobi/ggally")
library(GGally)
library(ggplot2)
# All plots in this script use the black-and-white theme.
theme_set(theme_bw())
# Plot data for the task; the plots below use its columns race, occupation,
# sex, age and class.
pdata = mlr3viz::fortify(task)

# Column-bar plot of `class` shares for one categorical feature, with one
# facet row per sex. Prints the plot in interactive sessions, writes it to
# `file` (height 6, given width) and returns the plot.
save_colbar_by_sex = function(mapping, file, width) {
  plt = ggally_colbar(pdata, mapping, size = 3, label_format = scales::label_percent(accuracy = 1)) +
    facet_grid(sex~.) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(legend.position = "none")
  if (interactive()) print(plt)
  ggsave(file, plt, height = 6, width = width)
  plt
}

g = save_colbar_by_sex(aes(x = race, y = class), "../images/dataset_adult_race.png", width = 3)

# The occupation plot is written under the "education" file name.
g = save_colbar_by_sex(aes(x = occupation, y = class), "../images/dataset_adult_education.png", width = 5)

# Histogram of age in one-year bins, filled by class.
age_mapping = aes(x = age, group = class, fill = class)
g = ggplot(pdata, age_mapping) +
  geom_histogram(binwidth = 1, color = "black", size = 0.1)
if (interactive()) {
  print(g)
}
ggsave("../images/dataset_adult_age_sex.png", g, height = 2.5, width = 5.5)

# Disabled: class shares by sex without faceting.
if (FALSE) {
  g = ggally_colbar(pdata, aes(x = sex, y = class))
  if (interactive()) {
    print(g)
  }
  ggsave("../images/dataset_adult_sex.png", g, height = 5, width = 7)
}

130 changes: 130 additions & 0 deletions w08_hpo_multicrit/code/cim_code.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
library(ggplot2)
library(gridExtra)

# Hotel data with (at least) the columns mean_price and mean_rating.
df = readRDS("expedia_example.rds")

# Plain scatter: price against negated rating.
p = ggplot(data = df, aes(x = mean_price, y = - mean_rating)) +
  geom_point(size = 1.5) +
  theme_bw() +
  ylim(c(-5.5, -2)) +
  labs(x = "Price per night", y = "Rating")
p

# Same scatter with rows 16 and 17 highlighted in green and red.
p1 = ggplot(data = df, aes(x = mean_price, y = - mean_rating)) +
  geom_point(size = 1.5) +
  geom_point(data = df[16:17, ], aes(x = mean_price, y = - mean_rating), size = 2, colour = c("green", "red")) +
  theme_bw() +
  ylim(c(-5.5, -2)) +
  labs(x = "Price per night", y = "Rating")

# Same scatter with rows 10 and 16 highlighted in orange.
p2 = ggplot(data = df, aes(x = mean_price, y = - mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = df[c(10, 16), ], aes(x = mean_price, y = - mean_rating), size = 2, colour = "orange") +
  theme_bw() +
  ylim(c(-5.5, -2)) +
  labs(x = "Price per night", y = "Rating")

grid.arrange(p1, p2, ncol = 2)

p1

p2

# From here on the rating is stored negated, so smaller is better in both
# columns.
df$mean_rating = - df$mean_rating

# Pareto front: walk through the hotels from best to worst rating (ties by
# price) and keep a row only when it sets a new minimum price.
by_rating = df[order(df$mean_rating, df$mean_price), ]
P = by_rating[!duplicated(cummin(by_rating$mean_price)), ]

p2 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = P, aes(x = mean_price, y = mean_rating), size = 2, colour = "orange") +
  geom_line(data = P, aes(x = mean_price, y = mean_rating), colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  labs(x = "Price per night", y = "Rating")

p2

# Single objective: a parabola with its minimum at (1, 0) marked in green.
fun = function(x) (x - 1)^2
x_range = data.frame(x = c(0, 3))
p = ggplot(x_range, aes(x)) +
  stat_function(fun = fun) +
  geom_point(x = 1, y = 0, color = "green", size = 3) +
  theme_bw() +
  ylab("c") +
  xlab(expression(lambda))
p

# Two objectives over the same input range.
fun1 = function(x) (x - 1)^2
fun2 = function(x) 3 * (x - 2)^2
p = ggplot(data.frame(x = c(0, 3)), aes(x)) +
  stat_function(fun = fun1) +
  stat_function(fun = fun2, color = "blue") +
  theme_bw()
p

# Objective space: all evaluated inputs in black, inputs from [1, 2] in green.
x = seq(0, 3, length.out = 1000)
xpareto = seq(1, 2, length.out = 1000)
all_points = data.frame(f1 = fun1(x), f2 = fun2(x))
front_points = data.frame(f1 = fun1(xpareto), f2 = fun2(xpareto))

p2 = ggplot() +
  geom_point(data = all_points, aes(x = f1, y = f2), size = 0.05) +
  geom_point(data = front_points, aes(x = f1, y = f2), color = "green", size = 0.05) +
  theme_bw()
p2

# A-priori scalarisation: price plus 50 times the (negated) rating.
df$apriori = df$mean_price + 50 * df$mean_rating
best_apriori = df[which.min(df$apriori), ]

# Left panel: the weighted sums on a line, minimum in green.
hide_y_axis = theme(
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)
p1 = ggplot() +
  geom_point(data = df, aes(x = apriori, y = 0), size = 2) +
  geom_point(data = best_apriori, aes(x = apriori, y = 0), colour = "green", size = 2) +
  theme_bw() +
  xlab("Weighted sum") +
  hide_y_axis

# Right panel: the same minimiser shown in the price/rating plane.
p2 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = best_apriori, aes(x = mean_price, y = mean_rating), size = 2, colour = "green") +
  theme_bw() +
  ylim(c(-5, -2)) +
  labs(x = "Price per night", y = "Rating")

grid.arrange(p1, p2, ncol = 2)

# Lexicographic selection, step 1: rows whose (negated) rating equals -5.
top_rated = df$mean_rating == -5

p1 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = df[top_rated, ], aes(x = mean_price, y = mean_rating), size = 2, colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  ggtitle("1) max. rating") +
  labs(x = "Price per night", y = "Rating")

# Step 2: among those rows, the ones with a price below 150 in green.
p2 = p1 +
  geom_point(data = df[(top_rated & df$mean_price < 150), ], colour = "green", size = 2) +
  ggtitle("2) min. price")

grid.arrange(p1, p2, ncol = 2)

# Pareto front: sort by rating, then price, and keep the rows that set a new
# minimum price.
rating_order = order(df$mean_rating, df$mean_price)
P = df[rating_order, ]
P = P[!duplicated(cummin(P$mean_price)), ]

# Left panel: all hotels with the front in orange.
p1 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = P, aes(x = mean_price, y = mean_rating), size = 2, colour = "orange") +
  geom_line(data = P, aes(x = mean_price, y = mean_rating), colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  labs(x = "Price per night", y = "Rating")

# Right panel: additionally mark the front point(s) with rating value -4.5.
chosen = P[P$mean_rating == -4.5, ]
p2 = p1 + geom_point(data = chosen, aes(x = mean_price, y = mean_rating), colour = "green", size = 2)

g = grid.arrange(p1, p2, ncol = 2)
ggsave("../images/expedia-11-1.pdf", g, height = 2, width = 5)

# Objective space of fun1/fun2 on [-1, 4] together with the straight line
# y = 3 * 0.4 - 2 * x and one highlighted point.
x = seq(-1, 4, length.out = 1000)
lin = 3 * 0.4 - 2 * x
objective_points = data.frame(f1 = fun1(x), f2 = fun2(x))
line_points = data.frame(x = x, lin = lin)

p2 = ggplot() +
  geom_point(data = objective_points, aes(x = f1, y = f2), size = 0.7) +
  geom_line(data = line_points, aes(x = x, y = lin)) +
  ylim(c(-3, 25)) +
  geom_point(aes(x = 0.36, y = 0.48), colour = "green", size = 3) +
  theme_bw()
p2

# Two quadratic objectives on a two-dimensional input.
f1 = function(x) 0.01 * sum(x^2) - 2
f2 = function(x) 0.01 * sum(c(0.1, 0.3) * (x - c(-10, 20))^2)

# Evaluate both objectives on a 100 x 100 grid over [-10, 20]^2.
x1 = x2 = seq(-10, 20, length.out = 100)
grid = expand.grid(x1 = x1, x2 = x2)
grid_inputs = as.matrix(grid[, 1:2])
grid$y1 = apply(grid_inputs, 1, f1)
grid$y2 = apply(grid_inputs, 1, f2)

# Long format: one row per grid point and objective (columns variable, value).
melt = reshape2::melt(grid, id.vars = c("x1", "x2"))

p = ggplot(data = melt) +
  geom_raster(aes(x = x1, y = x2, fill = value)) +
  geom_contour(aes(x = x1, y = x2, z = value, colour = variable), bins = 15) +
  ylim(c(-20, 40)) +
  xlim(c(-20, 40)) +
  theme_bw()
p
Loading