Skip to content

Commit

Permalink
Merge pull request #11 from automl/w08_multicrit
Browse files Browse the repository at this point in the history
W08 multicrit
  • Loading branch information
berndbischl authored Jun 9, 2020
2 parents ae2a933 + be08652 commit 9f59ee2
Show file tree
Hide file tree
Showing 71 changed files with 2,964 additions and 36 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
*.vrb
/tmp/
/w06_hpo_bo/
.Rproj.user
115 changes: 115 additions & 0 deletions w08_hpo_multicrit/code/NSGA2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Visualization of NSGA-II

library(smoof)
library(ggplot2)
library(ecr)
library(gridExtra)
library(grid)

# viz in objective space 2 crit
# Scatter plot of the objective space of a two-criteria smoof function.
#
# smoof.fun: a smoof function; its parameter set is read via getParamSet() and
#            its result is expected to have two components (plotted as c1 / c2).
# Returns a ggplot object with 10000 randomly sampled points drawn in grey,
# meant to be used as a background layer for further geoms.
plotObjectiveSpace2Crit = function(smoof.fun) {
  design = generateRandomDesign(n = 10000L, par.set = getParamSet(smoof.fun))

  # apply() returns one column per design row, hence the transpose
  objectives = data.frame(t(apply(design, 1, smoof.fun)))
  names(objectives) = c("c1", "c2")

  ggplot() +
    geom_point(data = objectives, aes(x = c1, y = c2), size = 0.7, color = "grey", alpha = 0.2) +
    theme_bw()
}

# Example function: bi-objective "Min-Ex" problem on x1 in [0.1, 1], x2 in [0, 5]
ps = makeNumericParamSet("x", lower = c(0.1, 0), upper = c(1, 5))

fn = makeMultiObjectiveFunction(name = "Min-Ex",
  fn = function(x) c(x[1], (1 + x[2]) / (x[1] * 60)),
  par.set = ps
)

lower = getLower(ps)
upper = getUpper(ps)

# Passed to ecr() below as mu and lambda
MU = 25L
LAMBDA = 20L
mutator = setup(mutPolynomial, eta = 25, p = 0.2, lower = lower, upper = upper)
recombinator = setup(recSBX, eta = 15, p = 0.7, lower = lower, upper = upper)

# Fixed seed so the logged populations (and thus the figures) are reproducible
set.seed(1)
res = ecr(fitness.fun = fn, lower = lower, upper = upper, mu = MU, lambda = LAMBDA, representation = "float", survival.strategy = "plus",
  parent.selector = selSimple, mutator = mutator,
  recombinator = recombinator, survival.selector = selNondom,
  log.pop = TRUE, terminators = list(stopOnIters(max.iter = 10L)))

# Grey background of the objective space, reused by all figures below
p = plotObjectiveSpace2Crit(fn)
populations = getPopulations(res$log)

# Overlay the i-th logged population (blue) on the objective-space background
plotPopulation = function(i) {
  popdf = data.frame(t(populations[[i]]$fitness))
  p +
    geom_point(data = popdf, aes(x = X1, y = X2), colour = "blue") +
    ggtitle(paste("Iteration", i))
}

# One panel per logged population shown in the figure
step.plots = lapply(c(1, 3, 10), plotPopulation)

# grid.arrange() draws the panels and invisibly returns the arranged gtable
g = grid.arrange(grobs = step.plots, ncol = 3)

# Hand the gtable itself to ggsave(); grid.draw(g) would evaluate to NULL
ggsave("images/NSGA2_steps.png", plot = g, width = 8, height = 4)


# non-dominated sorting: colour the first logged population by its front

pop = populations[[1]]$fitness
sorted = doNondominatedSorting(pop)
rank_max = max(sorted$ranks)
ranks = seq_len(rank_max)

# One row per individual; Front is an ordered factor of the domination rank
popdf = data.frame(t(pop))
popdf[["Front"]] = factor(sorted$ranks, ordered = TRUE, levels = ranks)

front.points = popdf[popdf$Front %in% ranks, ]
front.aes = aes(x = X1, y = X2, colour = Front)

# Points plus a dashed line connecting the members of each front
pl = p +
  geom_point(data = front.points, mapping = front.aes) +
  geom_line(data = front.points, mapping = front.aes, lty = 2)
ggsave(filename = "images/NSGA2_NDS.png", plot = pl, width = 4, height = 3)


# Crowd Sort - Example 1
# Work on the front with the highest rank of the first logged population.
# NOTE(review): the name F3 suggests this was the third front for this seed;
# the selection itself is done via rank_max.
F3 = popdf[which(popdf$Front == rank_max), ]
cd = computeCrowdingDistance(t(as.matrix(F3[, c("X1", "X2")])))

pl = p + geom_point(data = F3, aes(x = X1, y = X2), alpha = 0.3)
pl = pl + geom_line(data = F3, aes(x = X1, y = X2), lty = 2, alpha = 0.3)

# Left panel: the (up to) five points with the smallest crowding distance,
# right panel: the (up to) five with the largest. head() avoids the NA rows
# that `[1:5]` would create on a front with fewer than five points.
smallest.cd = head(order(cd, decreasing = FALSE), 5L)
largest.cd = head(order(cd, decreasing = TRUE), 5L)

pl1 = pl + geom_point(data = F3[smallest.cd, ], aes(x = X1, y = X2), size = 3, shape = 17)
pl1 = pl1 + theme(legend.position = "none")
pl2 = pl + geom_point(data = F3[largest.cd, ], aes(x = X1, y = X2), shape = 17, size = 3)
pl2 = pl2 + theme(legend.position = "none")

# grid.arrange() draws the panels and invisibly returns the arranged gtable
g = grid.arrange(pl1, pl2, ncol = 2)

# Hand the gtable itself to ggsave(); grid.draw(g) would evaluate to NULL
ggsave("images/NSGA2_CS1.png", plot = g, width = 6, height = 3)


# Crowd Sort - Example 2: cuboids spanned by the neighbours of two points,
# namely the one with the 5th-largest and the one with the second-smallest
# crowding distance.
cdo = order(cd, decreasing = TRUE)[c(5, length(cd) - 1)]
F3.oX1 = F3[order(F3$X1), c("X1", "X2")]

cuboids = F3[cdo, c("X1", "X2")]
# match() keeps the positions in the same row order as `cuboids`, so every
# point is bound to its own neighbours (which(... %in% ...) would return the
# positions sorted by X1 instead).
idx = match(cuboids$X1, F3.oX1$X1)
cuboids = cbind(cuboids, F3.oX1[idx + 1, ])
cuboids = cbind(cuboids, F3.oX1[idx - 1, ])
# NOTE(review): the neighbour at idx + 1 has the larger X1, so the min/max
# labels are nominal; they only name the two opposite corners of the rectangle.
names(cuboids) = c("x", "y", "xmin", "ymin", "xmax", "ymax")
cuboids$point = c("i", "j")

F3 = F3[!is.na(F3$X1), ]
pl1 = p + geom_point(data = F3, aes(x = X1, y = X2))
pl1 = pl1 + theme(legend.position = "none")
pl1 = pl1 + geom_line(data = F3, aes(x = X1, y = X2), lty = 2)
pl1 = pl1 + geom_rect(data = cuboids, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, colour = point, fill = point), alpha = 0.2)
pl1 = pl1 + geom_point(data = cuboids, aes(x = x, y = y, colour = point, fill = point), size = 3)

ggsave("images/NSGA2_CS2.png", plot = pl1, width = 3, height = 3)




91 changes: 91 additions & 0 deletions w08_hpo_multicrit/code/benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
library(mlr)
library(mlrMBO)
library(batchtools)
library(checkmate)

# Learner to be tuned: SVM classifier with predict.type "response"
lrn = makeLearner("classif.svm", predict.type = "response")
# Benchmark task: spam.task (example task shipped with mlr)
tsk = spam.task
# Evaluation budget from which the tuning controls below derive their settings
n_evals = 100

# Build an mlr multi-criteria tuning control backed by mlrMBO.
#
# multi_method: name of the mlrMBO multi-objective method, forwarded to
#               setMBOControlMultiObj(); for "dib" the infill criterion is
#               additionally set to crit.dib1.
# The control uses two objectives and stops after n_evals evaluations
# (n_evals is read from the enclosing script).
make_mbo_multi_control = function(multi_method) {
  mbo_control = setMBOControlMultiObj(
    setMBOControlTermination(makeMBOControl(n.objectives = 2), max.evals = n_evals),
    method = multi_method
  )
  if (multi_method == "dib") {
    mbo_control = setMBOControlInfill(mbo_control, crit = crit.dib1)
  }
  makeTuneMultiCritControlMBO(n.objectives = 2, mbo.control = mbo_control)
}

# NSGA-II settings: generations are rounded up so that popsize * generations
# covers at least n_evals evaluations
nsga2_popsize = 12
nsga2_generations = ceiling(n_evals / nsga2_popsize)

# One tuning control per compared strategy, each sized from n_evals
tune_ctrls = list(
  grid = makeTuneMultiCritControlGrid(resolution = ceiling(sqrt(n_evals))),
  random = makeTuneMultiCritControlRandom(maxit = n_evals),
  nsga2 = makeTuneMultiCritControlNSGA2(
    budget = nsga2_popsize * nsga2_generations,
    popsize = nsga2_popsize,
    max.generations = nsga2_generations
  ),
  mbo_parego = make_mbo_multi_control("parego"),
  mbo_dib = make_mbo_multi_control("dib")
)

# Search space: both parameters are searched on a log10 scale over [-3, 3]
# and mapped to 10^x by the trafo
pow10 = function(x) 10^x
ps = makeParamSet(
  makeNumericParam("cost", lower = -3, upper = 3, trafo = pow10),
  makeNumericParam("gamma", lower = -3, upper = 3, trafo = pow10)
)

# Inner resampling (used while tuning) and outer resampling (final evaluation)
rsmp_tuning = makeResampleDesc("CV", iters = 5)
rsmp_outer = makeResampleDesc("CV", iters = 10)

# Run the benchmark with batchtools unless cached results already exist.
# One problem per tuning strategy, one replication per outer CV fold; the
# reduced results are stored in "benchmark_res.rds".
if (!fs::file_exists("benchmark_res.rds")) {
  reg_dir = if (fs::file_exists("~/nobackup/")) "~/nobackup/w08_multicrit_benchmark" else "benchmark_bt"
  # Always start from a fresh registry
  unlink(reg_dir, recursive = TRUE)
  reg = makeExperimentRegistry(file.dir = reg_dir, seed = 1, packages = c("mlr", "mlrMBO", "parallelMap", "checkmate"))

  # Draw the outer CV partition exactly once. batchtools seeds the problem
  # instantiation with `seed + repl - 1`, so instantiating the resampling
  # inside each job would give every replication a different partition and
  # the "folds" would no longer form a single cross-validation.
  set.seed(1)
  rsmp_outer_fixed = makeResampleInstance(rsmp_outer, task = tsk)

  # Returns the problem function for addProblem(): replication k is mapped to
  # fold k of the fixed outer resampling instance captured in the closure.
  make_instance_gen = function(rsmp_outer_fixed) {
    force(rsmp_outer_fixed)
    function(data, job) {
      fold = job$repl
      list(
        train = rsmp_outer_fixed$train.inds[[fold]],
        test = rsmp_outer_fixed$test.inds[[fold]],
        fold = fold
      )
    }
  }

  for (n in names(tune_ctrls)) {
    addProblem(
      name = n,
      data = list(lrn = lrn, tsk = tsk, rsmp_tuning = rsmp_tuning, ps = ps, ctrl = tune_ctrls[[n]]),
      fun = make_instance_gen(rsmp_outer_fixed),
      seed = 1
    )
  }

  # Tune on the training part of the outer fold, then re-train every returned
  # configuration on that training part and evaluate it on the test part.
  # Returns the tuning result and a matrix with one row of (tpr, fpr) per
  # configuration.
  algo_mlr = function(job, data, instance) {
    lrn = data$lrn
    tsk = data$tsk
    tsk_train = subsetTask(tsk, instance$train)
    tsk_test = subsetTask(tsk, instance$test)
    rsmp_tuning = data$rsmp_tuning
    ps = data$ps
    ctrl = data$ctrl
    #parallelStartMulticore(cpus = rsmp_tuning$iters %??% 4, level = "mlr.resample") #does not work :(
    tune_res = tuneParamsMultiCrit(learner = lrn, task = tsk_train, resampling = rsmp_tuning, measures = list(tpr, fpr), par.set = ps, control = ctrl)
    #parallelStop()
    y_outer = lapply(tune_res$x, function(x) {
      lrn2 = setHyperPars(lrn, par.vals = x)
      mod = train(lrn2, tsk_train)
      pred = predict(mod, task = tsk_test)
      performance(pred, measures = list(tpr, fpr))
    })
    y_outer = do.call(rbind, y_outer)
    return(list(tune_res = tune_res, outer_res = y_outer))
  }

  addAlgorithm("mlr", fun = algo_mlr)

  addExperiments(repls = rsmp_outer$iters)
  submitJobs(resources = list(ncpus = 1))
  # waitForJobs() returns FALSE if any job errored or expired; make that
  # visible because only finished jobs end up in the cached result file.
  if (!waitForJobs()) {
    warning("Some benchmark jobs failed or expired; 'benchmark_res.rds' only contains the finished jobs.", call. = FALSE)
  }
  res = reduceResultsList(findDone(), fun = function(res, job) {
    list(
      x = res$tune_res$x,
      inner_y = res$tune_res$y,
      outer_y = res$outer_res
    )
  })
  saveRDS(res, "benchmark_res.rds")
}



40 changes: 40 additions & 0 deletions w08_hpo_multicrit/code/census_vis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
library(mlr3)
library(mlr3oml)
#library(mlr3viz)
# OpenML data set 1590 (the output file names below call it "adult")
task = tsk("oml", data_id = 1590)
#autoplot(task, type = "pairs", cardinality_threshold = 16)
#autoplot(task, type = "duo", cardinality_threshold = 16)
#mlr3viz:::autoplot.TaskClassif
#

# remotes::install_github("ggobi/ggally")
library(GGally)
library(ggplot2)
theme_set(theme_bw())
pdata = mlr3viz::fortify(task)

# Column bar chart for the given mapping, one facet row per sex, with rotated
# x labels and without legend.
colbar_by_sex = function(mapping) {
  ggally_colbar(pdata, mapping, size = 3, label_format = scales::label_percent(accuracy = 1)) +
    facet_grid(sex~.) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
}

# Show the figure in interactive sessions and write it to `path`.
show_and_save = function(fig, path, height, width) {
  if (interactive()) print(fig)
  ggsave(path, fig, height = height, width = width)
}

g = colbar_by_sex(aes(x = race, y = class))
show_and_save(g, "../images/dataset_adult_race.png", height = 6, width = 3)

# NOTE(review): this plots `occupation` but the file is named "education" -- confirm
g = colbar_by_sex(aes(x = occupation, y = class))
show_and_save(g, "../images/dataset_adult_education.png", height = 6, width = 5)

# NOTE(review): histogram of age filled by class; the file name mentions "sex" -- confirm
g = ggplot(pdata, aes(x=age, group=class, fill=class)) +
  geom_histogram(binwidth=1, color='black', size = 0.1)
show_and_save(g, "../images/dataset_adult_age_sex.png", height = 2.5, width = 5.5)

# Disabled variant: class distribution by sex
if (FALSE) {
  g = ggally_colbar(pdata, aes(x = sex, y = class))
  show_and_save(g, "../images/dataset_adult_sex.png", height = 5, width = 7)
}

130 changes: 130 additions & 0 deletions w08_hpo_multicrit/code/cim_code.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
library(ggplot2)
library(gridExtra)

df = readRDS("expedia_example.rds")

# Price vs. negated rating scatter with the given point size
rating_scatter = function(point_size) {
  ggplot(data = df, aes(x = mean_price, y = - mean_rating)) + geom_point(size = point_size)
}

# Shared theme, y range and axis labels of the plots in this section
finish_plot = function(plt) {
  plt + theme_bw() + ylim(c(-5.5, -2)) + xlab("Price per night") + ylab("Rating")
}

# Plain scatter plot
p = finish_plot(rating_scatter(1.5))
p

# Rows 16 and 17 highlighted in green / red
p1 = finish_plot(
  rating_scatter(1.5) +
    geom_point(data = df[16:17, ], aes(x = mean_price, y = - mean_rating), size = 2, colour = c("green", "red"))
)

# Rows 10 and 16 highlighted in orange
p2 = finish_plot(
  rating_scatter(2) +
    geom_point(data = df[c(10, 16), ], aes(x = mean_price, y = - mean_rating), size = 2, colour = "orange")
)

grid.arrange(p1, p2, ncol = 2)

p1

p2

# From here on mean_rating is stored negated, so both criteria are minimised.
# This modifies df in place: do not run this line twice.
df[["mean_rating"]] = -df[["mean_rating"]]

# Pareto front: sort by rating (ties by price) and keep every row that sets a
# new running minimum of the price
rating_price_order = order(df$mean_rating, df$mean_price, decreasing = FALSE)
P = df[rating_price_order, ]
sets_new_minimum = !duplicated(cummin(P$mean_price))
P = P[which(sets_new_minimum), ]

front_aes = aes(x = mean_price, y = mean_rating)
p2 = ggplot(data = df, front_aes) +
  geom_point(size = 2) +
  geom_point(data = P, front_aes, size = 2, colour = "orange") +
  geom_line(data = P, front_aes, colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  xlab("Price per night") + ylab("Rating")

p2

# Single objective with its minimum at (1, 0) marked in green
fun = function(x) (x - 1)^2
x_range = data.frame(x = c(0, 3))
p = ggplot(x_range, aes(x)) +
  stat_function(fun = fun) +
  geom_point(x = 1, y = 0, color = "green", size = 3) +
  theme_bw() + ylab("c") + xlab(expression(lambda))
p

# Two objectives over the same range (fun2 in blue)
fun1 = function(x) (x - 1)^2
fun2 = function(x) 3 * (x - 2)^2
p = ggplot(x_range, aes(x)) +
  stat_function(fun = fun1) +
  stat_function(fun = fun2, color = "blue") +
  theme_bw()
p

x = seq(0, 3, length.out = 1000)
xpareto = seq(1, 2, length.out = 1000)

# Objective space: all evaluated points in black, those for x in [1, 2] in green
all_points = data.frame(f1 = fun1(x), f2 = fun2(x))
pareto_points = data.frame(f1 = fun1(xpareto), f2 = fun2(xpareto))
p2 = ggplot() +
  geom_point(data = all_points, aes(x = f1, y = f2), size = 0.05) +
  geom_point(data = pareto_points, aes(x = f1, y = f2), color = "green", size = 0.05) +
  theme_bw()
p2

# A-priori scalarisation: price + 50 * rating (mean_rating was negated earlier in the script)
df[["apriori"]] = df$mean_price + 50 * df$mean_rating
best = df[which.min(df$apriori), ]

# Left: weighted sums on a single axis, minimum in green
p1 = ggplot() +
  geom_point(data = df, aes(x = apriori, y = 0), size = 2) +
  geom_point(data = best, aes(x = apriori, y = 0), colour = "green", size = 2) +
  theme_bw() +
  xlab("Weighted sum") +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Right: the same minimiser shown in the price / rating plane
p2 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = best, aes(x = mean_price, y = mean_rating), size = 2, colour = "green") +
  theme_bw() +
  ylim(c(-5, -2)) +
  xlab("Price per night") + ylab("Rating")

grid.arrange(p1, p2, ncol = 2)

# Step 1: all rows with mean_rating == -5 in orange
top_rated = df[df$mean_rating == -5, ]
p1 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = top_rated, aes(x = mean_price, y = mean_rating), size = 2, colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  ggtitle("1) max. rating") +
  xlab("Price per night") + ylab("Rating")

# Step 2: among those, the rows with a price below 150 in green
# NOTE(review): the 150 threshold is hard-coded for this data set
cheap_top_rated = df[(df$mean_rating == - 5.0 & df$mean_price < 150), ]
p2 = p1 +
  geom_point(data = cheap_top_rated, colour = "green", size = 2) +
  ggtitle("2) min. price")

grid.arrange(p1, p2, ncol = 2)

# Pareto front (same construction as earlier in the script): sort by rating,
# ties by price, and keep the rows that set a new running minimum of the price
rating_price_order = order(df$mean_rating, df$mean_price, decreasing = FALSE)
P = df[rating_price_order, ]
P = P[which(!duplicated(cummin(P$mean_price))), ]

# Left: all points with the front in orange
p1 = ggplot(data = df, aes(x = mean_price, y = mean_rating)) +
  geom_point(size = 2) +
  geom_point(data = P, aes(x = mean_price, y = mean_rating), size = 2, colour = "orange") +
  geom_line(data = P, aes(x = mean_price, y = mean_rating), colour = "orange") +
  theme_bw() +
  ylim(c(-5, -2)) +
  xlab("Price per night") + ylab("Rating")

# Right: additionally mark the front member(s) with mean_rating == -4.5 in green
chosen = P[P$mean_rating == -4.5, ]
p2 = p1 + geom_point(data = chosen, aes(x = mean_price, y = mean_rating), colour = "green", size = 2)

g = grid.arrange(p1, p2, ncol = 2)
ggsave("../images/expedia-11-1.pdf", g, height = 2, width = 5)

# Objective-space curve of (fun1, fun2) together with the line 3 * 0.4 - 2 * x
# and the point (0.36, 0.48) marked in green
x = seq(-1, 4, length.out = 1000)
lin = 3 * 0.4 - 2 * x

curve_points = data.frame(f1 = fun1(x), f2 = fun2(x))
p2 = ggplot() +
  geom_point(data = curve_points, aes(x = f1, y = f2), size = 0.7) +
  geom_line(aes(x = x, y = lin)) +
  ylim(c(-3, 25)) +
  geom_point(aes(x = 0.36, y = 0.48), colour = "green", size = 3) +
  theme_bw()
p2

# Two quadratic objectives of a two-dimensional input
f1 = function(x) 0.01 * sum(x^2) - 2
f2 = function(x) 0.01 * sum(c(0.1, 0.3) * (x - c(-10, 20))^2)

# Evaluate both on a 100 x 100 grid over [-10, 20]^2
x1 = seq(-10, 20, length.out = 100)
x2 = x1
grid = expand.grid(x1 = x1, x2 = x2)
grid[["y1"]] = apply(grid[, 1:2], 1, f1)
grid[["y2"]] = apply(grid[, 1:2], 1, f2)

# Long format: one row per grid point and objective
melt = reshape2::melt(grid, id.vars = c("x1", "x2"))

# Raster of the values with contour lines coloured by objective
p = ggplot(data = melt) +
  geom_raster(aes(x = x1, y = x2, fill = value)) +
  geom_contour(aes(x = x1, y = x2, z = value, colour = variable), bins = 15) +
  ylim(c(-20, 40)) + xlim(c(-20, 40)) + theme_bw()
p
Loading

0 comments on commit 9f59ee2

Please sign in to comment.