Skip to content

Commit

Permalink
Wrapped up simulations
Browse files Browse the repository at this point in the history
  • Loading branch information
g0ulash committed Jul 16, 2020
1 parent 72bf6b4 commit 42efea1
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 3,260 deletions.
24 changes: 15 additions & 9 deletions 1_offline_ranking.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,33 @@ source("./policy_ur.R")
#############################################################

### Set seed
set.seed(1)
set.seed(333)

### Set number of interactions (horizon) and number of repeats (simulations)
### In the paper we used a horizon of 10000 and 10000 simulations
horizon <- 10000
simulations <- 5000
simulations <- 1000

### Set up functions to make offline dataset
### Build a noisy unimodal reward curve: a downward-opening parabola in x
### whose peak location is redrawn at random on every call.
###   x: numeric vector of points at which the curve is evaluated.
### Returns a list with
###   "data": -(x - c1)^2 + c2 plus Gaussian noise, one value per element of x
###   "max" : the noise-free peak height c2
unimodal_data <- function(x){
### Peak location, drawn uniformly from [0.25, 0.75]
c1 <- runif(1, 0.25, 0.75)
### Peak height of the noise-free curve
c2 <- 1
### NOTE(review): the two return() lines below differ only in the noise sd
### (0.01 vs 0.05) and look like the before/after pair of a rendered diff.
### As written only the first one executes -- confirm which sd is intended.
return(list("data" = -(x - c1) ^2 + c2 + rnorm(length(x), 0, 0.01), "max" = c2))
return(list("data" = -(x - c1) ^2 + c2 + rnorm(length(x), 0, 0.05), "max" = c2))
}

### Build a noisy bimodal reward curve: the sum of two normal densities
### truncated to [0, 1], with means and sds redrawn at random on every call.
###   x: numeric vector of points at which the curve is evaluated.
### Returns a list with
###   "data": y1 + y2 plus Gaussian noise, one value per element of x
###   "max" : maxval, the value this function reports as the curve's maximum
### NOTE(review): several statements below appear twice with different
### constants and look like before/after pairs of a rendered diff -- confirm
### which version of each pair is intended before running this code.
bimodal_data <- function(x){
### Mean of the first (left) component, uniform on [0.15, 0.2]
mu1 <- runif(1, 0.15, 0.2)
### sd of the first component; the second assignment overwrites the first,
### and both lines consume a random draw
sd1 <- runif(1, 0.1, 0.15)
sd1 <- runif(1, 0.05, 0.15)
### Mean of the second (right) component, uniform on [0.7, 0.85]
mu2 <- runif(1, 0.7, 0.85)
### sd of the second component; again assigned twice, the second value wins
sd2 <- runif(1, 0.1, 0.15)
sd2 <- runif(1, 0.05, 0.15)
### Component densities evaluated at x, each truncated to the interval [0, 1]
y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1)
y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2)
### Value of y1 + y2 at x = mu2
maxval <- truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
### NOTE(review): as written the function returns here (noise sd 0.01), so
### the if/else and the second return() below are never reached.
return(list("data" = y1 + y2 + rnorm(length(x), 0, 0.01), "max" = maxval))
### NOTE(review): when sd2 >= sd1 the first component is the narrower one,
### yet the sum is evaluated at mu2 -- confirm the condition is not inverted.
if (sd2 >= sd1) {
maxval <- truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
} else {
### NOTE(review): this adds component 1 at mu1 to component 2 at mu2, which
### is not the value of y1 + y2 at any single x -- presumably an upper bound
### on the true maximum rather than the maximum itself; verify.
maxval <- truncnorm::dtruncnorm(mu1, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
}
return(list("data" = y1 + y2 + rnorm(length(x), 0, 0.05), "max" = maxval))
}

functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data))
Expand All @@ -57,8 +61,8 @@ functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data)
deltas <- c(0, 0.5, 0.2, 0.1, 0.05, 0.01)

### Parameters for LiF
int_time <- 10
amplitude <- 0.035
int_time <- 25
amplitude <- 0.05
learn_rate <- 2*pi/int_time
omega <- 1

Expand Down Expand Up @@ -106,6 +110,7 @@ for(i in 1:length(deltas)){
ylab("Cumulative regret") +
xlab("Time") +
theme(legend.position = "none") +
theme_bw(base_size = 15) +
ggtitle("Online")
plots[[i]] <- g
} else {
Expand All @@ -115,6 +120,7 @@ for(i in 1:length(deltas)){
ylab("Cumulative regret") +
xlab("Time") +
theme(legend.position = "none") +
theme_bw(base_size = 15) +
ggtitle(paste0("Delta ", deltas[i]))
plots[[i]] <- g
}
Expand Down
24 changes: 17 additions & 7 deletions 2a_offline_empirical.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ omega <- 1
### Set up all agents with different amplitudes and run them for each bandit
bandit <- OfflineContinuumBandit$new(data = dt, max_bool = TRUE, delta = deltas, horizon = horizon)

agents <- list(Agent$new(UniformRandomPolicy$new(), bandit),
Agent$new(EpsilonFirstLinearRegressionPolicy$new(), bandit),
Agent$new(LifPolicyRandstart$new(int_time, amplitude, learn_rate, omega), bandit),
Agent$new(ThompsonBayesianLinearPolicy$new(), bandit))
agents <- list(Agent$new(UniformRandomPolicy$new(), bandit, name = "UR"),
Agent$new(EpsilonFirstLinearRegressionPolicy$new(epsilon = 100), bandit, name = "E-First"),
Agent$new(LifPolicyRandstart$new(int_time, amplitude, learn_rate, omega), bandit, name = "LiF"),
Agent$new(ThompsonBayesianLinearPolicy$new(), bandit, name = "TBL"))

history <- Simulator$new(agents = agents,
horizon = horizon,
Expand All @@ -67,7 +67,17 @@ history <- Simulator$new(agents = agents,
save_interval = 10)$run()


cairo_ps("offline_empirical.eps")
plot(history, regret=FALSE, type="cumulative", legend_labels = c("UR", "E-First", "LiF", "TBL"), disp="ci", trunc_per_agent = FALSE)
dev.off()
history$update_statistics()

history_cumulative <- history$get_cumulative_data()
colnames(history_cumulative)[which(names(history_cumulative) == "agent")] <- "Policy"

g <- ggplot(history_cumulative) +
geom_line(aes(y = cum_reward, x = t, color=Policy)) +
geom_ribbon(aes(ymin = cum_reward - cum_reward_ci, ymax = cum_reward + cum_reward_ci, x = t, fill=Policy), alpha = 0.2, show.legend=FALSE) +
ylab("Cumulative reward") +
xlab("Time") +
theme(legend.position = "none") +
theme_bw(base_size = 15)
ggsave("offline_empirical.pdf", g, device="pdf", width=10, height=7)
print(g)
4 changes: 2 additions & 2 deletions 2b_company_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ data <- data[data$store=="15337",]
dat2 <- aggregate(data, by=list(data$context.UserID), FUN=mean)

dat2$bb <- (1-dat2$split) * dat2$revenue
c <- qplot(dat2$split, dat2$bb, xlab="Split of discount offered to customer", ylab="Profit for rebate company in euros") + stat_smooth() + geom_vline(xintercept = 0.5, linetype = "dashed", color = "red")
ggsave("company_results.eps", c, device="eps")
c <- qplot(dat2$split, dat2$bb, xlab="Split of discount offered to customer", ylab="Profit for rebate company in euros") + stat_smooth() + geom_vline(xintercept = 0.5, linetype = "dashed", color = "red") + theme_bw(base_size = 15)
ggsave("company_results.pdf", c, device="pdf")
print(c)
4 changes: 2 additions & 2 deletions 3_offline_par_lif.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ set.seed(1)
### Set number of interactions (horizon) and number of repeats (simulations)
### In the paper we used a horizon of 10000 and 10000 simulations
horizon <- 10000
simulations <- 10
simulations <- 1000

### Set LiF specific parameters. Start point is set within the policy itself.
int_time <- 50
Expand Down Expand Up @@ -231,4 +231,4 @@ for (dif_plot in different_plots){
ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],"_delta.eps"), device="eps")
print(g)
}
}
}
4 changes: 2 additions & 2 deletions 4_offline_par_tbl.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ set.seed(1)
### Set number of interactions (horizon) and number of repeats (simulations)
### In the paper we used a horizon of 10000 and 10000 simulations
horizon <- 10000
simulations <- 10
simulations <- 1000

### Set TBL specific parameters
J <- matrix(c(5, 4, -4), nrow=1, ncol=3, byrow = TRUE)
Expand Down Expand Up @@ -244,4 +244,4 @@ for (dif_plot in different_plots){
ggsave(g, file=paste0("offline_tbl_function_",dif_plot[[1]],"_delta.eps"), device="eps")
print(g)
}
}
}
Loading

0 comments on commit 42efea1

Please sign in to comment.