Wrapped up simulations

Nth-iteration-labs · Jul 16, 2020 · commit 42efea1 · 1 parent 72bf6b4

Showing 10 changed files with 39 additions and 3,260 deletions.
diff --git a/1_offline_ranking.R b/1_offline_ranking.R
@@ -25,29 +25,33 @@ source("./policy_ur.R")
 #############################################################
 
 ### Set seed
-set.seed(1)
+set.seed(333)
 
 ### Set number of interactions (horizon) and number of repeats (simulations)
 ### In the paper we used a horizon of 10000 and 10000 simulations
 horizon            <- 10000
-simulations        <- 5000
+simulations        <- 1000
 
 ### Set up functions to make offline dataset
 unimodal_data <- function(x){
     c1 <- runif(1, 0.25, 0.75)
     c2 <- 1
-    return(list("data" = -(x - c1) ^2 + c2 + rnorm(length(x), 0, 0.01), "max" = c2))
+    return(list("data" = -(x - c1) ^2 + c2 + rnorm(length(x), 0, 0.05), "max" = c2))
 }
 
 bimodal_data <- function(x){
     mu1 <- runif(1, 0.15, 0.2)
-    sd1 <- runif(1, 0.1, 0.15)
+    sd1 <- runif(1, 0.05, 0.15)
     mu2 <- runif(1, 0.7, 0.85)
-    sd2 <- runif(1, 0.1, 0.15)
+    sd2 <- runif(1, 0.05, 0.15)
     y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1)
     y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2)
-    maxval <- truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
-    return(list("data" = y1 + y2 + rnorm(length(x), 0, 0.01), "max" = maxval))
+    if (sd2 >= sd1) {
+        maxval <- truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
+    } else {
+        maxval <- truncnorm::dtruncnorm(mu1, a=0, b=1, mean=mu1, sd=sd1) + truncnorm::dtruncnorm(mu2, a=0, b=1, mean=mu2, sd=sd2)
+    }
+    return(list("data" = y1 + y2 + rnorm(length(x), 0, 0.05), "max" = maxval))
 }
 
 functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data))
@@ -57,8 +61,8 @@ functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data)
 deltas <- c(0, 0.5, 0.2, 0.1, 0.05, 0.01)
 
 ### Parameters for LiF
-int_time <- 10
-amplitude <- 0.035
+int_time <- 25
+amplitude <- 0.05
 learn_rate <- 2*pi/int_time 
 omega <- 1
 
@@ -106,6 +110,7 @@ for(i in 1:length(deltas)){
              ylab("Cumulative regret") +
              xlab("Time") +
              theme(legend.position = "none") +
+             theme_bw(base_size = 15) +
              ggtitle("Online")
         plots[[i]] <- g
     } else { 
@@ -115,6 +120,7 @@ for(i in 1:length(deltas)){
              ylab("Cumulative regret") +
              xlab("Time") +
              theme(legend.position = "none") +
+             theme_bw(base_size = 15) +
              ggtitle(paste0("Delta ", deltas[i]))
         plots[[i]] <- g
     }

diff --git a/2a_offline_empirical.R b/2a_offline_empirical.R
@@ -55,10 +55,10 @@ omega <- 1
 ### Set up all agents with different amplitudes and run them for each bandit
 bandit <- OfflineContinuumBandit$new(data = dt, max_bool = TRUE, delta = deltas, horizon = horizon)
 
-agents <- list(Agent$new(UniformRandomPolicy$new(), bandit),
-             Agent$new(EpsilonFirstLinearRegressionPolicy$new(), bandit),
-             Agent$new(LifPolicyRandstart$new(int_time, amplitude, learn_rate, omega), bandit),
-             Agent$new(ThompsonBayesianLinearPolicy$new(), bandit))
+agents <- list(Agent$new(UniformRandomPolicy$new(), bandit, name = "UR"),
+             Agent$new(EpsilonFirstLinearRegressionPolicy$new(epsilon = 100), bandit, name = "E-First"),
+             Agent$new(LifPolicyRandstart$new(int_time, amplitude, learn_rate, omega), bandit, name = "LiF"),
+             Agent$new(ThompsonBayesianLinearPolicy$new(), bandit, name = "TBL"))
 
 history            <- Simulator$new(agents      = agents,
                                   horizon     = horizon,
@@ -67,7 +67,17 @@ history            <- Simulator$new(agents      = agents,
                                   save_interval = 10)$run()
 
 
-cairo_ps("offline_empirical.eps")
-plot(history, regret=FALSE, type="cumulative", legend_labels = c("UR", "E-First", "LiF", "TBL"), disp="ci", trunc_per_agent = FALSE)     
-dev.off()
+history$update_statistics()
 
+history_cumulative <- history$get_cumulative_data()
+colnames(history_cumulative)[which(names(history_cumulative) == "agent")] <- "Policy"
+
+g <- ggplot(history_cumulative) + 
+     geom_line(aes(y = cum_reward, x = t, color=Policy)) +
+     geom_ribbon(aes(ymin = cum_reward - cum_reward_ci, ymax = cum_reward + cum_reward_ci, x = t, fill=Policy), alpha = 0.2, show.legend=FALSE) +
+     ylab("Cumulative reward") +
+     xlab("Time") +
+     theme(legend.position = "none") +
+     theme_bw(base_size = 15) 
+ggsave("offline_empirical.pdf", g, device="pdf", width=10, height=7)
+print(g)
diff --git a/2b_company_results.R b/2b_company_results.R
@@ -11,6 +11,6 @@ data <- data[data$store=="15337",]
 dat2 <- aggregate(data, by=list(data$context.UserID), FUN=mean)
 
 dat2$bb <- (1-dat2$split) * dat2$revenue
-c <- qplot(dat2$split, dat2$bb, xlab="Split of discount offered to customer", ylab="Profit for rebate company in euros") + stat_smooth() + geom_vline(xintercept = 0.5, linetype = "dashed", color = "red")
-ggsave("company_results.eps", c, device="eps")
+c <- qplot(dat2$split, dat2$bb, xlab="Split of discount offered to customer", ylab="Profit for rebate company in euros") + stat_smooth() + geom_vline(xintercept = 0.5, linetype = "dashed", color = "red") + theme_bw(base_size = 15)
+ggsave("company_results.pdf", c, device="pdf")
 print(c)
diff --git a/3_offline_par_lif.R b/3_offline_par_lif.R
@@ -27,7 +27,7 @@ set.seed(1)
 ### Set number of interactions (horizon) and number of repeats (simulations)
 ### In the paper we used a horizon of 10000 and 10000 simulations
 horizon            <- 10000
-simulations        <- 10
+simulations        <- 1000
 
 ### Set LiF specific parameters. Start point is set within the policy itself.
 int_time           <- 50
@@ -231,4 +231,4 @@ for (dif_plot in different_plots){
     ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],"_delta.eps"), device="eps")
     print(g)
   }
-}
+}
diff --git a/4_offline_par_tbl.R b/4_offline_par_tbl.R
@@ -27,7 +27,7 @@ set.seed(1)
 ### Set number of interactions (horizon) and number of repeats (simulations)
 ### In the paper we used a horizon of 10000 and 10000 simulations
 horizon            <- 10000
-simulations        <- 10
+simulations        <- 1000
 
 ### Set TBL specific parameters
 J <- matrix(c(5, 4, -4), nrow=1, ncol=3, byrow = TRUE)
@@ -244,4 +244,4 @@ for (dif_plot in different_plots){
     ggsave(g, file=paste0("offline_tbl_function_",dif_plot[[1]],"_delta.eps"), device="eps")
     print(g)
   }
-}
+}