diff --git a/README.md b/README.md index 13fa779..97e99a5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # offline-parameter-tuning Code for the offline parameter tuning paper (submitted to IDA 2020) + +To replicate the plots, see demo_lif_bandit.R and demo_tbl_bandit.R diff --git a/bandit_continuum_function_bimodal.R b/bandit_continuum_function_bimodal.R new file mode 100644 index 0000000..4e059d2 --- /dev/null +++ b/bandit_continuum_function_bimodal.R @@ -0,0 +1,133 @@ +#' @export +library("truncnorm") +ContinuumBanditBimodal <- R6::R6Class( + inherit = Bandit, + class = FALSE, + public = list( + arm_function = NULL, + mu1 = NULL, + sd1 = NULL, + mu2 = NULL, + sd2 = NULL, + class_name = "ContinuumBanditBimodal", + initialize = function() { + self$arm_function <- function(x, mu1, sd1, mu2, sd2) { + y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1) + y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2) + return(y1 + y2 + rnorm(length(x), 0, 0.01)) + } + super$initialize() + self$d <- 1 + self$k <- 1 + }, + post_initialization = function(){ + self$mu1 <- runif(1, 0.15, 0.35) + self$sd1 <- runif(1, 0.1, 0.2) + self$mu2 <- runif(1, 0.65, 0.85) + self$sd2 <- runif(1, 0.1, 0.2) + }, + get_context = function(t) { + context <- list() + context$k <- self$k + context$d <- self$d + context + }, + get_reward = function(t, context, action) { + reward <- list( + reward = self$arm_function(action$choice, self$mu1, self$sd1, self$mu2, self$sd2), + optimal_reward = self$mu2 + ) + } + ) +) + +#' Bandit: ContinuumBandit +#' +#' A function-based continuum multi-armed bandit +#' where arms are chosen from a subset of the real line and the mean rewards +#' are assumed to be a continuous function of the arms. +#' +#' @section Usage: +#' \preformatted{ +#' bandit <- ContinuumBandit$new(FUN) +#' } +#' +#' @name ContinuumBandit +#' +#' +#' @section Arguments: +#' \describe{ +#' \item{FUN}{continuous function.} +#' } +#' +#' @section Methods: +#' +#' \describe{ +#' +#' \item{\code{new(FUN)}}{ generates and instantiates a new \code{ContinuumBandit} instance. } +#' +#' \item{\code{get_context(t)}}{ +#' argument: +#' \itemize{ +#' \item \code{t}: integer, time step \code{t}. +#' } +#' returns a named \code{list} +#' containing the current \code{d x k} dimensional matrix \code{context$X}, +#' the number of arms \code{context$k} and the number of features \code{context$d}. +#' } +#' +#' \item{\code{get_reward(t, context, action)}}{ +#' arguments: +#' \itemize{ +#' \item \code{t}: integer, time step \code{t}. +#' \item \code{context}: list, containing the current \code{context$X} (d x k context matrix), +#' \code{context$k} (number of arms) and \code{context$d} (number of context features) +#' (as set by \code{bandit}). +#' \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}). +#' } +#' returns a named \code{list} containing \code{reward$reward} and, where computable, +#' \code{reward$optimal} (used by "oracle" policies and to calculate regret).
+#' } +# +#' } +#' +#' @seealso +#' +#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, +#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} +#' +#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, +#' \code{\link{OfflineReplayEvaluatorBandit}} +#' +#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} +#' +#' @examples +#' \dontrun{ +#' +#' horizon <- 1500 +#' simulations <- 100 +#' +#' continuous_arms <- function(x) { +#' -0.1*(x - 5) ^ 2 + 3.5 + rnorm(length(x),0,0.4) +#' } +#' +#' int_time <- 100 +#' amplitude <- 0.2 +#' learn_rate <- 0.3 +#' omega <- 2*pi/int_time +#' x0_start <- 2.0 +#' +#' policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start) +#' +#' bandit <- ContinuumBandit$new(FUN = continuous_arms) +#' +#' agent <- Agent$new(policy,bandit) +#' +#' history <- Simulator$new( agents = agent, +#' horizon = horizon, +#' simulations = simulations, +#' save_theta = TRUE )$run() +#' +#' plot(history, type = "average", regret = FALSE) +#' } +NULL diff --git a/bandit_continuum_function_unimodal.R b/bandit_continuum_function_unimodal.R new file mode 100644 index 0000000..da399ae --- /dev/null +++ b/bandit_continuum_function_unimodal.R @@ -0,0 +1,126 @@ +#' @export +ContinuumBanditUnimodal <- R6::R6Class( + inherit = Bandit, + class = FALSE, + public = list( + arm_function = NULL, + c1 = NULL, + c2 = NULL, + class_name = "ContinuumBanditUnimodal", + initialize = function() { + self$c2 <- 1 + self$arm_function <- function(x, c1 = 0.25, c2 = 0.75) { + -(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01) + } + super$initialize() + self$d <- 1 + self$k <- 1 + }, + post_initialization = function(){ + self$c1 <- runif(1,0.25,0.75) + }, + get_context = function(t) { + context <- list() + context$k <- self$k + context$d <- self$d + context + }, + get_reward = function(t, context, action) { + reward <- list( + reward = self$arm_function(action$choice, self$c1, self$c2), + optimal_reward = self$c2 + ) + } + ) +) + +#' Bandit: ContinuumBandit +#' +#' A function based continuum multi-armed bandit +#' where arms are chosen from a subset of the real line and the mean rewards +#' are assumed to be a continuous function of the arms. +#' +#' @section Usage: +#' \preformatted{ +#' bandit <- ContinuumBandit$new(FUN) +#' } +#' +#' @name ContinuumBandit +#' +#' +#' @section Arguments: +#' \describe{ +#' \item{FUN}{continuous function.} +#' } +#' +#' @section Methods: +#' +#' \describe{ +#' +#' \item{\code{new(FUN)}}{ generates and instantializes a new \code{ContinuumBandit} instance. } +#' +#' \item{\code{get_context(t)}}{ +#' argument: +#' \itemize{ +#' \item \code{t}: integer, time step \code{t}. +#' } +#' returns a named \code{list} +#' containing the current \code{d x k} dimensional matrix \code{context$X}, +#' the number of arms \code{context$k} and the number of features \code{context$d}. +#' } +#' +#' \item{\code{get_reward(t, context, action)}}{ +#' arguments: +#' \itemize{ +#' \item \code{t}: integer, time step \code{t}. +#' \item \code{context}: list, containing the current \code{context$X} (d x k context matrix), +#' \code{context$k} (number of arms) and \code{context$d} (number of context features) +#' (as set by \code{bandit}). +#' \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}). 
+#' } +#' returns a named \code{list} containing \code{reward$reward} and, where computable, +#' \code{reward$optimal} (used by "oracle" policies and to calculate regret). +#' } +# +#' } +#' +#' @seealso +#' +#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, +#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} +#' +#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, +#' \code{\link{OfflineReplayEvaluatorBandit}} +#' +#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} +#' +#' @examples +#' \dontrun{ +#' +#' horizon <- 1500 +#' simulations <- 100 +#' +#' continuous_arms <- function(x) { +#' -0.1*(x - 5) ^ 2 + 3.5 + rnorm(length(x),0,0.4) +#' } +#' +#' int_time <- 100 +#' amplitude <- 0.2 +#' learn_rate <- 0.3 +#' omega <- 2*pi/int_time +#' x0_start <- 2.0 +#' +#' policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start) +#' +#' bandit <- ContinuumBandit$new(FUN = continuous_arms) +#' +#' agent <- Agent$new(policy,bandit) +#' +#' history <- Simulator$new( agents = agent, +#' horizon = horizon, +#' simulations = simulations, +#' save_theta = TRUE )$run() +#' +#' plot(history, type = "average", regret = FALSE) +#' } +NULL diff --git a/bandit_continuum_offon.R b/bandit_continuum_offon.R new file mode 100644 index 0000000..df930c0 --- /dev/null +++ b/bandit_continuum_offon.R @@ -0,0 +1,42 @@ +#' @export +OnlineOfflineContinuumBandit <- R6::R6Class( + inherit = Bandit, + class = FALSE, + private = list( + S = NULL + ), + public = list( + class_name = "OnlineOfflineContinuumBandit", + delta = NULL, + horizon = NULL, + choice = NULL, + arm_function = NULL, + initialize = function(FUN, delta, horizon) { + self$arm_function <- FUN + self$horizon <- horizon + self$delta <- delta + self$k <- 1 + }, + post_initialization = function() { + self$choice <- runif(self$horizon, min=0, max=1) + private$S <- data.frame(self$choice, self$arm_function(self$choice)) + private$S <- private$S[sample(nrow(private$S)),] + colnames(private$S) <- c('choice', 'reward') + }, + get_context = function(index) { + context <- list() + context$k <- self$k + context + }, + get_reward = function(index, context, action) { + reward_at_index <- as.double(private$S$reward[[index]]) + if (abs(private$S$choice[[index]] - action$choice) < self$delta) { + reward <- list( + reward = reward_at_index + ) + } else { + NULL + } + } + ) +) diff --git a/bandit_continuum_offon_kern.R b/bandit_continuum_offon_kern.R new file mode 100644 index 0000000..50d2c09 --- /dev/null +++ b/bandit_continuum_offon_kern.R @@ -0,0 +1,51 @@ +#' @export +OnlineOfflineContinuumBanditKernel <- R6::R6Class( + inherit = Bandit, + class = FALSE, + private = list( + S = NULL, + n = NULL + ), + public = list( + class_name = "OnlineOfflineContinuumBanditKernel", + delta = NULL, + c1 = NULL, + c2 = NULL, + arm_function = NULL, + choice = NULL, + h = NULL, + kernel = NULL, + horizon = NULL, + initialize = function(FUN, horizon) { + self$arm_function <- FUN + self$k <- 1 + self$horizon <- horizon + self$h <- horizon^(-1/5) + self$kernel <- function(action_true, action_choice, bandwith){ 1/sqrt(2*pi)*exp(((action_choice - action_true) / bandwith)^2/2) } + }, + post_initialization = function() { + self$choice <- runif(self$horizon, min=0, max=1) + private$S <- data.frame(self$choice, self$arm_function(self$choice)) + private$S <- private$S[sample(nrow(private$S)),] + colnames(private$S) <- c('choice', 
'reward') + private$n <- 0 + }, + get_context = function(index) { + context <- list() + context$k <- self$k + context + }, + get_reward = function(index, context, action) { + reward_at_index <- as.double(private$S$reward[[index]]) + #kern_value <- self$kernel(action_true = private$S$choice[[index]], action_choice = action$choice, bandwith = self$h) + temp_u <- (action$choice - private$S$choice[[index]]) / self$h + kern_value <- 1/sqrt(2*pi) * exp(-temp_u^2 / 2) + #inc(private$n) <- 1 + #print(paste0("Kernel value: ", kern_value, "action choice: ", action$choice, "true action: ", private$S$choice[[index]], "divy: ", temp_u)) + reward <- list( + reward = (kern_value * reward_at_index), + optimal_reward = self$c2 + ) + } + ) +) diff --git a/demo_lif_bandit.R b/demo_lif_bandit.R new file mode 100644 index 0000000..7604eac --- /dev/null +++ b/demo_lif_bandit.R @@ -0,0 +1,234 @@ +# This is the demo file for the online and offline parameter tuning of Lock-in Feedback +# For policy and bandit specific code, please look at the files (as sourced above). +# First make sure to install contextual +# (see https://github.com/Nth-iteration-labs/contextual for a how to). +# +# For any questions, please contact the authors. + +library(contextual) +library(here) +library(ggplot2) + +source("./bandit_continuum_function_unimodal.R") +source("./bandit_continuum_function_bimodal.R") +source("./bandit_continuum_offon.R") +source("./bandit_continuum_offon_kern.R") +source("./policy_cont_lif_randstart.R") + +############################################################# +# # +# Online evaluation for LiF # +# # +############################################################# + +### Set seed +set.seed(1) + +### Set number of interactions (horizon) and number of repeats (simulations) +### In the paper we used a horizon of 10000 and 10000 simulations +horizon <- 10000 +simulations <- 10 + +### Set LiF specific parameters. Start point is set within the policy itself. 
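### For reference: at time t, LifPolicyRandstart proposes the action
### x0 + amplitude * cos(omega * t), clipped to [0, 1], and once int_time
### observations have been collected it nudges x0 by learn_rate times the mean
### of the demodulated signal. A minimal standalone sketch of the oscillation,
### with hypothetical example values (not used by the experiments below):
lif_proposal <- function(x0, amplitude, omega, t) {
  pmin(pmax(x0 + amplitude * cos(omega * t), 0), 1)  # cosine perturbation around x0, kept in [0, 1]
}
# e.g. lif_proposal(x0 = 0.8, amplitude = 0.1, omega = 2 * pi / 50, t = 1:5)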
+int_time <- 50 +amplitude_list <- seq(0.002, 0.2, length.out = 20) +learn_rate <- .1 +omega <- 2*pi/int_time + +### Set up two different bandits +bandits <- c(ContinuumBanditUnimodal$new(), ContinuumBanditBimodal$new()) + +### Set up all agents with different amplitudes and run them for each bandit +for (bandit in bandits){ + + agents <- list() + + for (i in 1:length(amplitude_list)){ + agents <- append(agents, Agent$new(LifPolicyRandstart$new(int_time, amplitude_list[i], learn_rate, omega), bandit)) + } + + history <- Simulator$new(agents = agents, + horizon = horizon, + simulations = simulations, + do_parallel = TRUE, + save_interval = 10)$run() + + ### Post-processing for plotting + iters <- length(amplitude_list) + reward_rate <- c() + confs <- c() + + for(i in 1:iters){ + reward_rate[[i]] <- history$cumulative[[i]]$cum_reward_rate + confs[[i]] <- history$cumulative[[i]]$cum_reward_rate_sd / sqrt(simulations) * qnorm(0.975) + } + + df <- data.frame(amp = amplitude_list, reward = reward_rate, ci = confs, delta = rep("Online")) + g <- ggplot(data = df, aes(x=amp, y=reward)) + + geom_line(colour = 'red') + + geom_errorbar(aes(ymin=reward-ci, ymax=reward+ci, color='red'), width=0.01) + + labs(x = "Amplitude", y = "Average reward per interaction") + + theme_bw(base_size = 15) + + theme(legend.position = "none") + + ### Saving data to use later + if (bandit$class_name == "ContinuumBanditBimodal"){ + online_lif_bimodal <- df + } else if (bandit$class_name == "ContinuumBanditUnimodal"){ + online_lif_unimodal <- df + } + + print(g) + print(amplitude_list[[which.max(reward_rate)]]) +} + +############################################################# +# # +# Offline evaluation for LiF # +# # +############################################################# + +### Set seed +set.seed(1) + +### Set number of interactions (horizon) and number of repeats (simulations) +### Typically same as in online evaluation +horizon <- 1000 +simulations <- 2 + +### Set LiF specific parameters. Start point is set within the policy itself. +### Typically same as in online evaluation +int_time <- 50 +amplitude_list <- seq(0.002, 0.2, length.out = 20) +learn_rate <- .1 +omega <- 2*pi/int_time + +### Set up functions to make offline dataset +unimodal_data <- function(x) { + c1 <- runif(1, 0.25, 0.75) + c2 <- 1 + return(-(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01)) +} + +bimodal_data <- function(x){ + mu1 <- runif(1, 0.15, 0.2) + sd1 <- runif(1, 0.1, 0.15) + mu2 <- runif(1, 0.7, 0.85) + sd2 <- runif(1, 0.1, 0.15) + y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1) + y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2) + return(y1 + y2 + rnorm(length(x), 0, 0.01)) +} + +functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data)) + +### Set up different delta's for the delta and kernel method. If delta = 0 we resort to the kernel method. 
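### For intuition: OnlineOfflineContinuumBandit only replays a logged interaction
### when the logged action lies within delta of the action the policy proposes,
### while OnlineOfflineContinuumBanditKernel weights every logged reward by a
### Gaussian kernel in the distance between the two actions. A rough sketch of
### the expected share of replayed points, assuming (for illustration only) that
### both logged and proposed actions are uniform on [0, 1]:
usable_fraction <- function(delta, n = 1e5) {
  logged   <- runif(n)                  # logged actions
  proposed <- runif(n)                  # actions proposed by some policy
  mean(abs(logged - proposed) < delta)  # share of interactions that would be replayed
}
# e.g. usable_fraction(0.1) is roughly 0.19 and usable_fraction(0.5) roughly 0.75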
+deltas <- c(0, 0.01, 0.1, 0.5) + +### Pre-allocation +offline_lif_unimodal_kernel <- data.frame() +offline_lif_unimodal <- data.frame() +offline_lif_bimodal_kernel <- data.frame() +offline_lif_bimodal <- data.frame() + +### Set up all agents with different amplitudes and run them for each bandit +### Do this for each specified delta +for (f in functions){ + for (d in deltas){ + if(d == 0){ + bandit <- OnlineOfflineContinuumBanditKernel$new(FUN = f[[2]], horizon = horizon) + } else { + bandit <- OnlineOfflineContinuumBandit$new(FUN = f[[2]], delta = d, horizon = horizon) + } + + agents <- list() + + for (amp in amplitude_list){ + agents <- append(agents, Agent$new(LifPolicyRandstart$new(int_time, amp, learn_rate, omega), bandit)) + } + + history <- Simulator$new(agents = agents, + horizon = horizon, + simulations = simulations, + policy_time_loop = FALSE, + save_interval = 20)$run() + + ### Post-processing for plotting + iters <- length(amplitude_list) + reward_rate <- c() + confs <- c() + + for(k in 1:iters){ + reward_rate[[k]] <- history$cumulative[[k]]$cum_reward_rate + dt <- history$get_data_table() + df_split <- split(dt, dt$agent) + for(dd in df_split){ + dd <- as.data.table(dd) + maxes <- dd[, .I[which.max(t)], by=sim]$V1 + select <- dd[maxes]$cum_reward_rate + confs[[k]] <- sd(select) / sqrt(simulations) * qnorm(0.975) + } + } + history$clear_data_table() + if (d == 0){ + df <- data.frame(amp = amplitude_list, reward = reward_rate, delta = as.factor(rep("Kernel")), ci = confs) + } else { + df <- data.frame(amp = amplitude_list, reward = reward_rate, delta = as.factor(rep(d)), ci = confs) + } + + if (f[[1]] == "bimodal"){ + if (d == 0){ + offline_lif_bimodal_kernel <- rbind(offline_lif_bimodal_kernel, df) + } else { + offline_lif_bimodal <- rbind(offline_lif_bimodal, df) + } + } else if(f[[1]] == "unimodal"){ + if (d == 0){ + offline_lif_unimodal_kernel <- rbind(offline_lif_unimodal_kernel, df) + } else { + offline_lif_unimodal <- rbind(offline_lif_unimodal, df) + } + } + } +} + +### Plotting both online and offline data together +different_plots <- list( + list("unimodal", rbind(online_lif_unimodal, offline_lif_unimodal)), + list("unimodal_kernel", offline_lif_unimodal_kernel), + list("bimodal", rbind(online_lif_bimodal, offline_lif_bimodal)), + list("bimodal_kernel", offline_lif_bimodal_kernel) +) + +for (dif_plot in different_plots){ + if(dif_plot[[1]] == "unimodal_kernel"){ + g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) + + geom_vline(xintercept = 0.115, linetype = "dotted", color = "black", size = 1.5) + + theme(legend.position = "right") + #"none" + labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") + + theme_bw(base_size = 15) + ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],".eps"), device="eps") + print(g) + } else if(dif_plot[[1]] == "bimodal_kernel"){ + g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) + + geom_vline(xintercept = 0.035, linetype = "dotted", color = "black", size = 1.5) + + theme(legend.position = "right") + #"none" + labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") + + theme_bw(base_size = 15) + ggsave(g, 
file=paste0("offline_lif_function_",dif_plot[[1]],".eps"), device="eps") + print(g) + } else if(dif_plot[[1]] == "unimodal" || dif_plot[[1]] == "bimodal") { + g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) + + theme(legend.position = "right") + #"none" + labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") + + theme_bw(base_size = 15) + ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],"_delta.eps"), device="eps") + print(g) + } +} \ No newline at end of file diff --git a/demo_tbl_bandit.R b/demo_tbl_bandit.R new file mode 100644 index 0000000..24388e5 --- /dev/null +++ b/demo_tbl_bandit.R @@ -0,0 +1,247 @@ +# This is the demo file for the online and offline parameter tuning of Thompson sampling +# for Bayesian linear regression. For policy and bandit specific code, please look at +# the files (as sourced above). First make sure to install contextual +# (see https://github.com/Nth-iteration-labs/contextual for a how to). +# +# For any questions, please contact the authors. + +library(contextual) +library(here) +library(ggplot2) + +source("./bandit_continuum_function_unimodal.R") +source("./bandit_continuum_function_bimodal.R") +source("./bandit_continuum_offon.R") +source("./bandit_continuum_offon_kern.R") +source("./policy_tbl.R") + +############################################################# +# # +# Online evaluation for TBL # +# # +############################################################# + +### Set seed +set.seed(1) + +### Set number of interactions (horizon) and number of repeats (simulations) +### In the paper we used a horizon of 10000 and 10000 simulations +horizon <- 10000 +simulations <- 10 + +### Set TBL specific parameters +J <- matrix(c(5, 4, -4), nrow=1, ncol=3, byrow = TRUE) +err <- 1 +precision_list <- list(matrix(diag(c(0.01,0.01,0.02)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(0.1,0.1,0.2)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(0.4,0.4,1)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(1,1,2)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(10,10,20)), nrow=3, ncol=3, byrow = TRUE)) + +### Set up two different bandits +bandits <- c(ContinuumBanditUnimodal$new(), ContinuumBanditBimodal$new()) + +### Set up all agents with different prior precisions and run them for each bandit +for (bandit in bandits){ + + agents <- list() + + for (prec in precision_list){ + agents <- append(agents, Agent$new(ThompsonBayesianLinearPolicy$new(J = J, P = prec, err = err), bandit)) + } + + history <- Simulator$new(agents = agents, + horizon = horizon, + simulations = simulations, + do_parallel = TRUE, + save_interval = 10)$run() + + ### Post-processing for plotting + iters <- length(precision_list) + reward_rate <- c() + confs <- c() + + for(i in 1:iters){ + reward_rate[[i]] <- history$cumulative[[i]]$cum_reward_rate + confs[[i]] <- history$cumulative[[i]]$cum_reward_rate_sd / sqrt(simulations) * qnorm(0.975) + } + + df <- data.frame(prec = 1:length(precision_list), reward = reward_rate, ci = confs, delta = rep("Online")) + g <- ggplot(data = df, aes(x=prec, y=reward)) + + geom_line(colour = 'red') + + geom_errorbar(aes(ymin=reward-ci, ymax=reward+ci, color='red'), width=.2) + + labs(x = "Prior precision", y = "Average reward per interaction") + + scale_x_continuous(breaks = c(1,6), labels=c("Low", 
"High")) + + theme_bw(base_size = 15) + + theme(legend.position = "none") + + ### Saving data to use later + if (bandit$class_name == "ContinuumBanditBimodal"){ + online_tbl_bimodal <- df + } else if (bandit$class_name == "ContinuumBanditUnimodal"){ + online_tbl_unimodal <- df + } + + print(g) + print(precision_list[[which.max(reward_rate)]]) +} + +############################################################# +# # +# Offline evaluation for TBL # +# # +############################################################# + +### Set seed +set.seed(1) + +### Set number of interactions (horizon) and number of repeats (simulations) +### Typically same as in online evaluation +horizon <- 1000 +simulations <- 2 + +### Set TBL specific parameters +### Typically same as in online evaluation +J <- matrix(c(5, 4, -4), nrow=1, ncol=3, byrow = TRUE) +err <- 1 +precision_list <- list(matrix(diag(c(0.01,0.01,0.02)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(0.1,0.1,0.2)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(0.4,0.4,1)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(1,1,2)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE), + matrix(diag(c(10,10,20)), nrow=3, ncol=3, byrow = TRUE)) + +### Set up functions to make offline dataset +unimodal_data <- function(x) { + c1 <- runif(1, 0.25, 0.75) + c2 <- 1 + return(-(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01)) +} + +bimodal_data <- function(x){ + mu1 <- runif(1, 0.15, 0.2) + sd1 <- runif(1, 0.1, 0.15) + mu2 <- runif(1, 0.7, 0.85) + sd2 <- runif(1, 0.1, 0.15) + y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1) + y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2) + return(y1 + y2 + rnorm(length(x), 0, 0.01)) +} + +functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data)) + +### Set up different delta's for the delta and kernel method. If delta = 0 we resort to the kernel method. 
+deltas <- c(0, 0.01, 0.1, 0.5) + +### Pre-allocation +offline_tbl_unimodal_kernel <- data.frame() +offline_tbl_unimodal <- data.frame() +offline_tbl_bimodal_kernel <- data.frame() +offline_tbl_bimodal <- data.frame() + +### Set up all agents with different prior precisions and run them for each bandit +### Do this for each specified delta +for (f in functions){ + for (d in deltas){ + if(d == 0){ + bandit <- OnlineOfflineContinuumBanditKernel$new(FUN = f[[2]], horizon = horizon) + } else { + bandit <- OnlineOfflineContinuumBandit$new(FUN = f[[2]], delta = d, horizon = horizon) + } + + agents <- list() + + for (prec in precision_list){ + agents <- append(agents, Agent$new(ThompsonBayesianLinearPolicy$new(J = J, P = prec, err = err), bandit)) + } + + history <- Simulator$new(agents = agents, + horizon = horizon, + simulations = simulations, + policy_time_loop = FALSE, + save_interval = 20)$run() + + ### Post-processing for plotting + iters <- length(precision_list) + reward_rate <- c() + confs <- c() + + for(k in 1:iters){ + reward_rate[[k]] <- history$cumulative[[k]]$cum_reward_rate + dt <- history$get_data_table() + df_split <- split(dt, dt$agent) + for(dd in df_split){ + dd <- as.data.table(dd) + maxes <- dd[, .I[which.max(t)], by=sim]$V1 + select <- dd[maxes]$cum_reward_rate + confs[[k]] <- sd(select) / sqrt(simulations) * qnorm(0.975) + } + } + + history$clear_data_table() + if (d == 0){ + df <- data.frame(prec = 1:length(precision_list), reward = reward_rate, delta = as.factor(rep("Kernel")), ci = confs) + } else { + df <- data.frame(prec = 1:length(precision_list), reward = reward_rate, delta = as.factor(rep(d)), ci = confs) + } + + if (f[[1]] == "bimodal"){ + if (d == 0){ + offline_tbl_bimodal_kernel <- rbind(offline_tbl_bimodal_kernel, df) + } else { + offline_tbl_bimodal <- rbind(offline_tbl_bimodal, df) + } + } else if(f[[1]] == "unimodal"){ + if (d == 0){ + offline_tbl_unimodal_kernel <- rbind(offline_tbl_unimodal_kernel, df) + } else { + offline_tbl_unimodal <- rbind(offline_tbl_unimodal, df) + } + } + } +} + +### Plotting both online and offline data together +different_plots <- list( + list("unimodal", rbind(online_tbl_unimodal, offline_tbl_unimodal)), + list("unimodal_kernel", offline_tbl_unimodal_kernel), + list("bimodal", rbind(online_tbl_bimodal, offline_tbl_bimodal)), + list("bimodal_kernel", offline_tbl_bimodal_kernel) +) + +for (dif_plot in different_plots){ + if(dif_plot[[1]] == "unimodal_kernel"){ + g <- ggplot(data = dif_plot[[2]], aes(x=prec, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.2) + + geom_vline(xintercept = 4, linetype = "dotted", color = "black", size = 1.5) + + theme(legend.position = "right") + #"none" + labs(x = "Prior precision", y = "Average reward per interaction", color="", fill="") + + scale_x_continuous(breaks = c(1,6), labels=c("Low", "High")) + + theme_bw(base_size = 15) + ggsave(g, file=paste0("offline_tbl_function_",dif_plot[[1]],".eps"), device="eps") + print(g) + } else if(dif_plot[[1]] == "bimodal_kernel"){ + g <- ggplot(data = dif_plot[[2]], aes(x=prec, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.2) + + geom_vline(xintercept = 4, linetype = "dotted", color = "black", size = 1.5) + + theme(legend.position = "right") + #"none" + labs(x = "Prior precision", y = "Average 
reward per interaction", color="", fill="") + + scale_x_continuous(breaks = c(1,6), labels=c("Low", "High")) + + theme_bw(base_size = 15) + ggsave(g, file=paste0("offline_tbl_function_",dif_plot[[1]],".eps"), device="eps") + print(g) + } else if(dif_plot[[1]] == "unimodal" || dif_plot[[1]] == "bimodal") { + g <- ggplot(data = dif_plot[[2]], aes(x=prec, y=reward, label = delta)) + + geom_line(aes(colour = as.factor(delta))) + + geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.2) + + theme(legend.position = "right") + #"none" + labs(x = "Prior precision", y = "Average reward per interaction", color="", fill="") + + scale_x_continuous(breaks = c(1,6), labels=c("Low", "High")) + + theme_bw(base_size = 15) + ggsave(g, file=paste0("offline_tbl_function_",dif_plot[[1]],"_delta.eps"), device="eps") + print(g) + } +} \ No newline at end of file diff --git a/policy_cont_lif_randstart.R b/policy_cont_lif_randstart.R new file mode 100644 index 0000000..c15c466 --- /dev/null +++ b/policy_cont_lif_randstart.R @@ -0,0 +1,96 @@ +#' @export +LifPolicyRandstart <- R6::R6Class( + portable = FALSE, + class = FALSE, + inherit = Policy, + public = list( + first = NULL, + inttime = NULL, # Integration time + amplitude = NULL, # Amplitude + learnrate = NULL, # Learnrate + omega = NULL, # Omega + x0_start = NULL, # x0 start value + class_name = "LifPolicyRandstart", + initialize = function(inttime,amplitude,learnrate,omega) { + super$initialize() + self$inttime <- inttime + self$amplitude <- amplitude + self$learnrate <- learnrate + self$omega <- omega + }, + post_initialization = function(){ + self$x0_start <- runif(1, min = 0.7, max = 1) + self$theta <- list('x0' = x0_start, 'Y' = rep(NA, inttime), 't_real' = 1) + }, + set_parameters = function(context_params) { + }, + get_action = function(t, context) { + action$choice <- self$theta$x0 + amplitude*cos(omega * self$theta$t_real) + if(action$choice < 0){ + action$choice <- 0 + }else if(action$choice > 1){ + action$choice <- 1 + } + action + }, + set_reward = function(t, context, action, reward) { + reward <- reward$reward + y <- amplitude*cos(omega * self$theta$t_real)*reward + self$theta$Y <- c(y, self$theta$Y)[seq_along(self$theta$Y)] + if (self$theta$t_real > inttime) + self$theta$x0 <- self$theta$x0 + learnrate * sum( self$theta$Y ) / inttime + self$theta$t_real <- self$theta$t_real + 1 + self$theta + } + ) +) + +#' Policy: Continuum Bandit Policy with Lock-in Feedback +#' +#' The continuum type Lock-in Feedback (LiF) policy is based on an approach used in physics and engineering, +#' where, if a physical variable y depends on the value of a well controllable physical variable x, +#' the search for argmax x f(x) can be solved via what is nowadays considered as standard electronics. +#' This approach relies on the possibility of making the variable x oscillate at a fixed frequency and to +#' look at the response of the dependent variable y at the very same frequency by means of a lock-in +#' amplifier. The method is particularly suitable when y is immersed in a high noise level, +#' where other more direct methods would fail. Furthermore, should the entire curve +#' shift (or, in other words, if argmax x f(x) changes in time, also known as concept drift), the circuit +#' will automatically adjust to the new situation and quickly reveal the new maximum position. 
+#' This approach is widely used in a very large number of applications, both in industry and research, +#' and is the basis for the Lock-in Feedback (LiF) method. +#' +#' Following this approach, Lock-in Feedback cycles through the following steps, again and again: +#' +#' * Oscillate a controllable independent variable X around a set value at a fixed pace. +#' * Apply the lock-in amplifier algorithm to obtain the amplitude of the outcome variable Y at +#' the pace you set in step 1. +#' * Is the amplitude of this variable zero? Congratulations, you have reached lock-in! +#' That is, you have found the optimal value of Y at the current value of X. +#' Still, this optimal value might shift over time, so move to step 1 and repeat the process to +#' maintain lock-in. +#' * Is the amplitude less than or greater than zero? +#' Then move the set value around which we are oscillating our independent variable X up or down on the basis +#' of the outcome. +#' +#' Now move to step 1 and repeat. +#' +#' @name LifPolicyRandstart +#' +#' @section Usage: +#' \preformatted{policy <- LifPolicyRandstart$new(inttime, amplitude, learnrate, omega)} +#' +#' @references +#' Kaptein, M. C., Van Emden, R., & Iannuzzi, D. (2016). Tracking the decoy: maximizing the decoy effect +#' through sequential experimentation. Palgrave Communications, 2, 16082. +#' +#' @seealso +#' +#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, +#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} +#' +#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, +#' \code{\link{OfflineReplayEvaluatorBandit}} +#' +#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} +NULL + diff --git a/policy_tbl.R b/policy_tbl.R new file mode 100644 index 0000000..238d003 --- /dev/null +++ b/policy_tbl.R @@ -0,0 +1,42 @@ +ThompsonBayesianLinearPolicy <- R6::R6Class( + portable = FALSE, + class = FALSE, + inherit = Policy, + public = list( + class_name = "ThompsonBayesianLinearPolicy", + J = NULL, + P = NULL, + err = NULL, + initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE), + P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE), + err=1) { + super$initialize() + self$J <- J + self$P <- P + self$err <- err + }, + set_parameters = function(context_params) { + self$theta <- list('J' = self$J, 'P' = self$P, 'err' = self$err) + }, + get_action = function(t, context) { + sigma <- solve(self$theta$P, tol = 1e-200) + mu <- sigma %*% matrix(self$theta$J) + betas <- contextual::mvrnorm(n = 1, mu, sigma) + action$choice <- -(betas[2] / (2*betas[3])) + if(action$choice > 1){ + action$choice <- 1 + } else if(action$choice < 0) { + action$choice <- 0 + } + action + }, + set_reward = function(t, context, action, reward) { + y <- reward$reward + x <- action$choice + x <- matrix(c(1,x,x^2), nrow = 1, ncol = 3, byrow = TRUE) + self$theta$J <- (x*y)/self$theta$err + self$theta$J + self$theta$P <- t(x)%*%x + self$theta$P + self$theta + } + ) +)
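
#' Policy: ThompsonBayesianLinearPolicy
#'
#' Thompson sampling for a Bayesian linear model that is quadratic in the action:
#' the policy keeps a Gaussian posterior with precision \code{P} and
#' precision-weighted mean \code{J}, samples coefficients from it and plays the
#' maximiser of the sampled quadratic on [0, 1].
#'
#' @examples
#' \dontrun{
#' # A minimal usage sketch (assumes contextual is loaded and the continuum
#' # bandits above are sourced); it mirrors the online evaluation in
#' # demo_tbl_bandit.R with a much smaller horizon, so results are illustrative.
#' policy  <- ThompsonBayesianLinearPolicy$new(J = matrix(c(5, 4, -4), nrow = 1, ncol = 3),
#'                                             P = diag(c(2, 2, 5)), err = 1)
#' bandit  <- ContinuumBanditUnimodal$new()
#' agent   <- Agent$new(policy, bandit)
#' history <- Simulator$new(agents = agent,
#'                          horizon = 100,
#'                          simulations = 5)$run()
#' plot(history, type = "average", regret = FALSE)
#' }
NULL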