Showing 9 changed files with 973 additions and 0 deletions.
@@ -1,2 +1,4 @@
# offline-parameter-tuning
Code for the offline parameter tuning paper (submitted to IDA 2020)

To replicate the plots, see demo_lif_bandit.R and demo_tbl_bandit.R
@@ -0,0 +1,133 @@
library("truncnorm")

#' @export
ContinuumBanditBimodal <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  public = list(
    arm_function = NULL,
    mu1 = NULL,
    sd1 = NULL,
    mu2 = NULL,
    sd2 = NULL,
    class_name = "ContinuumBanditBimodal",
    initialize = function() {
      # Mean reward is the sum of two truncated-normal densities on [0, 1],
      # plus Gaussian observation noise.
      self$arm_function <- function(x, mu1, sd1, mu2, sd2) {
        y1 <- truncnorm::dtruncnorm(x, a = 0, b = 1, mean = mu1, sd = sd1)
        y2 <- truncnorm::dtruncnorm(x, a = 0, b = 1, mean = mu2, sd = sd2)
        return(y1 + y2 + rnorm(length(x), 0, 0.01))
      }
      super$initialize()
      self$d <- 1
      self$k <- 1
    },
    post_initialization = function() {
      # Draw the two modes afresh for every simulation repeat.
      self$mu1 <- runif(1, 0.15, 0.35)
      self$sd1 <- runif(1, 0.1, 0.2)
      self$mu2 <- runif(1, 0.65, 0.85)
      self$sd2 <- runif(1, 0.1, 0.2)
    },
    get_context = function(t) {
      context <- list()
      context$k <- self$k
      context$d <- self$d
      context
    },
    get_reward = function(t, context, action) {
      list(
        reward = self$arm_function(action$choice, self$mu1, self$sd1, self$mu2, self$sd2),
        optimal_reward = self$mu2
      )
    }
  )
)
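As a quick smoke test, this bandit can be paired with the LIF policy that the roxygen example below uses for the generic ContinuumBandit. A minimal sketch, assuming the contextual package is attached and LifPolicy is available; the parameter values here are illustrative for the [0, 1] action range, not taken from the paper:

```r
library(contextual)

# Illustrative settings for a [0, 1] action range.
int_time   <- 100
amplitude  <- 0.05
learn_rate <- 0.3
omega      <- 2 * pi / int_time

policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, 0.5)
bandit <- ContinuumBanditBimodal$new()
agent  <- Agent$new(policy, bandit)

history <- Simulator$new(agents = agent, horizon = 1000, simulations = 10)$run()
plot(history, type = "average", regret = FALSE)
```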
#' Bandit: ContinuumBandit
#'
#' A function-based continuum multi-armed bandit
#' where arms are chosen from a subset of the real line and the mean rewards
#' are assumed to be a continuous function of the arms.
#'
#' @section Usage:
#' \preformatted{
#' bandit <- ContinuumBandit$new(FUN)
#' }
#'
#' @name ContinuumBandit
#'
#' @section Arguments:
#' \describe{
#'   \item{FUN}{continuous function.}
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#'   \item{\code{new(FUN)}}{ generates and instantiates a new \code{ContinuumBandit} instance. }
#'
#'   \item{\code{get_context(t)}}{
#'      argument:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'      }
#'      returns a named \code{list}
#'      containing the current \code{d x k} dimensional matrix \code{context$X},
#'      the number of arms \code{context$k} and the number of features \code{context$d}.
#'   }
#'
#'   \item{\code{get_reward(t, context, action)}}{
#'      arguments:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'          \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#'          \code{context$k} (number of arms) and \code{context$d} (number of context features)
#'          (as set by \code{bandit}).
#'          \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}).
#'      }
#'      returns a named \code{list} containing \code{reward$reward} and, where computable,
#'      \code{reward$optimal} (used by "oracle" policies and to calculate regret).
#'   }
#'
#' }
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
#'
#' @examples
#' \dontrun{
#'
#' horizon     <- 1500
#' simulations <- 100
#'
#' continuous_arms <- function(x) {
#'   -0.1 * (x - 5)^2 + 3.5 + rnorm(length(x), 0, 0.4)
#' }
#'
#' int_time   <- 100
#' amplitude  <- 0.2
#' learn_rate <- 0.3
#' omega      <- 2 * pi / int_time
#' x0_start   <- 2.0
#'
#' policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start)
#'
#' bandit <- ContinuumBandit$new(FUN = continuous_arms)
#'
#' agent  <- Agent$new(policy, bandit)
#'
#' history <- Simulator$new(agents = agent,
#'                          horizon = horizon,
#'                          simulations = simulations,
#'                          save_theta = TRUE)$run()
#'
#' plot(history, type = "average", regret = FALSE)
#' }
NULL
@@ -0,0 +1,126 @@
#' @export
ContinuumBanditUnimodal <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  public = list(
    arm_function = NULL,
    c1 = NULL,
    c2 = NULL,
    class_name = "ContinuumBanditUnimodal",
    initialize = function() {
      self$c2 <- 1
      # Mean reward is an inverted parabola that peaks at x = c1 with
      # value c2, plus Gaussian observation noise.
      self$arm_function <- function(x, c1 = 0.25, c2 = 0.75) {
        -(x - c1)^2 + c2 + rnorm(length(x), 0, 0.01)
      }
      super$initialize()
      self$d <- 1
      self$k <- 1
    },
    post_initialization = function() {
      # Draw the location of the optimum afresh for every simulation repeat.
      self$c1 <- runif(1, 0.25, 0.75)
    },
    get_context = function(t) {
      context <- list()
      context$k <- self$k
      context$d <- self$d
      context
    },
    get_reward = function(t, context, action) {
      list(
        reward = self$arm_function(action$choice, self$c1, self$c2),
        optimal_reward = self$c2
      )
    }
  )
)
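Because the mean reward is a downward parabola, the optimum is available in closed form: the noise-free reward -(x - c1)^2 + c2 is maximized at x = c1 with value c2, which is exactly what get_reward reports as optimal_reward. A quick numerical sanity check with illustrative values:

```r
# The noise-free mean reward peaks at x = c1 with value c2.
c1 <- 0.4; c2 <- 1
x  <- seq(0, 1, by = 0.001)
f  <- -(x - c1)^2 + c2
x[which.max(f)]  # 0.4 (= c1)
max(f)           # 1   (= c2)
```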
#' Bandit: ContinuumBandit
#'
#' A function-based continuum multi-armed bandit
#' where arms are chosen from a subset of the real line and the mean rewards
#' are assumed to be a continuous function of the arms.
#'
#' @section Usage:
#' \preformatted{
#' bandit <- ContinuumBandit$new(FUN)
#' }
#'
#' @name ContinuumBandit
#'
#' @section Arguments:
#' \describe{
#'   \item{FUN}{continuous function.}
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#'   \item{\code{new(FUN)}}{ generates and instantiates a new \code{ContinuumBandit} instance. }
#'
#'   \item{\code{get_context(t)}}{
#'      argument:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'      }
#'      returns a named \code{list}
#'      containing the current \code{d x k} dimensional matrix \code{context$X},
#'      the number of arms \code{context$k} and the number of features \code{context$d}.
#'   }
#'
#'   \item{\code{get_reward(t, context, action)}}{
#'      arguments:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'          \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#'          \code{context$k} (number of arms) and \code{context$d} (number of context features)
#'          (as set by \code{bandit}).
#'          \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}).
#'      }
#'      returns a named \code{list} containing \code{reward$reward} and, where computable,
#'      \code{reward$optimal} (used by "oracle" policies and to calculate regret).
#'   }
#'
#' }
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
#'
#' @examples
#' \dontrun{
#'
#' horizon     <- 1500
#' simulations <- 100
#'
#' continuous_arms <- function(x) {
#'   -0.1 * (x - 5)^2 + 3.5 + rnorm(length(x), 0, 0.4)
#' }
#'
#' int_time   <- 100
#' amplitude  <- 0.2
#' learn_rate <- 0.3
#' omega      <- 2 * pi / int_time
#' x0_start   <- 2.0
#'
#' policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start)
#'
#' bandit <- ContinuumBandit$new(FUN = continuous_arms)
#'
#' agent  <- Agent$new(policy, bandit)
#'
#' history <- Simulator$new(agents = agent,
#'                          horizon = horizon,
#'                          simulations = simulations,
#'                          save_theta = TRUE)$run()
#'
#' plot(history, type = "average", regret = FALSE)
#' }
NULL
@@ -0,0 +1,42 @@
#' @export
OnlineOfflineContinuumBandit <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  private = list(
    S = NULL
  ),
  public = list(
    class_name = "OnlineOfflineContinuumBandit",
    delta = NULL,
    horizon = NULL,
    choice = NULL,
    arm_function = NULL,
    initialize = function(FUN, delta, horizon) {
      self$arm_function <- FUN
      self$horizon <- horizon
      self$delta <- delta
      self$k <- 1
    },
    post_initialization = function() {
      # Simulate a log of uniformly random actions with their observed
      # rewards, then shuffle it.
      self$choice <- runif(self$horizon, min = 0, max = 1)
      private$S <- data.frame(self$choice, self$arm_function(self$choice))
      private$S <- private$S[sample(nrow(private$S)), ]
      colnames(private$S) <- c('choice', 'reward')
    },
    get_context = function(index) {
      context <- list()
      context$k <- self$k
      context
    },
    get_reward = function(index, context, action) {
      # Rejection-based replay: the logged reward counts only when the
      # policy's choice lies within delta of the logged action; otherwise
      # the sample is discarded (NULL).
      reward_at_index <- as.double(private$S$reward[[index]])
      if (abs(private$S$choice[[index]] - action$choice) < self$delta) {
        list(reward = reward_at_index)
      } else {
        NULL
      }
    }
  )
)
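The acceptance rule in get_reward adapts rejection-style replay evaluation to continuous actions: a logged interaction is used only when it falls inside a delta-band around the policy's choice. A minimal standalone illustration with made-up values:

```r
# Which logged actions would be accepted for a policy that chose
# x = 0.50, with matching window delta = 0.05?
logged_choice <- c(0.12, 0.48, 0.53, 0.90)
policy_choice <- 0.50
delta         <- 0.05

accepted <- abs(logged_choice - policy_choice) < delta
accepted  # FALSE  TRUE  TRUE  FALSE
```

Smaller delta values make replay less biased but discard more of the log, so fewer time steps yield a usable reward.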
@@ -0,0 +1,51 @@
#' @export
OnlineOfflineContinuumBanditKernel <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  private = list(
    S = NULL,
    n = NULL
  ),
  public = list(
    class_name = "OnlineOfflineContinuumBanditKernel",
    delta = NULL,
    c1 = NULL,
    c2 = NULL,
    arm_function = NULL,
    choice = NULL,
    h = NULL,
    kernel = NULL,
    horizon = NULL,
    initialize = function(FUN, horizon) {
      self$arm_function <- FUN
      self$k <- 1
      self$horizon <- horizon
      # Bandwidth shrinks with the horizon at the usual n^(-1/5) rate.
      self$h <- horizon^(-1 / 5)
      # Gaussian kernel; note the minus sign in the exponent.
      self$kernel <- function(action_true, action_choice, bandwidth) {
        1 / sqrt(2 * pi) * exp(-((action_choice - action_true) / bandwidth)^2 / 2)
      }
    },
    post_initialization = function() {
      # Simulate a shuffled log of uniformly random actions and rewards.
      self$choice <- runif(self$horizon, min = 0, max = 1)
      private$S <- data.frame(self$choice, self$arm_function(self$choice))
      private$S <- private$S[sample(nrow(private$S)), ]
      colnames(private$S) <- c('choice', 'reward')
      private$n <- 0
    },
    get_context = function(index) {
      context <- list()
      context$k <- self$k
      context
    },
    get_reward = function(index, context, action) {
      # Instead of hard delta-matching, weight every logged reward by a
      # Gaussian kernel on the distance between the logged action and
      # the policy's choice.
      reward_at_index <- as.double(private$S$reward[[index]])
      kern_value <- self$kernel(action_true   = private$S$choice[[index]],
                                action_choice = action$choice,
                                bandwidth     = self$h)
      list(
        reward = kern_value * reward_at_index,
        # c2 is never assigned by this class, so optimal_reward is NULL
        # unless it is set externally.
        optimal_reward = self$c2
      )
    }
  )
)
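The kernel variant replaces the hard accept/reject of the delta-matching bandit with a smooth weight: every logged sample contributes, discounted by its distance to the policy's choice. A short sketch tabulating the weights with illustrative numbers:

```r
# Gaussian kernel weights for logged actions at increasing distance from
# a policy choice of 0.5, using the h = horizon^(-1/5) bandwidth above.
horizon <- 1000
h       <- horizon^(-1 / 5)  # ~0.251

logged_choice <- c(0.50, 0.55, 0.70, 0.95)
u             <- (0.5 - logged_choice) / h
weights       <- 1 / sqrt(2 * pi) * exp(-u^2 / 2)
round(weights, 3)  # weights decay smoothly with distance
```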