-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbandit_continuum_function_bimodal.R
136 lines (134 loc) · 4.26 KB
/
bandit_continuum_function_bimodal.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#' @export
library("truncnorm")
# ContinuumBanditBimodal
#
# Continuum-armed bandit on the arm interval [0, 1]. The mean reward of an
# arm x is the sum of two truncated-normal densities (truncated to [0, 1]);
# every observed reward additionally carries Gaussian noise with sd 0.01.
#
# Public fields:
#   arm_function  function(x, mu1, sd1, mu2, sd2) returning a noisy reward
#                 for each element of x; assigned in initialize().
#   mu1, sd1      location / scale of the first component, drawn in
#                 post_initialization() from U(0.15, 0.35) / U(0.1, 0.2).
#   mu2, sd2      location / scale of the second component, drawn in
#                 post_initialization() from U(0.65, 0.85) / U(0.1, 0.2).
#   class_name    "ContinuumBanditBimodal".
#
# Public methods:
#   initialize()                    sets arm_function, d = 1 and k = 1.
#   post_initialization()           draws fresh mu1, sd1, mu2, sd2.
#   get_context(t)                  returns list(k, d).
#   get_reward(t, context, action)  returns list(reward, optimal_reward).
ContinuumBanditBimodal <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  public = list(
    arm_function = NULL,
    mu1 = NULL,
    sd1 = NULL,
    mu2 = NULL,
    sd2 = NULL,
    class_name = "ContinuumBanditBimodal",

    # Install the noisy bimodal reward function and set d = k = 1.
    initialize = function() {
      self$arm_function <- function(x, mu1, sd1, mu2, sd2) {
        y1 <- truncnorm::dtruncnorm(x, a = 0, b = 1, mean = mu1, sd = sd1)
        y2 <- truncnorm::dtruncnorm(x, a = 0, b = 1, mean = mu2, sd = sd2)
        y1 + y2 + stats::rnorm(length(x), 0, 0.01)
      }
      super$initialize()
      self$d <- 1
      self$k <- 1
    },

    # Draw the parameters of both components. The draw order (mu1, sd1, mu2,
    # sd2) is kept fixed so seeded runs consume the RNG stream identically.
    # Returns the bandit itself, invisibly.
    post_initialization = function() {
      self$mu1 <- stats::runif(1, 0.15, 0.35)
      self$sd1 <- stats::runif(1, 0.1, 0.2)
      self$mu2 <- stats::runif(1, 0.65, 0.85)
      self$sd2 <- stats::runif(1, 0.1, 0.2)
      invisible(self)
    },

    # Return the context for time step t: a list holding k and d only.
    get_context = function(t) {
      list(k = self$k, d = self$d)
    },

    # Return list(reward, optimal_reward) for the arm in action$choice.
    #
    # t        time step (not used here).
    # context  context list (not used here).
    # action   list with element `choice`, the arm that was played.
    #
    # `reward` is arm_function evaluated at action$choice; `optimal_reward`
    # is arm_function evaluated at the arm that maximises the noiseless mean
    # reward on [0, 1]. Stops with an error if the component parameters have
    # not been set yet.
    get_reward = function(t, context, action) {
      params <- c(self$mu1, self$sd1, self$mu2, self$sd2)
      if (length(params) != 4L) {
        stop(
          "mu1, sd1, mu2 and sd2 must each be a single number; ",
          "call post_initialization() before get_reward().",
          call. = FALSE
        )
      }
      observed <- self$arm_function(
        action$choice, self$mu1, self$sd1, self$mu2, self$sd2
      )
      # The higher of the two modes depends on sd1 vs sd2 (the narrower
      # component has the taller peak), so the best arm is searched for
      # rather than assumed to sit at mu2.
      best_arm <- private$locate_optimal_arm(params)
      optimal <- self$arm_function(
        best_arm, self$mu1, self$sd1, self$mu2, self$sd2
      )
      list(
        reward = observed,
        optimal_reward = optimal
      )
    }
  ),
  private = list(
    # Cached maximiser and the parameter vector it was computed for.
    optimal_arm = NULL,
    optimal_arm_params = NULL,

    # Noiseless mean reward of arm(s) x under the built-in bimodal shape.
    # NOTE(review): if arm_function is replaced by a custom function, this
    # mean (and hence the optimal arm) no longer matches it.
    mean_reward = function(x) {
      truncnorm::dtruncnorm(
        x, a = 0, b = 1, mean = self$mu1, sd = self$sd1
      ) +
        truncnorm::dtruncnorm(
          x, a = 0, b = 1, mean = self$mu2, sd = self$sd2
        )
    },

    # Arm in [0, 1] maximising mean_reward. A coarse grid picks the global
    # peak (the function is bimodal, so a purely local search could settle
    # on the lower mode); stats::optimize then refines inside the bracketing
    # grid cell. The result is cached until the parameters change. Uses no
    # random numbers.
    locate_optimal_arm = function(params) {
      cached <- !is.null(private$optimal_arm) &&
        identical(params, private$optimal_arm_params)
      if (!cached) {
        grid <- seq(0, 1, length.out = 1001L)
        peak <- which.max(private$mean_reward(grid))
        lower <- grid[max(peak - 1L, 1L)]
        upper <- grid[min(peak + 1L, length(grid))]
        refined <- stats::optimize(
          private$mean_reward,
          interval = c(lower, upper),
          maximum = TRUE
        )$maximum
        # optimize() never returns an interval end point exactly; keep the
        # grid point when it is at least as good (e.g. a boundary maximum).
        if (private$mean_reward(refined) < private$mean_reward(grid[peak])) {
          refined <- grid[peak]
        }
        private$optimal_arm <- refined
        private$optimal_arm_params <- params
      }
      private$optimal_arm
    }
  )
)
#' Bandit: ContinuumBandit
#'
#' A function based continuum multi-armed bandit
#' where arms are chosen from a subset of the real line and the mean rewards
#' are assumed to be a continuous function of the arms.
#'
#' @section Usage:
#' \preformatted{
#' bandit <- ContinuumBandit$new(FUN)
#' }
#'
#' @name ContinuumBandit
#'
#'
#' @section Arguments:
#' \describe{
#' \item{FUN}{continuous function.}
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#' \item{\code{new(FUN)}}{ generates and instantiates a new \code{ContinuumBandit} instance. }
#'
#' \item{\code{get_context(t)}}{
#' argument:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' }
#' returns a named \code{list}
#' containing the current \code{d x k} dimensional matrix \code{context$X},
#' the number of arms \code{context$k} and the number of features \code{context$d}.
#' }
#'
#' \item{\code{get_reward(t, context, action)}}{
#' arguments:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#' \code{context$k} (number of arms) and \code{context$d} (number of context features)
#' (as set by \code{bandit}).
#' \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}).
#' }
#' returns a named \code{list} containing \code{reward$reward} and, where computable,
#' \code{reward$optimal_reward} (used by "oracle" policies and to calculate regret).
#' }
#'
#' }
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
#'
#' @examples
#' \dontrun{
#'
#' horizon <- 1500
#' simulations <- 100
#'
#' continuous_arms <- function(x) {
#' -0.1*(x - 5) ^ 2 + 3.5 + rnorm(length(x),0,0.4)
#' }
#'
#' int_time <- 100
#' amplitude <- 0.2
#' learn_rate <- 0.3
#' omega <- 2*pi/int_time
#' x0_start <- 2.0
#'
#' policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start)
#'
#' bandit <- ContinuumBandit$new(FUN = continuous_arms)
#'
#' agent <- Agent$new(policy,bandit)
#'
#' history <- Simulator$new( agents = agent,
#' horizon = horizon,
#' simulations = simulations,
#' save_theta = TRUE )$run()
#'
#' plot(history, type = "average", regret = FALSE)
#' }
NULL