diff --git a/Arms.py b/Arms.py
new file mode 100644
index 0000000..24a922f
--- /dev/null
+++ b/Arms.py
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+"""
+@author: Wenbo Wang
+
+[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel
+Allocation in IoT Networks over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game"
+
+License:
+This program is licensed under the GPLv2 license. If you in any way use this
+code for research that results in publications, please cite our original
+article listed above.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+"""
+
+__author__ = "Wenbo Wang"
+
+#from random import random
+from numpy.random import random as nprandom
+
+import scipy.stats as stats
+
+class Arm(object):
+    """ Base class for an arm class."""
+
+    def __init__(self, param):
+        """ Base class for an arm class."""
+        self.lower = param["lower_val"] #: Lower value of rewards, array[context]
+        self.upper = param["upper_val"] #: Upper value of rewards
+        self.amplitude = self.upper - self.lower #: Amplitude of value of rewards
+
+        # for arm of a specific context-player
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Printing
+
+    # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property
+    @property
+    def lower_amplitude(self):
+        """(lower, amplitude)"""
+        if hasattr(self, 'lower') and hasattr(self, 'amplitude'):
+            return self.lower, self.amplitude
+        else:
+            raise NotImplementedError("This method lower_amplitude() has to be implemented in the class inheriting from Arm.")
+
+    @property
+    def current_context(self):
+        """Current context of the arm."""
+        if hasattr(self, 'context'):
+            return self.context
+        else:
+            raise NotImplementedError("This method current_context() has to be implemented in the class inheriting from Arm.")
+
+    # --- Printing
+
+    def __str__(self):
+        return self.__class__.__name__
+
+    def __repr__(self):
+        return "{}({})".format(self.__class__.__name__, self.__dir__)
+
+    # --- Random samples
+
+    def draw_sample(self, t=None):
+        """ Draw one random sample."""
+        raise NotImplementedError("This method draw_sample(t) has to be implemented in the class inheriting from Arm.")
+
+    def prepare_samples(self, horizon):
+        raise NotImplementedError("This method prepare_samples(horizon) has to be implemented in the class inheriting from Arm.")
+
+"""
+Uniform distribution arms
+"""
+class UniformArm(Arm):
+    """ Uniformly distributed arm, default in [0, 1].
+    """
+
+    def __init__(self, param):
+        """New arm."""
+        self.lower = param["lower_val"] #: Lower value of rewards, array[context]
+        self.upper = param["upper_val"] #: Upper value of rewards
+        self.amplitude = self.upper - self.lower #: Amplitude of value of rewards
+        if self.amplitude <= 0:
+            raise Exception("The upper bound must be larger than the lower bound")
+
+        self.mean = (self.lower + self.upper) / 2.0 #: Mean for this UniformArm arm
+        self.variance = self.amplitude**2 / 12.0 #: Variance for this UniformArm arm
+
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Random samples
+
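+    # A minimal usage sketch (illustrative only; the parameter values below are
+    # placeholders, not values taken from any configuration in this repository):
+    #
+    #   param = {"lower_val": 0.0, "upper_val": 1.0,
+    #            "context": "context 1", "playerID": 0, "armID": 0}
+    #   arm = UniformArm(param)
+    #   arm.prepare_samples(horizon=1000)           # pre-draw samples indexed by t
+    #   value = arm.draw_sample("context 1", t=10)  # or draw_sample(context) for a fresh sample
+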
+    def draw_sample(self, context, t=None):
+        """ Draw one random sample."""
+        if self.context != context:
+            raise Exception("the arm corresponding to a different context is called")
+
+        if t is None:
+            # The parameter t is ignored in this case. Do sampling right away.
+            return self.lower + (nprandom() * self.amplitude)
+        else:
+            if t >= self.horizon:
+                raise Exception("the time instance is beyond the horizon")
+            else:
+                return self.prepared_samples[t]
+
+    def prepare_samples(self, horizon):
+        if horizon <= 0:
+            raise Exception("the input horizon is invalid")
+        else:
+            self.horizon = horizon
+            self.prepared_samples = self.lower + (nprandom(self.horizon) * self.amplitude)
+
+    # --- Printing
+
+    def __str__(self):
+        return "UniformArm"
+
+    def __repr__(self):
+        return "U({:.3g}, {:.3g})".format(self.lower, self.upper)
+
+"""
+Gaussian distribution arms
+"""
+class GaussianArm(Arm):
+    """
+    Gaussian distributed arm, possibly truncated.
+    - The default setting is to truncate into [0, 1] (so draw_sample() returns values in [0, 1]).
+    """
+
+    def __init__(self, param):
+        """New arm."""
+        self.mu = param["mu"]
+        if "sigma" not in param.keys():
+            self.sigma = 0.05
+        else:
+            self.sigma = param["sigma"]
+        assert self.sigma > 0, "The parameter 'sigma' for a Gaussian arm has to be > 0."
+
+        self.lower = 0  # used to truncate the sampled value
+        self.upper = 1  # used to truncate the sampled value
+
+        # For the truncated normal distribution, see:
+        # "Simulation of truncated normal variables", https://arxiv.org/pdf/0907.4010.pdf
+        # Section "Two-sided truncated normal distribution"
+
+        alpha = (self.lower - self.mu) / self.sigma
+        beta = (self.upper - self.mu) / self.sigma
+
+        self.sampler = stats.truncnorm(alpha, beta, loc=self.mu, scale=self.sigma)
+
+        self.mean, self.variance = self.sampler.stats(moments='mv')
+
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Random samples
+
+    def draw_sample(self, context, t=None):
+        """
+        Draw one random sample. If t is given, return the pre-sampled value at index t.
+        """
+        if self.context != context:
+            raise Exception("the arm corresponding to a different context is called")
+
+        if t is None:
+            # The parameter t is ignored in this case. Do sampling right away.
+            return self.sampler.rvs(1)
+        else:
+            if t >= self.horizon:
+                raise Exception("the time instance is beyond the horizon")
+            else:
+                return self.prepared_samples[t]
+
+    def prepare_samples(self, horizon):
+        """
+        The truncated normal distribution takes much more time to produce a single sample each time,
+        so we pre-sample an array and then retrieve the entries with the index t.
+        """
+        if horizon <= 0:
+            raise Exception("the input horizon is invalid")
+        else:
+            self.horizon = horizon
+            self.prepared_samples = self.sampler.rvs(self.horizon)
+
+    # --- Printing
+    def __str__(self):
+        return "Gaussian"
+
+    def __repr__(self):
+        return "N({:.3g}, {:.3g})".format(self.mean, self.sigma)
+
+"""
+Other types of distribution should be implemented here.
+""" + +if __name__ == '__main__': + print("Warning: this script 'Arms.py' is NOT executable..") # DEBUG + exit(0) + +__all__ = ["UniformArm", "GaussianArm"] \ No newline at end of file diff --git a/GameEvaluator.py b/GameEvaluator.py new file mode 100644 index 0000000..5d0a734 --- /dev/null +++ b/GameEvaluator.py @@ -0,0 +1,829 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# This file defines the evaluation and virtualization mechanisms of the simulations. +# class AlgEvaluator +# +# For each simulation there are two versions: single-process and multi-process (parallel). +# Note that the parallel version is usually 2X to 4X faster than the single-process version, depending on the +# simulation configuration. However, it is at the cost of consuming the same folds of memory. +# It may use up the machine memory and result in a program collapse when the horizon/player nunber/arm number +# is too large + +__author__ = "Wenbo Wang" + +from tqdm import tqdm +import multiprocessing as mp +import numpy as np + +#import environemnt generators +from MPMAB import MP_MAB +from HetNetSimulator import HomeBrewedHetNetEnv + +#import algorithms +from MABAlgorithms import Hungarian, StaticHungarian, MusicalChairs, TrialandError, GameofThrone +from MABAlgorithms2 import SOC + +from loggingutils import info_logger + +# result recorder +from PlayResult import ResultMultiPlayers + +if __name__ == '__main__': + print("Warning: this script 'GameEvaluator.py' is NOT executable..") # DEBUG + exit(0) + +class AlgEvaluator: + def __init__(self, configuration): + self.horizon = configuration['horizon'] + + self.nbArms = configuration['arm number'] + self.nbPlayers = configuration['player number'] + + self.context_set = configuration['context set'] + self.nbContext = len(self.context_set) + + # for loaded values or when calling the prepare() methods, set true + self.flag_pre_prepare = False + self.flag_simulation_done = False + + # we only have a unique bandit game, but may have more than one algorithms + self.mp_mab_env = None + # to be extended + if configuration['env_type'] == 'uniform': + self.mp_mab_env = MP_MAB.uniform_mab(self.context_set, self.nbArms, self.nbPlayers, + dic_lower = configuration['initial data'][0], + dic_upper = configuration['initial data'][1]) + + # 'context probabilites' is used for a differernt purpose in HetNet simulator + if 'context probabilites' in configuration.keys(): + # set arbitrary probabilities for discrete context distribution + context_probabilites = configuration['context probabilites'] + self.mp_mab_env.set_discrete_context_prob(context_probabilites) + elif configuration['env_type'] == 'gaussian': + self.mp_mab_env = MP_MAB.uniform_mab(self.context_set, self.nbArms, self.nbPlayers, + dic_mean = configuration['initial data'][0], + dic_sigma = configuration['initial data'][1]) + + # 'context probabilites' is used for a differernt purpose in 
HetNet simulator + if 'context probabilites' in configuration.keys(): + # set arbitrary probabilities for discrete context distribution + context_probabilites = configuration['context probabilites'] + self.mp_mab_env.set_discrete_context_prob(context_probabilites) + elif configuration['env_type'] == 'HetNet simulator': + hetnet_params = {'enabel mmWave': configuration['enabel mmWave'], + 'horizon': self.horizon, + 'cell range': configuration['cell range'], + 'context_prob': configuration['context_prob'], + 'los_prob': configuration['los_prob'] + } + + self.mp_mab_env = HomeBrewedHetNetEnv.HetNet_mab(self.context_set, self.nbArms, self.nbPlayers, + hetnet_params) +# print("showing UE and MUE positions") #debugging +# self.mp_mab_env.helper_plot_ue_posiiton() #debugging + + elif configuration['env_type'] == 'load data': + #TODO: load the series of arm values from a existing file +# self.flag_pre_prepare = True + pass + + self.algorithms = [] # a list of algorithms + self.result_recorders = [] # a list of result recorder for each algorithm + self.alg_names = [] + + def prepare_arm_samples(self, horizon = None): + if horizon is not None: + self.horizon = horizon + + self.mp_mab_env.prepare_samples(self.horizon) + + self.flag_pre_prepare = True + + def reset_player_number(self, nbPlayer=None): + # it is allowed only to be done after the samples are prepared + if nbPlayer is None or self.flag_pre_prepare == False: + return False + else: + self.nbPlayers = nbPlayer + self.mp_mab_env.nbPlayers = nbPlayer + + return True + + def reset_arm_number(self, nbArm=None): + # it is allowed only be done after the samples are prepared + # we are not goning to change the real record of the arm values + if nbArm is None or self.flag_pre_prepare == False: + return False + else: + self.nbArms = nbArm + self.mp_mab_env.nbArms = nbArm + + return True + + def clear_algorithms(self): + # clear all existing algorithms and their corresponding recorders + self.algorithms = [] + self.result_recorders = [] + self.alg_names = [] + + def add_algorithm(self, algo_type = 'Trial and Error', custome_params=None): + """ Create environments.""" + alg_params = {"nbPlayer": self.nbPlayers, "nbArm": self.nbArms, "context_set": self.context_set} + + #for each algorithm, append a recorder + if algo_type == 'Trial and Error' or algo_type == 'TnE Nonobservable': + #create a trial-and-error algorithm + alg_params["horizon"] = self.horizon + alg_params["c1"] = custome_params["c1"] if custome_params is not None else 100 + alg_params["c2"] = custome_params["c2"] if custome_params is not None else 5 + alg_params["c3"] = custome_params["c3"] if custome_params is not None else 1 + + alg_params["epsilon"] = custome_params["epsilon"] if custome_params is not None else 0.1 + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 2 + + if "alpha11" in custome_params.keys(): + alg_params["alpha11"] = custome_params["alpha11"] + + if "alpha12" in custome_params.keys(): + alg_params["alpha12"] = custome_params["alpha12"] + + if "alpha21" in custome_params.keys(): + alg_params["alpha21"] = custome_params["alpha21"] + + if "alpha22" in custome_params.keys(): + alg_params["alpha22"] = custome_params["alpha22"] + + alg_TnE = TrialandError(alg_params) + + if "observable" in custome_params.keys(): + alg_TnE.set_context_observability(custome_params["observable"]==1) + + self.algorithms.append(alg_TnE) + + if algo_type == 'Trial and Error': + result_TnE = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, 
self.nbArms, self.horizon) + self.result_recorders.append(result_TnE) + self.alg_names.append(algo_type) + else: + result_TnE = ResultMultiPlayers('Non-Contextual TnE', + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_TnE) + self.alg_names.append('Non-Contextual TnE') + + elif algo_type == 'Musical Chairs': #str(MusicalChair) + alg_params["horizon"] = self.horizon + # 3000 is hardcoded, as given by the original paper [Rosenski2015] + alg_params["T0"] = custome_params["T0"] if custome_params is not None else 3000 + + alg_MC = MusicalChairs(alg_params) + self.algorithms.append(alg_MC) + + # to record the learning results of alg_MC + result_MC = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_MC) + + self.alg_names.append(algo_type) + + elif algo_type == 'Hungarian': #str(Hungarian) + alg_Hungarian = Hungarian(alg_params) + self.algorithms.append(alg_Hungarian) + + result_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_hungarian) + + self.alg_names.append(algo_type) + + elif algo_type == 'Static Hungarian': + game_env = {} + + array_context, array_prob = self.mp_mab_env.get_discrete_context_prob() + alg_params["array_context"] = array_context + alg_params["array_prob"] = array_prob + + for context in self.context_set: + lower, upper, means, variance = self.mp_mab_env.get_param(context) + game_env[context] = means + + alg_params["mean_game_env"] = game_env + + alg_SHungarian = StaticHungarian(alg_params) + self.algorithms.append(alg_SHungarian) + + result_static_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_static_hungarian) + + self.alg_names.append(algo_type) + elif algo_type == 'Nonobservable-context Hungarian': + # when the algorithm is not able to observe the context (side information) + # the algorithm provides a optimal result in terms of normal MP-MAB + game_env = {} + game_mean = np.zeros((self.nbPlayers,self.nbArms)) + + array_context, array_prob = self.mp_mab_env.get_discrete_context_prob() + alg_params["array_context"] = array_context + alg_params["array_prob"] = array_prob + + for context_id in range(len(array_context)): + lower, upper, means, variance = self.mp_mab_env.get_param(array_context[context_id]) + game_mean = game_mean + means * array_prob[context_id] + + for context in self.context_set: + lower, upper, means, variance = self.mp_mab_env.get_param(context) + game_env[context] = game_mean + + alg_params["mean_game_env"] = game_env + + alg_SHungarian = StaticHungarian(alg_params) + self.algorithms.append(alg_SHungarian) + + result_static_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_static_hungarian) + + self.alg_names.append(algo_type) + elif algo_type == 'Game of Thrones': + alg_params["horizon"] = self.horizon + + alg_params["c1"] = custome_params["c1"] if custome_params is not None else 100 + alg_params["c2"] = custome_params["c2"] if custome_params is not None else 5 + alg_params["c3"] = custome_params["c3"] if custome_params is not None else 1 + + alg_params["epsilon"] = custome_params["epsilon"] if custome_params is not None else 0.1 + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 2 + + alg_GoT = 
GameofThrone(alg_params) + self.algorithms.append(alg_GoT) + + result_GoT = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_GoT) + + self.alg_names.append(algo_type) + elif algo_type == "SOC": + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 0.1 + + alg_SOC = SOC(alg_params) + self.algorithms.append(alg_SOC) + + result_GoT = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_GoT) + + self.alg_names.append(algo_type) # use the full name of 'Stable Orthogonal Allocation' + else: + #TODO: add other algorithms here + print("The algorithm type '{}' is not identified".format(algo_type)) + + def reset_algorithms(self, horizon = None): + """ + reset the internal states/recorders of the algorithms + """ + if horizon is not None: + if self.flag_pre_prepare: + if self.horizon < horizon: + raise Exception("horizon exceeds the maximum recorded values") + else: + self.horizon = horizon + else: + self.horizon = horizon + + for index in range(len(self.algorithms)): + self.algorithms[index].reset(horizon) + self.result_recorders[index].reset_record(horizon) + + self.flag_simulation_done = False + + #----- play the bandit game with all the registered algorithms + def play_game(self, algorithm_ids=None, horizon=None, flag_progress_bar=False): + """ + play_game() produces a single round of simulation results in a sequentail way. + It also works if there is no pre-prepared environment. + """ + self.reset_algorithms() + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + + if flag_progress_bar: + progress_range = tqdm(range(horizon)) + else: + progress_range = range(horizon) + + for t in progress_range: + # sample arms + if self.flag_pre_prepare == True: + context, arm_values = self.mp_mab_env.draw_sample(t) + else: + context, arm_values = self.mp_mab_env.draw_sample() + + # trim the arm_value array if needed + arm_values = arm_values[:self.nbPlayers, :self.nbArms] +# print("shape of arm_values: {}".format(np.shape(arm_values))) + + for alg_index in range(len(alg_list)): + pulls, total_reward, sampled_rewards = alg_list[alg_index].learn_policy(arm_values, context, t) + arm_choices = alg_list[alg_index].pulls2choices(pulls) + action_collisions = alg_list[alg_index].resolve_collision(pulls) + recorder_list[alg_index].store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + self.flag_simulation_done = True + + #----- play the bandit game with all the registered algorithms in a parallel manner + def play_game_parallel(self, algorithm_ids=None, horizon=None, flag_progress_bar=False, step=100): + """ + play_game_parallel() is restricted to work for the pre-prepared environment only. + The extral time used for pickling the data is not negligible. + Multiprocessing doesn't improve much the efficiency if len(algorithm_ids) is less than 3 for small horizons. 
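+
+        A minimal usage sketch (assuming 'configuration' is a dict with the entries
+        read in AlgEvaluator.__init__; the chosen algorithms are just examples):
+
+            evaluator = AlgEvaluator(configuration)
+            evaluator.prepare_arm_samples()
+            evaluator.add_algorithm(algo_type='Hungarian')
+            evaluator.add_algorithm(algo_type='Musical Chairs')
+            evaluator.play_game_parallel(flag_progress_bar=True)
+            evaluator.plot_rewards()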
+ """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + # for parallel computing on a sngle machine + max_nb_processes = max(mp.cpu_count()-2, 1) + task_pool = mp.Pool(processes = max_nb_processes) + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + + results = [] + + if flag_progress_bar == False: + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_simulation_work, + args = (horizon, alg_index, self.mp_mab_env, + alg_list[alg_index], recorder_list[alg_index])) + results.append(res) + + task_pool.close() + task_pool.join() + else: + manager = mp.Manager() + queue = manager.Queue() + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_simulation_work, + args = (horizon, alg_index, self.mp_mab_env, + alg_list[alg_index], recorder_list[alg_index], queue, step)) + results.append(res) + + # add the monitoring process + print("single-shot: number of iteration: {}".format(len(alg_list)*horizon)) + # add the monitoring process + proc = mp.Process(target=self.porgress_monitor, + args=(queue, len(alg_list), horizon)) + + # start the processes + proc.start() + task_pool.close() + task_pool.join() + queue.put(None) + proc.join() + + # each task do not exchange info. with each other + self.flag_simulation_done = True + + for res in results: + recorder = res.get() + recorder_list[recorder[0]] = recorder[1] + +# print("AlgEvaluator finishes parallelization") + + @staticmethod + def async_simulation_work(horizon, alg_index, env, alg, recorder, queue=None, step=100): + """ + async_simu_work() is restricted to be called in play_game_parallel() only. + To avoid passing the pool member, we make it a static method. + """ + # each task is identified by a tuple (alg_index, horizon) + progress_range = range(horizon) + + for t in progress_range: + context, arm_values = env.draw_sample(t) + + arm_values= arm_values[:env.nbPlayers, :env.nbArms] +# print("shape of arm_values: {}".format(np.shape(arm_values))) + + pulls, total_reward, sampled_rewards = alg.learn_policy(arm_values, context, t) + arm_choices = alg.pulls2choices(pulls) + action_collisions = alg.resolve_collision(pulls) + recorder.store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + if queue is not None: + if t % step == 0: + queue.put_nowait(step) + + return (alg_index, recorder) + + def play_repeated_game(self, horizon_list, algorithm_ids=None, + simulation_rounds=1, flag_progress_bar=False): + """ + Play the game repeatedly with different horizons in single-process mode. + It only works with the pre-prepared environment. + The recorder accompanying each algorithm do not work here, + since they store only the results from the last run. 
+ + play_repeated_game() return a dictionary with the keys: + {'algorithm_name', 'reward_series', 'collision_series', 'horizon'}, + where 'reward_series', 'horizon' and 'collision_series' are 2D arrays, + with the rows aligned with elements in 'algorithm_name' + """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + alg_names = self.get_alg_names(algorithm_ids) + # reward_series records the reward data for each algorithm + # in a form (len(algorithm_ids), simulation_rounds*len(horizon_list)) + # other records are defined in the same form + if algorithm_ids==None: + algorithm_ids = list(range(len(self.algorithms))) + + reward_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + collision_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + switching_count_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + horizon_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + + # convert types (convert ndarray to list) + if isinstance(horizon_list, list) != True: + horizon_list = np.ndarray.tolist(horizon_list) + +# print("number of algorithms: {}".format(len(algorithm_ids))) + + if flag_progress_bar: + progress_range = tqdm(range(simulation_rounds)) + else: + progress_range = range(simulation_rounds) + + for simu_index in progress_range: + if flag_progress_bar == False: + print("Simulation round {} of total rounds {}...".format(simu_index+1, simulation_rounds)) + + for horizon in horizon_list: + self.play_game(algorithm_ids, horizon=int(horizon), flag_progress_bar=False) # could set to None + + # example: for 3 algorithms, len(tmp_total_payoff) == 3 + tmp_total_payoff = self.get_total_payoff(algorithm_ids, horizon=int(horizon)) + tmp_total_collision = self.get_total_collision(algorithm_ids, horizon=int(horizon)) + tmp_total_switching = self.get_total_switching_count(algorithm_ids, horizon=int(horizon)) + + idx_horizon = horizon_list.index(horizon) + + id_plays = simu_index * len(horizon_list) + idx_horizon + # record the reward obtained in this single round, + # the following is prepared for a dataframe format + for id_alg in range(len(algorithm_ids)): + horizon_series[id_alg][id_plays] = horizon + reward_series[id_alg][id_plays] = tmp_total_payoff[id_alg] + collision_series[id_alg][id_plays] = tmp_total_collision[id_alg]# + switching_count_series[id_alg][id_plays] = tmp_total_switching[id_alg] + + simulation_results = {} + simulation_results['reward_series'] = reward_series + simulation_results['collision_series'] = collision_series + simulation_results['switching_count_series'] = switching_count_series + simulation_results['horizon'] = horizon_series + simulation_results['algorithm_name'] = alg_names + + return simulation_results + + #----- play the bandit game with (all) the registered algorithms in a parallel manner + def play_repeated_game_parallel(self, horizon_list, algorithm_ids=None, + simulation_rounds=1, flag_progress_bar=False, step=1): + """ + parallel version of repeated_game_play(). + play_repeated_game_parallel() only works with the pre-prepared environment. 
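+
+        The returned dictionary has the same keys as the one built by play_repeated_game();
+        e.g., results['reward_series'][i] holds the rewards recorded for the algorithm named
+        results['algorithm_name'][i] (an illustrative access pattern, not an additional API).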
+ """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + # for parallel computing on a sngle machine + max_nb_processes = max(mp.cpu_count()-2, 1) + task_pool = mp.Pool(processes = max_nb_processes) + + # add works to the task pool + results = [] + if flag_progress_bar == True: + manager = mp.Manager() + queue = manager.Queue() + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_repeated_work, + args = (self.mp_mab_env, alg_list[alg_index], + alg_index, horizon_list, recorder_list[alg_index], + simulation_rounds, queue, step)) + # append the results + results.append(res) + + # add the monitoring process + proc = mp.Process(target=self.porgress_monitor, + args=(queue, len(alg_list), simulation_rounds)) + # start the processes + proc.start() + task_pool.close() + task_pool.join() + queue.put(None) + proc.join() + else: + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_repeated_work, + args = (self.mp_mab_env, alg_list[alg_index], + alg_index, horizon_list, recorder_list[alg_index], + simulation_rounds)) + # append the results + results.append(res) + # start the processes + task_pool.close() + task_pool.join() + + # each task do not exchange info. with each other + self.flag_simulation_done = True + + # reward_series records the reward data for each algorithm + # in a form (len(algorithm_ids), simulation_rounds*len(horizon_list)) + # all other records are defined in the same form + reward_series = np.empty((0, simulation_rounds*len(horizon_list))) + collision_series = np.empty((0, simulation_rounds*len(horizon_list))) + switching_count_series = np.empty((0, simulation_rounds*len(horizon_list))) + horizon_series = np.zeros((0, simulation_rounds*len(horizon_list))) + alg_indicators = [] + + # re-organize the results of each algorithm + for res in results: + alg_id, recorder, reward, collision, switching_count, horizons = res.get() + # fill the recorded data with the last-round result + self.result_recorders[alg_id] = recorder + + # add a new row + reward_series = np.append(reward_series, [reward], axis=0) + collision_series = np.append(collision_series, [collision], axis=0) + switching_count_series = np.append(switching_count_series, [switching_count], axis=0) + horizon_series = np.append(horizon_series, [horizons], axis=0) + + alg_indicators.append(alg_id) + + simulation_results = {} + simulation_results['reward_series'] = reward_series + simulation_results['collision_series'] = collision_series + simulation_results['switching_count_series'] = switching_count_series + + simulation_results['horizon'] = horizon_series + simulation_results['algorithm_name'] = [self.alg_names[index] for index in alg_indicators] + +# print("len of collision_series:{}".format((collision_series.shape))) +# print("len of reward_series:{}".format((reward_series.shape))) +# print("len of switching_count_series:{}".format((switching_count_series.shape))) +# print("len of horizon_series:{}".format((horizon_series.shape))) + + return simulation_results + + @staticmethod + def async_repeated_work(env, algrithm, alg_index, horizon_list, recorder, simulation_rounds=1, queue=None, step=1): + """ + async_repeated_work() is should be 
only called by repeated_game_play_parallel(). + To avoid passing the pool member, we make it a static method. + + - a task is identified by a tuple (algrithm, horizon_list) + - 'reward_series' records the reward data for algorithm identified by 'alg_index' + in an 1-D array of len(simulation_rounds)*len(horizon_list) + - other records are defined in the same form + """ + reward_series = np.zeros(simulation_rounds*len(horizon_list)) + collision_series = np.zeros(simulation_rounds*len(horizon_list)) + switching_count_series = np.zeros(simulation_rounds*len(horizon_list)) + horizon_series = np.zeros(simulation_rounds*len(horizon_list)) + + #convert horizon type to list if it is an ndarray + if isinstance(horizon_list, list) != True: + horizon_list = np.ndarray.tolist(horizon_list) + + for simu_index in range(simulation_rounds): + for horizon in horizon_list: + idx_horizon = horizon_list.index(horizon) + + # reset the algorithm + algrithm.reset() + recorder.reset_record() + + # play the game + progress_range = range(int(horizon)) + # initialize the switching count records + tmp_total_switching = 0 + + # store the choices according to the contexts that they are in + tmp_switch_dic = {} + tmp_context_count = {} + for context in env.context_set: + tmp_switch_dic[context] = np.zeros([int(horizon), env.nbPlayers]) + tmp_context_count[context] = 0 + + for t in progress_range: + context, arm_values = env.draw_sample(t) + + arm_values = arm_values[:env.nbPlayers, :env.nbArms] + + # all in arrays + pulls, total_reward, sampled_rewards = algrithm.learn_policy(arm_values, context, t) + arm_choices = algrithm.pulls2choices(pulls) + action_collisions = algrithm.resolve_collision(pulls) + + #get collision in arrays + id_nonzero = np.where(action_collisions != 0) + action_collisions[id_nonzero] = action_collisions[id_nonzero] - 1 + + recorder.store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + # store choices according to contexts + tmp_switch_dic[context][tmp_context_count[context],:] = arm_choices + tmp_context_count[context] = tmp_context_count[context] + 1 + + for context in env.context_set: + # count the switching for each context +# print("Contex: {}, shape: {}".format(context, tmp_switch_dic[context].shape)) + + for tt in range(1, tmp_context_count[context]+1): + tmp_switching_count = np.sum(tmp_switch_dic[context][tt,:] != tmp_switch_dic[context][tt-1, :]) + tmp_total_switching += tmp_switching_count + + # compute directly instead of calling get_total_payoff() + tmp_total_payoff = np.sum(recorder.total_rewards[:int(horizon)]) + tmp_total_collision = np.sum(recorder.collisions[:int(horizon)]) + + id_plays = simu_index * len(horizon_list) + idx_horizon + + reward_series[id_plays] = tmp_total_payoff + collision_series[id_plays] = tmp_total_collision + switching_count_series[id_plays] = tmp_total_switching + horizon_series[id_plays] = horizon + + if queue is not None: + if simu_index % step == 0: + queue.put_nowait(step) + + return (alg_index, recorder, reward_series, collision_series, switching_count_series, horizon_series) + + @staticmethod + def porgress_monitor(queue, nbAlgorithm, nbRound): + """ + porgress_monitor() is added by the monitor process for updating the simulation progress bar. 
+ nbRound represents the total number of repeatitions in case of a repeated simulation, + or the number of horizon in case of a single-shot simulation + """ + pbar = tqdm(total = nbAlgorithm*nbRound) + for item in iter(queue.get, None): + pbar.update(item) + + #----- utility functions + def get_total_payoff(self, algorithm_ids = None, horizon = None): + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_payoff = np.zeros(len(recorder_list)) + for index in range(len(recorder_list)): + array_total_payoff[index] = np.sum(recorder_list[index].total_rewards[:horizon]) + + return array_total_payoff + + def get_total_collision(self, algorithm_ids = None, horizon = None): + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_collision = np.zeros(len(recorder_list)) + for index in range(len(recorder_list)): + idx_nonzero = np.where(recorder_list[index].collisions != 0) + + recorder_list[index].collisions[idx_nonzero] = recorder_list[index].collisions[idx_nonzero] - 1 + array_total_collision[index] = np.sum(recorder_list[index].collisions[:horizon]) + + return array_total_collision + + def get_total_switching_count(self, algorithm_ids = None, horizon = None): + """ + get the action switching count of the given list of algorithms, + we do it w/r to the context + """ + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_switching_count = np.zeros(len(recorder_list)) # with a number of the algorithms + for index in range(len(recorder_list)): + total_switching_count = 0 + # we add choices into lists w/r to contexts + tmp_switch_dic = {} + tmp_context_count = {} + for context in self.context_set: + # we allocate a bit more than needed + tmp_switch_dic[context] = np.zeros([horizon, self.nbPlayers]) + tmp_context_count[context] = 0 + + # separate the action choices according to contexts + for tt in range(0, horizon): + context = self.result_recorders[index].context_history[tt] + tmp_switch_dic[context][tmp_context_count[context],:] = self.result_recorders[index].choices[:,tt] + tmp_context_count[context] = tmp_context_count[context] + 1 + + for context in self.context_set: + # count the switching for each context + for tt in range(1, tmp_context_count[context]+1): + tmp_switching_count = np.sum(tmp_switch_dic[context][tt,:] != tmp_switch_dic[context][tt-1,:]) + total_switching_count += tmp_switching_count + + array_total_switching_count[index] = total_switching_count + + return array_total_switching_count + + def get_alg_names(self, algorithm_ids = None): 
+ """ + get the name list of the given algorithms + """ + if algorithm_ids is None: + name_list = self.alg_names + else: + name_list = [self.alg_names[index] for index in algorithm_ids] + + return name_list + + #----- plotting + def plot_rewards(self, algorithm_ids = None, horizon = None, save_fig = False, save_data = False): + if self.flag_simulation_done == False: + print("No simulation results are ready") + else: + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + recorder_list[0].plot_cumu_rewards(horizon, other_results=recorder_list[1:], save_fig=save_fig, save_data=save_data) + recorder_list[0].plot_avg_reward(horizon, other_results=recorder_list[1:], save_fig=save_fig, save_data=save_data) \ No newline at end of file diff --git a/HetNetSimulator.py b/HetNetSimulator.py new file mode 100644 index 0000000..4699923 --- /dev/null +++ b/HetNetSimulator.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file implements a simple heterogeneous network with underlying macro-cell UEs +# working in a typical 5G cell, and the overlaying IoT devices working in a narrow-bandwidth (NB) +# mode. IoT devices are placed randomly at fixed locations and macro-cell UEs are moving +# randomly according to a Gauss—Markov model. + +__author__ = "Wenbo Wang" + +from MPMAB import MP_MAB + +import scipy +import numpy as np + +import matplotlib.pyplot as plt +from tqdm import tqdm + +from plotutils import prepare_file_name + +if __name__ == '__main__': + print("Warning: this script 'HetNetSimulator.py' is NOT executable..") # DEBUG + exit(0) + +class HomeBrewedHetNetEnv(MP_MAB): + """ + The network simulator and its interface for MP_MAB. + In the future version, we planned to incooperate existing simulators such as QuaDRiGa-5G for the channel models for the macro cell. + (see https://quadriga-channel-model.de/#Publications) + For an example of a HetNet simulator over QuaDRiGa, see https://github.com/ICT-Embedded/5GNR-HetNet_Model + Due to the time consumption of building it with matlab engine, we adopt a home-brewed HetNet simulator in this version. + """ + def __init__(self, context_set, nbArms=20, nbPlayers=10): + """""" + self.nbArms = nbArms # number of channels + + # for poisson point process it is the intensity of nodes for a square area of 1 + # for uniform distribution, it is the number of nodes + assert nbPlayers<=nbArms, "the number of channels should be no less than the number of devices." 
+ self.nbPlayers = nbPlayers + self.nbArms = nbArms + + self.context_set = context_set# + self.prob_LoS = np.zeros(len(context_set)) + self.prob_context = np.zeros(len(context_set)) + + self.current_arm_value = np.zeros((nbPlayers, nbArms)) + self.current_context = None + + self.arms = {}; + self.horizon = 0 + + self.flag_mmWave = True + + self.ue_position = [] + self.bs_position = [] + + self.nb_mue = 0 + self.mue_position = []# macro cell UE + self.mue_mean_vel = [] + self.mue_mean_dir = [] + + #basic parameters of channel, not exposed to the parameter setting yet + self.frequence = 28e9 # 28GHz + self.nb_UPBC = 4# number of unique pointing beans combined + self.wf_A = 0.07# weighting factor via MMSE for fitting BC-CI path loss model + self.ue_shadow_variance = np.zeros((nbPlayers, nbArms)) # currently based on an arbitrary value, e.g., 9 + self.ue_fading_variance = np.ones((nbPlayers, nbArms)) # currently based on an arbitrary value + self.mobile_alpha = 0.3 + + self.mue_shadow_variance = np.zeros((len(context_set))) # the same across arms + + """ + The path loss exponent model is silightly different w.r.t. to different experiments in the literature. + According to "Path Loss, Shadow Fading, and Line-Of-Sight Probability Models for 5G Urban Macro-Cellular Scenarios", [Sun2015], + PLE_LoS = 2.1 and PLE_NLoS = 2.6 for the CI model in the 28GHz-urban macro-cellular scenario + """ + self.PLE_LoS = 2 # path loss exponent LoS + self.PLE_NLoS = 3 # path loss exponent NLoS + self.mue_power = 10 * np.random.uniform(low=0.5, high=1.0, size=len(context_set)) # 40 dBm, 10w + self.ue_power = 1 # 30 dBm, 1w + self.atenna_gain = 3 #dBi + self.noise = 5e-17 # Watt + + # for beamforming, the oversampling factor is 1 + # we consider the beamforming vector to be randomly choosen, + # this project does not aim to provide mechanisms of optimizing it + self.F = np.zeros([self.nb_UPBC, self.nb_UPBC], dtype=complex) + theta = np.pi * np.arange(start=0., stop=1., step=1./self.nb_UPBC) + # Beamforming codebook F + for nn in np.arange(self.nb_UPBC): + exponent = 1j * np.pi * np.cos(theta[nn]) * np.arange(self.nb_UPBC) + bf_vec = 1. / np.sqrt(self.nb_UPBC) * np.exp(exponent) + self.F[:,nn] = bf_vec[nn] + + self.mue_cb_idx = np.random.randint(self.nb_UPBC) + # to simplify the process of computation, we consider the IoT devices are using the same ones + # it does not affect the simulation results + self.iot_cb_idx = np.random.randint(self.nb_UPBC) + + # recorder of the pre-sampled arm values + self.arm_values = {} + self.max_arm_value ={} # recording the maximum rate for normalization for each context along the time horizon + for context in self.context_set: + self.arm_values[context] = [] + self.max_arm_value[context] = [] + + self.flag_sample_prepared = False + + @classmethod + def HetNet_mab(cls, context_set, nbArms, nbPlayers, hetnet_params): + """ + A number of parameters are hardcoded for the purpose of simplification. 
+ However, they can be easily exposed to the upper layer by moving into 'hetnet_params' + + """ + hetnet_inst = cls(context_set, nbArms, nbPlayers) + + hetnet_inst.horizon = hetnet_params['horizon'] + hetnet_inst.flag_mmWave = hetnet_params['enabel mmWave'] + + cell_range = hetnet_params['cell range'] if 'cell range' in hetnet_params.keys() else 200 + hetnet_inst.bs_position = np.array([0.5 * cell_range, 0.5 * cell_range]) # always placed at the center + hetnet_inst.ue_position, new_nbPlayer = hetnet_inst.initialize_UE_position(nbPlayers=nbPlayers, distance = cell_range, + dist_model=hetnet_params['dist_model'] if 'dist_model' in hetnet_params.keys() else 0) + + hetnet_inst.mue_position, new_nbMUE = hetnet_inst.initialize_UE_position(nbPlayers=len(hetnet_inst.context_set), + distance=cell_range, dist_model=0) + + # randomly set shadowing variances of ue's, as an array of (nbUE-nbChannel) + shadow_vr_base = 2.0 if 'shadow_vr' not in hetnet_params.keys() else hetnet_params['shadow_vr'] + hetnet_inst.ue_shadow_variance = np.random.uniform(size=(nbPlayers, nbArms))*shadow_vr_base + hetnet_inst.mue_shadow_variance = np.random.uniform(size=len(context_set))*shadow_vr_base + + fading_vr_base = 1.0 if 'fading_vr' not in hetnet_params.keys() else hetnet_params['fading_vr'] + hetnet_inst.ue_fading_variance = np.random.uniform(size=(nbPlayers, nbArms))*fading_vr_base + + # assume that different context has different probability of LoS path + hetnet_inst.set_discrete_context_prob(hetnet_params['context_prob'], hetnet_params['los_prob']) + + nb_MUE = len(hetnet_inst.prob_context) + hetnet_inst.mue_mean_vel, hetnet_inst.mue_mean_dir = hetnet_inst.initialize_UE_mobile_model(nb_MUE, scale_velocity=0.1) + + hetnet_inst.mue_vel = np.zeros(nb_MUE) + hetnet_inst.mue_dir = np.zeros(nb_MUE) + + hetnet_inst.vel_base = 1.0 if 'vel_base' not in hetnet_params.keys() else hetnet_params['vel_base'] + + return hetnet_inst + + def set_discrete_context_prob(self, context_prob, los_prob): + """ + assign arbitrary probabilities to contexts + """ + if set(context_prob.keys()) != self.context_set: + raise Exception("probability values do not match the set of context") + + self.context_array = np.array(list(context_prob.keys())) + + # probability of different MUE/UE in neighbor cells transmitting + self.prob_context = np.array(list(context_prob.values())) + self.prob_context = self.prob_context / np.sum(self.prob_context) # normalize + + # probability of different MUE to the receiving AP + # this is to simulate the situation that transmissions from different MUE occupy the channels in the cell + self.prob_LoS = np.array(list(los_prob.values())) + self.prob_LoS = self.prob_LoS / np.sum(self.prob_LoS) # normalize + + + def initialize_UE_position(self, nbPlayers, distance=200, dist_model=0): + """ + initialize the positions of IoT devices and UEs + """ + if dist_model == 1:# PPP distribution + #TODO: the input number of nodes may not be equal to N according to the PPP distribution + # we need to update the player number self.nbPlayers + # do not call this branch in this version + N = scipy.stats.poisson( nbPlayers*1 ).rvs() + + else: # uniform distribution, TODO: add new distribution model here + N = nbPlayers + + x = scipy.stats.uniform.rvs(0, 1,((N,1)))*distance + y = scipy.stats.uniform.rvs(0, 1,((N,1)))*distance + + ue_position = np.hstack((x,y)).T + + return ue_position, N + + def initialize_UE_mobile_model(self, nbPlayers, scale_velocity=1): + ue_mean_vel = np.random.uniform(nbPlayers)*scale_velocity + ue_direction = 
np.random.uniform(nbPlayers)*np.pi*2 + + return ue_mean_vel, ue_direction + + """Draw samples""" + def draw_sample(self, t=None): + """ + draw a new sample + """ + context_id_array = np.arange(start=0, stop=len(self.context_array)) + id_context = np.random.choice(a=context_id_array, size=None, p=self.prob_context) # choose the ID of MUE + self.current_context = self.context_array[id_context] # get the context value + + if t == None: + # update all MUEs' positions + self.mue_position, self.mue_vel, self.mue_dir = self.update_ue_position(self.mue_position, self.mue_vel, + self.mue_dir, self.mobile_alpha, self.mue_mean_vel, self.mue_mean_dir) + + current_arm_value = self.compute_device_rate(id_context) + + # normalization + self.current_arm_value = current_arm_value / np.max(current_arm_value) + else: + if self.flag_sample_prepared == False: + raise Exception("samples are not prepared") + else: + # draw samples from the stored data + self.current_arm_value = self.arm_values[self.current_context][t] + + return self.current_context, self.current_arm_value # we only return part of the real data + + + def prepare_samples(self, horizon, flag_progress_bar=True): + """ + Prepare the samples along the time horizon in advance. + The sequential generation of UE positions would be the most significant bottleneck + for the simulation. + """ + if horizon <= 0: + raise Exception("Input horizon is not valid") + + self.horizon = horizon + + if flag_progress_bar: + progress_range = tqdm(range(horizon)) + else: + progress_range = range(horizon) + + for time in progress_range: + # update position first + self.mue_position, self.mue_vel, self.mue_dir = self.update_ue_position(self.mue_position, self.mue_vel, + self.mue_dir, self.mobile_alpha, self.mue_mean_vel, self.mue_mean_dir) + # the positions are the same w.r.t. each channel, but the shadowing/fading parameters are different + for context in self.context_set: + id_context = self.context_array.tolist().index(context) #np.where(self.context_array == context) + + rates = self.compute_device_rate(id_context) + + # normalization + current_max_rate = np.max(rates) + normalized_rate = rates / current_max_rate + # record the normalized rate matrix at "time" + self.arm_values[context].append(normalized_rate) + self.max_arm_value[context].append(current_max_rate) #added @ 2020.02.21 + + self.flag_sample_prepared = True + + """ + methods used in draw_sample() + """ + def update_ue_position(self, ue_position, ue_vel, ue_dir, mobil_alpha, ue_mean_vel, ue_mean_dir): + # Gauss—Markov mobility model, Chapter 2.5. 
Gauss—Markov "A survey of mobility models for ad hoc network research", [Camp2002] + # Calculate the new velocity and direction values using the Gauss-Markov formula: + # new_val = alpha*old_val + (1-alpha)*mean_val + sqrt(1-alpha^2)*rv + # where rv is a random number sampled from a normal (gaussian) distribution + # reference code (ns-3): https://www.nsnam.org/docs/doxygen/gauss-markov-mobility-model_8cc_source.html + one_minus_alpha = 1 - mobil_alpha + sqrt_alpha = np.sqrt(1 - mobil_alpha**2) + + rv = np.random.normal(size=len(ue_vel)) * self.vel_base # velocity + rd = np.random.normal(size=len(ue_vel)) # angle + + # random value, default parameters: mean = 0, and variance = 1 + ue_vel = mobil_alpha * ue_vel + one_minus_alpha * ue_mean_vel + sqrt_alpha * rv + ue_dir = mobil_alpha * ue_dir + one_minus_alpha * ue_mean_dir + sqrt_alpha * rd + + cos_dir = np.cos(ue_dir) + sin_dir = np.sin(ue_dir) + + x = ue_position[0,:] + ue_vel * cos_dir + y = ue_position[1,:] + ue_vel * sin_dir + + ue_position = np.vstack((x,y)) + + return ue_position, ue_vel, ue_dir + + # used for sampling channels gains + def update_pathloss_db(self, ue_pos, bs_pos, flag_LoS=False): + #update the pathloss of the IoT devices and the macrocell UE + if self.flag_mmWave == True: + if flag_LoS == True: + pl_db = self.path_loss_dB_mmWave(ue_pos, bs_pos, self.PLE_LoS) + else: + pl_db = self.path_loss_dB_mmWave(ue_pos, bs_pos, self.PLE_NLoS) + +# pl = 10 ** (pl_db / 10.) + else: + pl_db = self.path_loss_dB_cost231(ue_pos, bs_pos) +# pl = 10 ** (pl_db / 10.) + + return pl_db # path loss in dB + + # we may need to compute different ue/device-BS pairs + def path_loss_dB_mmWave(self, ue_position, bs_position, PLE): + """ + Based on IEEE TWC paper "Directional Radio Propagation Path Loss Models for Millimeter-Wave + Wireless Networks in the 28-, 60-, and 73-GHz Bands", Oct. 
2016 [Sulyman2016] + Nr is the number of unique pointing beams combined, Nr = 3,4,5 + """ + #PLE = 2 for LoS, 4 for NLoS, see self.PLE_LoS, self.PLE_NLoS + c = 3e8 # light speed + + # to align the notations with the equations in the refernece [Sulyman2016] + A = self.wf_A + nr = self.nb_UPBC + fc = self.frequence # in Hz + + if ue_position.ndim == 1: + pass # single ue, don't have to do anything + else: + bs_position = np.broadcast_to(bs_position, (ue_position.shape[::-1])).T + + dist = np.linalg.norm(ue_position-bs_position, axis=0) # along the rows + +# fspl = 32.4 + 20 * np.log10(fc / 1e9) # fc in GHz, Eq (1a) of 2016 [Sulyman2016], equivalent equation + fspl = 20 * np.log10((4*np.pi*dist*fc) / c) # Eq (1a) of 2016 [Sulyman2016] + pl = fspl + 10 * PLE * np.log10(dist) * (1 - A*np.log2(nr)) # Eq (8) of 2016 [Sulyman2016] + + return pl # in dB + + def path_loss_dB_cost231(self, ue_position, bs_position): + """ + reference: A.2.2 COST 231 Extension to Hata Model, Channel Models A Tutorial, [Jain2007] + code reference: https://www.mathworks.com/matlabcentral/fileexchange/21795-hata-and-cost231-hata-models + """ + fc = self.frequence + + dist =np.linalg.norm(ue_position-bs_position, axis=1) + + h_BS = 20 # effective base station antenna height + h_UE = 1.5 # mobile station antenna height + + # COST231 + C = 3 + + # equation: ahMS = (1.1 * log10(f) - 0.7) * hMS - (1.56 * log10(f) - 0.8); + ahMS = (1.1 * np.log10(fc/1e6) - 0.7)*h_UE - (1.56*np.log10(fc/1e6) - 0.8) + + # equation: L50dB = 46.3 + 33.9 * log10(f) - 13.82 * log10(hBSef) - ahMS + (44.9 - 6.55 * log10(hBSef)) * log(d) + C; + # f is in MHz, dist is in km + pl = 46.3 + 33.9 * np.log10(fc/1e6) + 13.82 * np.log10(h_BS) - ahMS + (44.9 - 6.55 * np.log10(h_BS)) * np.log10(dist/1000.) + C + + return pl # in dB + + def update_shadow(self, shadow_mean, shadow_var, ue_number): + """ + log-normal shadowing + """ + # ue_number is used in case the shadowing parameters are the same + chi_shadow = np.random.normal(loc=shadow_mean, scale=shadow_var, size=ue_number) # log-normal shadowing in dB + + return chi_shadow # in dB + + + def update_fast_fading(self, ue_number, rb_number, fading_variance, fading_type=0): + """ + Rayleigh fading + """ + if fading_type == 0: + """ + Rayleigh fading, + """ + if rb_number > 1: + scale = np.broadcast_to(fading_variance, (ue_number, rb_number)) + hf = 1/np.sqrt(2*scale) * (np.random.normal(scale = scale, size = (ue_number, rb_number)) + + 1j* np.random.normal(scale = scale, size = (ue_number, rb_number))) + else: + scale = fading_variance + hf = 1/np.sqrt(2*scale) * (np.random.normal(scale=scale, size=ue_number) + + 1j* np.random.normal(scale=scale, size=ue_number)) + + h_fading = 20 * np.log10(np.abs(hf)) # in dB + else: + #implement other fast fading model here + raise NotImplementedError("fast fading types not supported") + + return h_fading # in dB + + + def update_MUE_channels(self, mue_position, mue_shadow_variance, flag_LoS=False): + """ + update_MUE_channels() and update_IoT_channels() are functions called by compute_device_rate() + """ + # update_MUE_channels() is supposed to update a single MUE's (according to the context id) channel information + # multiple MUE is also supported + # + pl = self.update_pathloss_db(mue_position, self.bs_position, flag_LoS) + sh = self.update_shadow(shadow_mean=0, shadow_var=mue_shadow_variance, ue_number=1) + ff = 0# compared with the path loss, we ignore fast fading here + + if mue_position.ndim == 1: + # to check if we compute for a single MUE or multiple ones + nb_mue = 
1 + else: + nb_mue = mue_position.shape[0] + + + channel_gains = np.array((nb_mue, self.nb_UPBC), dtype=complex) + + if nb_mue == 1: + channel_gains = self.update_channel_gain(pl, sh, ff, self.atenna_gain, flag_LoS) + else: + for ii in range(nb_mue): + channel_gains[ii,:] = self.update_channel_gain(pl[ii], sh[ii], ff[ii], self.atenna_gain, flag_LoS) + + return channel_gains + + def update_IoT_channels(self, flag_LoS=False): + # we assume that the iot devices do not move + pl = self.update_pathloss_db(self.ue_position, self.bs_position, flag_LoS) # the same for each player + + channel_gains = np.zeros((self.nbPlayers, self.nbArms, self.nb_UPBC), dtype=complex) + for id_arm in range(self.nbArms): + # not the same for each channel/arm + sh = self.update_shadow(shadow_mean=0, shadow_var=self.ue_shadow_variance[:,id_arm], ue_number=self.nbPlayers) + ff = self.update_fast_fading(self.nbPlayers, 1, self.ue_fading_variance[:,id_arm]) # not the same for each channel/arm + + for ii in range(self.nbPlayers): + channel_gains[ii, id_arm, :] = self.update_channel_gain(pl[ii], sh[ii], ff[ii], self.atenna_gain, flag_LoS) #pl + sh - ff + + return channel_gains + + def update_channel_gain(self, pl, sh, ff, atenna_gain, flag_LoS): + """ + consider a uniform linear array (ULA) with nb_UPBC antennas, + the steering vector of the array towards direction θ is denoted as theta + """ + path_loss = 10 ** (pl / 10.) + + vb = np.zeros(self.nb_UPBC, dtype=complex) + # v is the array vector + if (flag_LoS == True): + Np = 1 + vb[0] = 1. / np.sqrt(path_loss) + else: + # + Np = self.nb_UPBC + vb = (np.random.normal(size=Np) + 1j * np.random.normal(size=Np)) / np.sqrt(path_loss) + + # randomly generated + theta = np.random.uniform(low=0, high=np.pi, size=Np) + rho = 10 ** ((atenna_gain + sh + ff ) / 10.) + + # initialize the channel as a complex variable. + h_ch = np.zeros(self.nb_UPBC, dtype=complex) + + for path in np.arange(Np): + exponent = 1j * np.pi * np.cos(theta[path]) * np.arange(self.nb_UPBC) + + bf_vec = 1. / np.sqrt(self.nb_UPBC) * np.exp(exponent) + h_ch = h_ch + bf_vec[path] / rho * bf_vec.T # scalar multiplication into a vector + + h_ch = h_ch * np.sqrt(self.nb_UPBC) + + return h_ch + + def compute_device_rate(self, id_context): + id_LoS = np.random.choice([0,1], p=[self.prob_LoS[id_context], 1-self.prob_LoS[id_context]]) + + mue_channel_gain = self.update_MUE_channels(self.mue_position[:, id_context], self.mue_shadow_variance[id_context], + flag_LoS=(id_LoS == 0)) # part of the context, interference + + iot_channel_gains = self.update_IoT_channels(flag_LoS=False) + + # get the channel capacity w.r.t. 
each IoT devices over each arm/channel + interference_power = self.mue_power[id_context] * abs(np.dot(mue_channel_gain.conj(), self.F[:, self.mue_cb_idx])) ** 2 + + iot_received_power = np.zeros((self.nbPlayers, self.nbArms))# 2D matrix, columns correspond to each channel + for player_id in range(self.nbPlayers): + for ch_id in range(self.nbArms): + iot_received_power[player_id][ch_id] = self.ue_power * abs(np.dot(iot_channel_gains[player_id, ch_id,:].conj(), + self.F[:, self.iot_cb_idx])) ** 2 + + mue_ipn = interference_power + self.noise #interference plus noise, scalar + + # should be a (nbPlayer, nbArm) matrix + rates = np.log2(1 + np.divide(iot_received_power, mue_ipn)) + + # update the rate value for all players over all arms + return rates + + """utility functions""" + # helper_plot_ue_posiiton() is used only for debugging + def helper_plot_ue_posiiton(self): + """ + For debugging purpose + """ + plt.figure(figsize=(4,3)) + plt.scatter(self.ue_position[0,:], self.ue_position[1,:], edgecolor='b', facecolor='none', alpha=0.5 ) + plt.scatter(self.mue_position[0,:], self.mue_position[1,:], edgecolor='r', facecolor='none', alpha=0.5 ) + + def get_discrete_context_prob(self): + return self.prob_context + + def get_param(self, context): + # it is difficult to get the rate statisitics of the UEs over each channel + raise NotImplementedError("get_param() is not campatible with class HomeBrewedHetNetEnv.") + + def get_current_param(self, t=None): + """ + Get the current sampling parameters of arms in the given context. + """ + raise NotImplementedError("This method get_current_param() is not campatible with class HomeBrewedHetNetEnv.") + + def save_environment(self, file_name=None): + #TODO: not fully tested yet, not used + if self.flag_sample_prepared == False: + print("No data is prepared") + else: + # we cannot select the path yet, put the file to the default directory "\results" of the current path + file_path = prepare_file_name("{}-{}".format(file_name if file_name is not None else "", "env"), + alg_name = None, ext_format = "mat") + + scipy.io.savemat(file_path, self.arm_values) + + def load_environment(self, file_path, horizon=None): + #TODO: not fully tested yet, not used + try: + self.arm_values = scipy.io.loadmat(file_path) + except: + print("No data is loaded") + + self.flag_sample_prepared = True \ No newline at end of file diff --git a/MABAlgorithms.py b/MABAlgorithms.py new file mode 100644 index 0000000..bcea531 --- /dev/null +++ b/MABAlgorithms.py @@ -0,0 +1,715 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines and implements the multi-player, multi-arm bandits algorithms. +# Currently, the realized algorithms include: +# 1. Hungarian: standard Hungarian algorithm for centralized arm allocation, +# 2. StaticHungarian: centralized allocation algorithm if the mean value of each arm is known +# 3. 
MusicalChairs: Musical Chairs algorithm for multi-player, homogeneous multi-arm bandit +# 4. TrialandError: Log-linear learning algorithm for contextual multi-player multi-arm bandits, +# with heterogeneous arms +# 5. GameofThrone: Log-linear learning algorithm for multi-player multi-arm bandits with heterogeneous +# arms. It is sub-optimal for contextual bandits +# +# Typically, one distributed algorithm is accompanied by a corresponding player class +# see also MABAlgorithms2.py + +__author__ = "Wenbo Wang" + +import numpy as np +from scipy.optimize import linear_sum_assignment +from Players import MusicChairPlayer, TnEPlayer, GoTPlayer + +from loggingutils import info_logger + +if __name__ == '__main__': + print("Warning: this script 'MABAlgorithms.py' is NOT executable..") # DEBUG + exit(0) + + +class MABAlgorithm(object): + """ Base class for an algorithm class.""" + def __init__(self, param): + """ Base class for an algorithm class.""" + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + + self.nbAgent = 0 # number of agents in the algorithms, can be centralized, decentralized or partially decentralized + + # an agent is usually corresponding to a player, it has its own + self.agents = [] + + # --- Printing + def __str__(self): + return self.__class__.__name__ + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, self.__dir__) + + # --- functionalities + def resolve_collision(self, pulls): + (nbPlayer, nbArm) = np.shape(pulls) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + collisions = pulls.sum(axis=0) + + assert len(collisions) == nbArm, "dimension of collisions is incorrect" + return collisions + + def learn_policy(self, game_env, context=None, time=None): + """ + Learn policies based on the given game environments. + A game environment can be in the form of (context, sampel_reward_matrix) + """ + raise NotImplementedError("This method learn_policy(t) has to be implemented in the class inheriting from MABAlgorithm.") + + def reset(self, horizon=None): + """ + The rest parameters cannot be reset, except self.horizon. + """ + raise NotImplementedError("This method reset() has to be implemented in the class inheriting from MABAlgorithm.") + + def pulls2choices(self, pulls): + """ + Convert pulls into choices + """ + (nbPlayer, nbArm) = np.shape(pulls) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + + arm_choices = np.zeros(nbPlayer, dtype=int) + + arm_selected = np.nonzero(pulls) # index of non-zero values + + # for some algorithms there may be a case when a player refuse to choose any arm + for index in range(len(arm_selected[0])): + playerID = arm_selected[0][index] + arm_choices[playerID] = arm_selected[1][index] # playerID should be 0, 1, 2,..., nbPlayer-1 + + return arm_choices + + def observe_distributed_payoff(self, game_env, collisions): + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." 
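+        # Collision model used throughout this file: a player observes its sampled reward
+        # only when it is the sole player pulling its selected arm (collisions[arm] == 1);
+        # otherwise (a collision, or no arm selected, i.e., selected_arm < 0) the observed
+        # payoff is 0.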
+ + current_reward = np.zeros(self.nbPlayer) + + for playerID in range(self.nbPlayer): + selected_arm = self.agents[playerID].selected_arm + + # for some algorithms there may be a case when a player refuses to choose any arm + if selected_arm < 0: + current_reward[playerID] = 0 + else: + if collisions[selected_arm] == 1: + current_reward[playerID] = game_env[playerID][selected_arm]# not collidiing + else: + current_reward[playerID] = 0# colliding or void + + # returen an array of dimension nbArm + return current_reward + +""" + Algorithm: centralized Hungarian +""" +class Hungarian(MABAlgorithm): + """ + Centralized assignment algorithm in the form of Hungarian (Munkres) algorithm. + Implemented based on scipy.optimize.linear_sum_assignment. + It does not have the structure of multiple agents as the other algorithms. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + """For simplicity we do not implement the single agent here.""" +# self.nbAgent = 0 + self.agents = [] + + # --- Printing + def __str__(self): + return "Hungarian" + + # --- functionalities + def reset(self, horizon=None): + pass # do nothing + + def learn_policy(self, game_env, context=None, time=None): + # context is not used in Hungarian + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + #the mehtod requires the number of rows (jobs) to be larger than that of columns (workers) + cost_matrix = np.negative(game_env.transpose()) + # note that the cost_matrix is a transpose of the original matrix + col_ind, row_ind = linear_sum_assignment(cost_matrix) + + pulls = np.zeros((nbPlayer, nbArm)) + sampled_rewards = np.zeros(nbPlayer) + for ii in range(len(row_ind)): + playerID = row_ind[ii] + sampled_rewards[playerID] = game_env[playerID][col_ind[ii]] + pulls[playerID, col_ind[ii]] = 1 + + total_rewards = game_env[row_ind, col_ind].sum() + + return pulls, total_rewards, sampled_rewards + +""" + Algorithm: centralized Hungarian over means of arm-values (static values) +""" +class StaticHungarian(Hungarian): + """ + This algorithm is implemented for the purpose of deriving the throetic regret + """ + def __init__(self, param): + super().__init__(param) + self.pulls = {} + + #we keep them for later use + self.total_rewards = {} + self.static_rewards = {} + + self.mean_env_payoff = param["mean_game_env"] + self.flag_allocation_ready = False + + for context in self.context_set: + self.pulls[context] = np.zeros((self.nbPlayer, self.nbArm)) + self.total_rewards[context] = 0 + self.static_rewards[context] = np.zeros(self.nbPlayer) + + self.array_context = param["array_context"] + self.array_prob = param["array_prob"] + + self.mean_total_reward = 0 + self.mean_static_reward = np.zeros(self.nbPlayer) + + # --- Printing + def __str__(self): + return "Static Hungarian" + + def reset(self, horizon=None): + self.mean_total_reward = 0 + self.mean_static_reward = np.zeros(self.nbPlayer) + self.flag_allocation_ready = False + + def learn_policy(self, game_env, context=None, time=None): + #ignore all the inputs + if self.flag_allocation_ready == False: + for context_id in range(len(self.array_context)): + tmp_context = self.array_context[context_id] + self.pulls[tmp_context], self.total_rewards[tmp_context], self.static_rewards[tmp_context] = 
super().learn_policy( + self.mean_env_payoff[tmp_context], tmp_context) + + self.mean_total_reward = self.mean_total_reward + self.total_rewards[tmp_context] * self.array_prob[context_id] + self.mean_static_reward = self.mean_static_reward + self.static_rewards[tmp_context] * self.array_prob[context_id] + +# print("Static Hungarian: {}".format(tmp_context)) + + self.flag_allocation_ready = True + + return self.pulls[context], self.mean_total_reward, self.mean_static_reward + +""" + Algorithm: musical chairs +""" +class MusicalChairs(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of Musical Chair algorithm. + Implemented based on the paper "Multi-Player Bandits – a Musical Chairs Approach", by Jonathan Rosenski and + Ohad Shamir @2015 [Rosenski2015]. Note that this algorithm is designed for multi-player only and for + contextual bandit it adapts to the condition of unobservable context. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] # not really used by the algorithm + self.horizon = param["horizon"] + + #each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + for playerID in range(self.nbPlayer): + player_param = {"horizon": self.horizon, + "nbArm": self.nbArm, + "playerID": playerID + } + + if "T0" in param.keys(): + player_param["T0"] = param["T0"] + + self.agents.append(MusicChairPlayer(player_param)) + + self.time = 0 + self.T0 = self.agents[0].T0 + + # --- Printing + def __str__(self): + return "Musical Chairs" + + # --- functionalitiess + def reset(self, horizon=None): + self.time = 0 + for agent in self.agents: + agent.reset() + + if horizon is not None: + self.horizon = horizon + + + def learn_policy(self, game_env, context=None, time=None): + (nbPlayer, nbArm) = np.shape(game_env) +# print("number of arms: {}, number of recorded arms: {}".format(nbArm, self.nbArm)) + + assert nbArm == self.nbArm, "input arm number does not match the stored environment parameters." + assert nbPlayer == self.nbPlayer, "input player number does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." + + pulls = np.zeros((nbPlayer, nbArm)) + + if time <= self.T0: + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(context, game_env[agentID,:], collisions) + else: + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].update_musical_chair(time, collisions) + + sampled_rewards = self.observe_distributed_payoff(game_env, collisions) + total_rewards = np.sum(sampled_rewards) + return pulls, total_rewards, sampled_rewards + +""" + Algorithm: trial and error [Wang2019] +""" +class TrialandError(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of trial-and-error learning algorithm. + Implemented for the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed Bandwidth as a + Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. 
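+
+    A minimal parameter sketch (hypothetical values; only keys read in __init__ below are
+    shown, where c1/c2/c3 control the lengths of the exploration, trial-and-error and
+    exploitation phases of each epoch):
+        param = {"nbPlayer": 2, "nbArm": 3, "context_set": {"context 1"}, "horizon": 100000,
+                 "c1": 100, "c2": 100, "c3": 500, "xi": 0.001, "epsilon": 0.1, "delta": 2}
+        tne = TrialandError(param)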
+ Note that this algorithm is designed for multi-player when contextual information is observable. + (If context is not observable, the algorithm produces a sub-optimal allocation in the same level as a distributed learning + algorithm for non-contextual MP-MAB) + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + self.horizon = param["horizon"] # agents don't know the fixed horizon when running the algorithm + + # each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + self.xi = param["xi"] if "xi" in param.keys() else 0.001 + + # a large epsilon will leads to more frequent transtions (explorations) in the intermedate game + self.epsilon = param["epsilon"] if "epsilon" in param.keys() else 0.1 + # see Theorem 1 in [Wang2019], not kept by the agents, determining trial-and-error rounds + self.delta = param["delta"] if "delta" in param.keys() else 2 + + self.rho = param["rho"] if "rho" in param.keys() else 0.5 # no longer used by the improved algorithm + + self.exploration_round = param["c1"] + self.c2 = param["c2"] + self.c3 = param["c3"] + + for playerID in range(self.nbPlayer): + player_param = {"context_set": self.context_set, + "nbArm": self.nbArm, + "playerID": playerID, + "xi": self.xi, + "epsilon": self.epsilon, + "delta": self.delta, + "rho": self.rho, + "alpha11": param['alpha11'] if 'alpha11' in param.keys() else None, + "alpha21": param['alpha21'] if 'alpha21' in param.keys() else None, + "alpha12": param['alpha12'] if 'alpha12' in param.keys() else None, + "alpha22": param['alpha22'] if 'alpha22' in param.keys() else None + } + + self.agents.append(TnEPlayer(player_param)) + + self.time = 0 + # used for determining the epoch + self.epoch = 1 + + #initialize for the first epoch + self.tne_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.tne_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + self.flag_observable = True # set if the context is observable + + # for debug purpose + self.nbExploration = 0 + self.nbTnE = 0 + self.nbExploitation = 0 + + # --- Printing + def __str__(self): + return "Trial and Error" + + def set_context_observability(self, flag_observable = True): + """ + set_context_observability() turns on/off the observability of contexts (side information), + see Section V. of [Wang2019]. + """ + self.flag_observable = flag_observable + + # --- functionalitiess + def reset(self, horizon=None): + for agent in self.agents: + agent.reset() + + self.time = 0 + self.epoch = 1 + + # reset to the initial values + self.tne_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.tne_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + self.nbExploration = 0 + self.nbTnE = 0 + self.nbExploitation = 0 + + if horizon is not None: + self.horizon = horizon + + def learn_policy(self, game_env, context=None, time=None): + """ + learn_policy() implements the 3 phases in Alg. 1 of [Wang2019]. + """ + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." + + if self.flag_observable == False: + # freeze the context s.t. 
the algorithm is reduced to an MP-MAP + context = list(self.context_set)[0] + + self.time = self.time + 1 + + if self.current_round > self.rounds_in_epoch: + #update epcoh + self.epoch = self.epoch + 1 + # rounds in the k-th epoch + self.tne_round = int(self.exploration_round + self.c2*(self.epoch**self.delta)) # insce delta may be non-integer + self.rounds_in_epoch = int(self.tne_round + self.c3*(2**self.epoch)) + #reset + self.current_round = 1 +# print("number of epoch: {}".format(self.epoch))# debug + + pulls = np.zeros((nbPlayer, nbArm)) + + if self.current_round <= self.exploration_round:# exploration rounds + # reset the phase to exploration in an epoch + if self.current_round == 1: + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 0) + +# print("reset iteration at epoch {}".format(self.epoch))# debug + + #exploration by randomly choosing actions + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(context, game_env[agentID,:], collisions) + + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbExploration = self.nbExploration + 1 + + elif self.current_round <= self.tne_round:# trial-and-error phase + if self.current_round == self.exploration_round + 1: + # reset the phase to learning in an epoch + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 1) + #set the static game + self.agents[agentID].perturb_estimated_payoff(tmp_context, self.epoch) + + # get the latest best policy (from the last epoch) + init_state = None + if self.epoch != 1: + init_state = [0, self.agents[agentID].best_policy[tmp_context]] + else: + #randomize + action = np.random.randint(self.nbArm) + init_state = [0, action] + + # can be moved into perturb_estimated_payoff() in the later versions + self.agents[agentID].init_tne_states(tmp_context, init_state) + + #trial-and-error phase, taking actions randomly according to the intermediate state + for agentID in range(nbPlayer): + armID = self.agents[agentID].learn_policy(context) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].update_game_state(context, collisions) + + #update reward according to actions taken + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbTnE = self.nbTnE + 1 + else: + if self.current_round == self.tne_round + 1: + # reset the phase to exploration in an epoch + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 2) + + ############################################################### + # Debugging + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, self.current_round) + pulls[agentID][armID] = 1 + collisions = self.resolve_collision(pulls) + + info_logger().log_info('TnE Context {}: collisions array {}'.format(context, collisions)) #debug + # End of debugging + ############################################################### + + #exploitation + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, self.current_round) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + 
current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbExploitation = self.nbExploitation + 1 + + #update round number + self.current_round = self.current_round + 1 + + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + +""" + Algorithm: trial and error [Leshem2018] +""" +class GameofThrone(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of game-of-throne learning algorithm. + Implemented for the paper "Distributed Multi-Player Bandits - a Game of Thrones Approach", by Ilai Bistritz et al. + Note that this algorithm is designed for multi-player without considering contextual information. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.horizon = param["horizon"] # agents don't know the fixed horizon when running the algorithm + + # each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + # a large epsilon will leads to more frequent transtions (explorations) in the intermedate game + self.epsilon = param["epsilon"] if "epsilon" in param.keys() else 0.1 + # see Theorem 1 in [Wang2019], not kept by the agents, determining trial-and-error rounds + self.delta = param["delta"] if "delta" in param.keys() else 2 + # set the round of iteration where we + self.rho = param["rho"] if "rho" in param.keys() else 0.5 + + self.c1 = param["c1"] + self.c2 = param["c2"] + self.c3 = param["c3"] + + for playerID in range(self.nbPlayer): + player_param = {"nbArm": self.nbArm, + "nbPlayer": self.nbPlayer, + "playerID": playerID, + "epsilon": self.epsilon, + "delta": self.delta + } + + self.agents.append(GoTPlayer(player_param)) + + self.time = 0 + # used for determining the epoch + self.epoch = 1 + + # initialize for the first epoch, + # for simplicity, the parameter names are kept the same as the TnE algorithm. + self.exploration_round = self.c1 + self.got_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.got_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + # --- Printing + def __str__(self): + return "Game of Throne" + + # --- functionalitiess + def reset(self, horizon=None): + for agent in self.agents: + agent.reset() + + self.time = 0 + self.epoch = 1 + + # reset to the initial values + self.got_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.got_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + if horizon is not None: + self.horizon = horizon + + def learn_policy(self, game_env, context=None, time=None): + """ + learn_policy() implements the 3 phases in Alg. 1 of [Leshem2018]. + Implemented in the same structure for tial-and-error + """ + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." 
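+        # Epoch structure (matching the updates a few lines below): in epoch k the agents
+        # spend c1*k^delta rounds on exploration, c2*k^delta rounds on the Game-of-Thrones
+        # state dynamics, and c3*2^k rounds on exploiting the policy derived from the
+        # state-visit counts.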
+ + self.time = self.time + 1 + + if self.current_round > self.rounds_in_epoch: + #update epcoh + self.epoch = self.epoch + 1 + # rounds in the k-th epoch + self.exploration_round = int(self.c1*(self.epoch**self.delta)) + self.got_round = int(self.exploration_round + self.c2*(self.epoch**self.delta)) + self.rounds_in_epoch = int(self.got_round + self.c3*(2**self.epoch)) + #reset + self.current_round = 1 +# print("number of epoch: {}".format(self.epoch))# debug + + pulls = np.zeros((nbPlayer, nbArm)) + + if self.current_round <= self.exploration_round:# exploration rounds + # reset the phase to exploration in an epoch + if self.current_round == 1: + for agentID in range(nbPlayer): + self.agents[agentID].set_internal_state(context=None, input_state=0) + + # exploration by randomly choosing actions + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(None, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + + # learn the real payoff + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + elif self.current_round <= self.got_round:# game-and-thrones phase + if self.current_round == self.exploration_round + 1: + # reset the phase to learning in an epoch + for agentID in range(nbPlayer): + self.agents[agentID].set_internal_state(context=None, input_state=1) + + # as per Alg.1 in [Leshem2018], initialize the mood to be content + if self.epoch != 1: + init_state = [0, self.agents[agentID].best_policy] #(STATE_CONTENT, BEST ACTION) + else: + #randomize + action = np.random.randint(self.nbArm) + init_state = [0, action] + + # initialize the intermediate game + self.agents[agentID].initalize_static_game(init_state, self.epoch) + # initialize the intermediate states, and (TODO) this can be moved into perturb_estimated_payoff() + self.agents[agentID].init_got_states(context=None, starting_state=init_state) + + #game of throne phase, taking actions randomly according to the intermediate state + for agentID in range(nbPlayer): + armID = self.agents[agentID].learn_policy(context=None) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + flag_count_frequency = False + # update the count of state-visit only for the last half starting from rho*c2*k^(1+delta) rounds +# if self.current_round >= self.got_round - 1 - self.rho*self.c2*(self.epoch**self.delta): + if self.current_round >= self.exploration_round + self.rho*self.c2*(self.epoch**self.delta): + flag_count_frequency = True +# flag_count_frequency = True + + for agentID in range(nbPlayer): + self.agents[agentID].update_game_state(context=None, collisions=collisions, + flag_record_frequency=flag_count_frequency) + + #update reward according to actions taken + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + else: + if self.current_round == self.got_round + 1: + # reset the phase to exploitation in an epoch + for agentID in range(nbPlayer): + # the best policy is computed in set_internal_state() + self.agents[agentID].set_internal_state(context=None, input_state=2) + + ############################################################### + # Debugging + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(None, self.current_round) + pulls[agentID][armID] = 1 + collisions = self.resolve_collision(pulls) + + info_logger().log_info('GoT Context {}: collisions array {}'.format(context, collisions)) #debug + # End of 
debugging + ############################################################### + + #exploitation + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(None, self.current_round) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + #update round number + self.current_round = self.current_round + 1 + + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + +__all__ = ["Hungarian", "StaticHungarian", "MusicalChairs", "TrialandError", "GameofThrone"] \ No newline at end of file diff --git a/MABAlgorithms2.py b/MABAlgorithms2.py new file mode 100644 index 0000000..0c8ab2b --- /dev/null +++ b/MABAlgorithms2.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines and implements the following multi-player, multi-arm bandits algorithms (see also Algorithms.py). +# +# 1. [Sumit2019] Sumit J. Darak and Manjesh K. Hanawal, "Multi-player multi-armed bandits for stable allocation in +# heterogeneous ad-hoc networks", IEEE JSAC oct. 2019. + +__author__ = "Wenbo Wang" + +import numpy as np +from MABAlgorithms import MABAlgorithm +from Players2 import SOCPlayer + +if __name__ == '__main__': + print("Warning: this script 'MABAlgorithms2.py' is NOT executable..") # DEBUG + exit(0) + +""" + Algorithm: centralized Hungarian +""" +class SOC(MABAlgorithm): + """ + SOC implements the algorithm "stable orthogonal allocation (SOC)" proposed in + "Multi-player multi-armed bandits for stable allocation in heterogeneous ad-hoc networks", + IEEE JSAC oct. 2019, by Sumit J. Darak and Manjesh K. Hanawal [Sumit2019]. + + The algorithm is featured by a protocol explicitly resolving collisions with channel switching, + and the channel statistics (index) is learned based on upper confidence bound (UCB). + + However, it does not have a explicit function for when to stop exporation, as in the musical chairs. + Channel allocation is obtained through a master-slave allocation process, with explicit coordination, + Exploration time needs to be given. 
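+
+    A minimal usage sketch (hypothetical values; only the parameter keys read in
+    __init__ below are shown):
+        param = {"nbPlayer": 2, "nbArm": 4, "context_set": {"context 1"},
+                 "delta": 0.1, "exploration_time": 3000}
+        soc = SOC(param)
+        # each round: pulls, total_reward, player_rewards = soc.learn_policy(game_env, time=t)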
+ """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] # not used + + self.delta = 0.1 if 'delta' not in param.keys() else param['delta'] +# self.nbAgent = self.nbPlayer + + self.time = 0 + self.Trh = np.ceil(np.log(self.delta/self.nbArm) / np.log(1-1/4/self.nbArm)) + self.TExploration = 3000 if "exploration_time" not in param.keys() else param["exploration_time"] + + self.agents = [] + for playerID in range(self.nbPlayer): + player_param = {"context_set": self.context_set, + "nbArm": self.nbArm, + "playerID": playerID + } + + self.agents.append(SOCPlayer(player_param)) + + self.OHS_step = 2*(self.nbArm ** 2) + self.MB_step = 2*self.nbArm + self.SB_step = 2 + + self.current_MB_id = -1 # set to an invalid ID + self.current_master_node = -1 # there may not be a master node for the current MB + + # --- Printing + def __str__(self): + return "Static Orthogonal Allocation" + + # --- functionalities + def reset(self, horizon=None): + self.time = 0 + self.current_MB_id = -1 # set to an invalid ID + self.current_master_node = -1 + for agent in self.agents: + agent.reset() + + def learn_policy(self, game_env, context=None, time=None): + # context is not used in Hungarian + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + self.time = self.time + 1 + + pulls = np.zeros((nbPlayer, nbArm)) + + # there are three phases in the game + if self.time < self.Trh: + #random hopping / exploration + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(None, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + elif self.time <= self.TExploration: + # master-slave process + # 1 OHS block has K macro blcoks (K=nbArm) + # 1 macro block has T_mb=2K time slots, namely, K sub-blocks of 2 slots each + OHS_id = int(np.floor((self.time - self.Trh) / (self.OHS_step))) + MB_id = int(np.floor((self.time - self.Trh - OHS_id*self.OHS_step) / self.MB_step)) # from 0 to nbArm-1 + SB_id = int(np.floor((self.time - self.Trh - OHS_id*self.OHS_step - MB_id*self.MB_step) / self.SB_step)) # from 0 to nbArm-1 + subslot_id = int ((self.time - self.Trh) % 2) # 0 is the CT slot and 1 is the CS slot + + if self.current_MB_id != MB_id: + # one master block occupies 2*nbArm slots. 
Update master node ID as MB_id + self.current_MB_id = MB_id + + # there may be no master node at the given MB (transmitting on MB_id), + # so we initialize it to an invalid value for later state-check + self.current_master_node = -1 + #prepare the master flags of each player, only when the master ID is updated + master_counter = 0 + for agentID in range(nbPlayer): + # reset the master flag of each node + ret_flag = self.agents[agentID].set_master(self.current_MB_id) + if ret_flag == True: + # if being a master, record its ID + self.current_master_node = agentID + master_counter = master_counter + 1 + + assert master_counter<=1, "error: more than one master" + + if self.current_master_node == -1: + # if there is no master node, the MB block is wasted, see Fig.2 [Sumit2019], + # and for the entire 2*nbArm slots no one will change actions + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + else: + # a master node exists + if SB_id == 0: + # force transmission to align with the current policy at the first SB + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + else: + # sub-slot CT or CS for SB=1,...nbArm-1, starting to switch channels + if subslot_id == 0: + # in the channel transit (CT) sub-slot, the master node chooses channel SB_id to switch (notify), + # channel SB_id is the index in its preference list. 
+ # all non-master nodes stay on the their own channels + master_action, master_policy = self.agents[self.current_master_node].set_master_action(SB_id) + pulls[self.current_master_node][master_action] = 1 + + for agentID in range(nbPlayer): + if agentID != self.current_master_node: + # directly get slave response (instead of getting it by observing collisions) + # prepare the arm choice of the slave node for the next round + slave_action = self.agents[agentID].decide_switching(subslot_id, target_arm=master_policy) + pulls[agentID][slave_action] = 1 + + else: #subslot_id == 1: + assert subslot_id == 1, "sub-slot ID is invalid" + # in channel switch sub-slot, the master node tries to transmit on the channel to switch + # non-master nodes stays on their selected channels + for agentID in range(nbPlayer): + if agentID != self.current_master_node: + # only the slave occupying the target channel needs to answer the request + arm_choice = self.agents[agentID].decide_switching(subslot_id) + + if arm_choice == -1: + # use invalid choice to indicate no trnasmission + pulls[agentID,:] = 0 + else: + pulls[agentID][arm_choice] = 1 + else: + arm_choice = self.agents[agentID].selected_arm + pulls[agentID][arm_choice] = 1 + + # observe collision + collisions = self.resolve_collision(pulls) + + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + # update policy after learning + self.agents[agentID].update_policy(subslot_id, collisions) + else: + # exploitation (no mcuh is mentioned (theoretically) regarding the performance in [Sumit2019]) + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + + current_rewards = self.observe_distributed_payoff(game_env, collisions) + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + + # add other algorithms here +__all__ = ["SOC"] \ No newline at end of file diff --git a/MPMAB.py b/MPMAB.py new file mode 100644 index 0000000..f916115 --- /dev/null +++ b/MPMAB.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +""" +Implementing the class 'MAB' and its children classes, which define the environment of the bandit game (stochastic i.i.d.) +""" + +__author__ = "Wenbo Wang" + +import numpy as np +import scipy.io +from plotutils import prepare_file_name + +from Arms import UniformArm, GaussianArm + +class MP_MAB(object): + """ + i.i.d. multi-arm bandit problem. + The arm value is jointly sampled with the context, and for each player the underlying process may be different. 
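+
+    A minimal construction sketch (hypothetical values, using the uniform_mab() factory
+    defined below; dic_lower/dic_upper map a (context, playerID) pair to per-arm bounds):
+        contexts = {"context 1", "context 2"}
+        dic_lower = {(c, p): [0.0, 0.1] for c in contexts for p in range(2)}
+        dic_upper = {(c, p): [1.0, 0.9] for c in contexts for p in range(2)}
+        game = MP_MAB.uniform_mab(contexts, 2, 2, dic_lower, dic_upper)
+        context, arm_values = game.draw_sample(t=0)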
+ """ + def __init__(self, context_set, nbArms, nbPlayers): + """New MP-MAB.""" + print("\nCreating a contextual multi-player MAB game...") # DEBUG + + self.nbArms = nbArms + self.nbPlayers = nbPlayers + + self.context_set = context_set + self.context_probabilites = [] + self.context_array = [] # may the context iterable + self.flag_context_prob = False + + self.current_arm_value = np.zeros((nbPlayers, nbArms)) + self.current_context = None + + self.arms = {} + self.max_arm_value = {} # recording the maximum arm value in case of normalization for each context along the time horizon + + self.horizon = 0 + self.flag_sample_prepared = False + + """ + For different joint distributions of (context, arm-value), we may need different initilization variables. + Call one of the following methods for class instantiation with different types of arms instead of __init__. + """ + + @classmethod + def uniform_mab(cls, context_set, nbArms, nbPlayers, dic_lower, dic_upper): + uniform_inst = cls(context_set, nbArms, nbPlayers) + + # For each context and each player, we create an arm + for context in context_set: + player_arm_array = [[None]*nbArms for playerID in range(nbPlayers)] + for playerID in range(nbPlayers): + for armID in range(nbArms): + # if it is a uniform arm + param = {"lower_val": dic_lower[(context, playerID)][armID], + "upper_val": dic_upper[(context, playerID)][armID], + "context": context, + "playerID": playerID, + "armID": armID } + player_arm_array[playerID][armID] = UniformArm(param) +# print("size of the object array: ", len(arm_array))#debug + + uniform_inst.arms[context] = player_arm_array +# print("size of the object array for context: ", context, ": (", len(player_arm_array), ",", len(player_arm_array[0]), ")")#debug + + return uniform_inst + + @classmethod + def gaussian_mab(cls, context_set, nbArms, nbPlayers, dic_mean, dic_sigma): + gaussian_inst = cls(context_set, nbArms, nbPlayers) + + # For each context and each player, we create an arm + for context in context_set: + player_arm_array = [[None]*nbArms for playerID in range(nbPlayers)] + for playerID in range(nbPlayers): + for armID in range(nbArms): + # if it is a uniform arm + param = {"mu": dic_mean[(context, playerID)][armID], + "sigma": dic_sigma[(context, playerID)][armID], + "context": context, + "playerID": playerID, + "armID": armID } + player_arm_array[playerID][armID] = GaussianArm(param) +# print("size of the object array: ", len(arm_array))#debug + + gaussian_inst.arms[context] = player_arm_array +# print("size of the object array for context: ", context, ": (", len(player_arm_array), ",", len(player_arm_array[0]), ")")#debug + + return gaussian_inst + + + def set_discrete_context_prob(self, context_prob): + """ + assign arbitrary probabilities to contexts + """ + if set(context_prob.keys()) != self.context_set: + raise Exception("probability values do not match the set of context") + + self.context_array = np.array(list(context_prob.keys())) + + self.context_probabilites = np.array(list(context_prob.values())) + self.context_probabilites = self.context_probabilites / np.sum(self.context_probabilites) # normalize + + self.flag_context_prob = True + + def get_discrete_context_prob(self): + if self.flag_context_prob: + return self.context_array, self.context_probabilites + else: + prob = np.ones(len(self.context_set)) + return np.array(list(self.context_set)), prob / np.sum(prob) + + """Draw samples""" + def draw_sample(self, t=None): + """ + Draw samples for all the player-arm pairs in a given sampled context. 
+ We enforce that the arm values are drawn in the same global context. + """ + + # context is finite, so here we can adopt a separate discrete (e.g., uniform) distribution for context evolution + # in the real-world situation context-arm-value can be seen as being sampled from a joint distribution + if self.flag_context_prob == False: + context = np.random.choice(tuple(self.context_set)) # uniform randomly sampled + else: + context = np.random.choice(self.context_array, p=self.context_probabilites) + + player_arm_array = self.arms[context] + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + if player_arm_array[playerID][armID].playerID != playerID or player_arm_array[playerID][armID].armID != armID: + raise Exception("player ID and arm ID do not match!") + + self.current_arm_value[playerID][armID] = player_arm_array[playerID][armID].draw_sample(context, t) + +# print("Sampling arms completes") + self.current_context = context + + return self.current_context,self.current_arm_value + + """get the samples in advance""" + def prepare_samples(self, horizon, flag_progress_bar=False): + if horizon <= 0: + raise Exception("Input horizon is not valid") + + self.horizon = horizon + + for context in self.context_set: + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + # for each player-arm pair, prepare its sample sequences in each context + self.arms[context][playerID][armID].prepare_samples(horizon) + + self.max_arm_value[context] = np.ones(horizon) # + + self.flag_sample_prepared = True + + """utility functions""" + def get_param(self, context): + lower = np.zeros((self.nbPlayers, self.nbArms)) + upper = np.zeros((self.nbPlayers, self.nbArms)) + means = np.zeros((self.nbPlayers, self.nbArms)) + variance = np.zeros((self.nbPlayers, self.nbArms)) + + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + lower[playerID][armID] = self.arms[context][playerID][armID].lower + upper[playerID][armID] = self.arms[context][playerID][armID].upper + means[playerID][armID] = self.arms[context][playerID][armID].mean + variance[playerID][armID] = self.arms[context][playerID][armID].variance + + return lower, upper, means, variance + + def get_current_param(self, t=None): + """ + Get the current sampling parameters of arms in the given context. 
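+        Equivalent to calling get_param(self.current_context): the returned tuple is
+        (lower, upper, means, variance) for the context of the most recent draw_sample().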
+ """ + if self.current_context is None: + raise Exception("The MAB game is not started.") + + return self.get_param(self.current_context) + + """ + + """ + def save_environment(self, file_name=None): + if self.flag_sample_prepared == False: + print("No data is prepared") + else: + # TODO: we cannot select the path yet, put the file to the default directory "\results" of the current path + file_path = prepare_file_name("{}-{}".format(file_name if file_name is not None else "", "env"), + alg_name = None, ext_format = "mat") + + mdict = {} + for context in self.context_set: + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + dict_key = "{}-{}-{}".format(context, playerID, armID) + mdict[dict_key] = self.arms[context][playerID][armID].prepared_samples + + scipy.io.savemat(file_path, mdict) + + def load_environment(self, file_path, horizon=None): + mdict = scipy.io.loadmat(file_path) + + for key in mdict: + key_strings = key.split('_') + context = key_strings[0] + playerID = int(key_strings[1]) + armID = int(key_strings[2]) + + self.arms[context][playerID][armID].prepared_samples = mdict[key] + + self.flag_sample_prepared = True + +# ploting methods \ No newline at end of file diff --git a/PlayResult.py b/PlayResult.py new file mode 100644 index 0000000..44eba75 --- /dev/null +++ b/PlayResult.py @@ -0,0 +1,257 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +This file defines the data recorder and parts of the virtualization mechanisms in our simulations. +class ResultMultiPlayers +""" + +# This file implements the data recorder for each single MAB algorithm + +__author__ = "Wenbo Wang" + +import numpy as np +import matplotlib.pyplot as plt +import scipy.io + +from plotutils import make_markers, make_palette, display_legend, prepare_file_name +from datetime import datetime + +if __name__ == '__main__': + print("Warning: this script 'PlayerResult.py' is NOT executable..") # DEBUG + exit(0) + +DELTA_T_PLOT = 50 +FIGURE_SIZE = (5, 3.75) + +class ResultMultiPlayers(object): + """ ResultMultiPlayers accumulators, for the multi-players case. 
""" + + def __init__(self, alg_name, context_set, player_no, arm_no, horizon): + """ Create ResultMultiPlayers.""" + self.alg_name = alg_name + + self.nbPlayer = player_no + self.nbArm = arm_no + self.context_set = context_set + self.horizon = horizon + + self.choices = np.zeros((player_no, horizon), dtype=int) #: Store all the arm choices of all the players + self.sampled_rewards = np.zeros((player_no, horizon)) #: Store all the rewards of all the players, to compute the mean + self.total_rewards = np.zeros(horizon) + + self.context_history = [None]*horizon + + self.pull_history = np.zeros((player_no, arm_no, horizon), dtype=int) #: Is a map of 0-1 for players and arms + self.collisions = np.zeros((arm_no, horizon), dtype=int) #: Store the number of collisions on all the arms + + self.delta_t_plot = 1 if self.horizon <= 10000 else DELTA_T_PLOT + + def store(self, time, context, choices, sampled_rewards, total_rewards, pulls, collision=None): + """ Store results.""" + self.context_history[time] = context + + self.choices[:, time] = choices + self.sampled_rewards[:, time] = sampled_rewards + self.total_rewards[time] = total_rewards + + self.pull_history[:, :, time] = pulls + + if collision is None: + self.collisions[:, time] = 0 + else: + self.collisions[:, time] = collision + + def reset_record(self, horizon=None): + if horizon is not None: + self.horizon = horizon + + self.choices = np.zeros((self.nbPlayer, self.horizon), dtype=int) #: Store all the arm choices of all the players + self.sampled_rewards = np.zeros((self.nbPlayer, self.horizon)) #: Store all the rewards of all the players, to compute the mean + self.total_rewards = np.zeros(self.horizon) + + self.context_history = [None]*self.horizon + + self.pull_history = np.zeros((self.nbPlayer, self.nbArm, self.horizon), dtype=int) #: Is a map of 0-1 for players and arms + self.collisions = np.zeros((self.nbArm, self.horizon), dtype=int) #: Store the number of collisions on all the arms + + + def dump2disk(self, file_name=None): + """Save the result into a Matlab .mat file""" + file_path = prepare_file_name(file_name, self.alg_name, "mat") + + scipy.io.savemat(file_path, mdict={"nbPlayer": self.nbPlayer, "nbArm": self.nbArm, "context_set": list(self.context_set), + "horizon": self.horizon, "context_history": self.context_history, + "sampled_reward": self.sampled_rewards, + "choices": self.choices, "collisions": self.collisions}) + + + """ + The following methods are used for plotting/saving figures. 
+ Other figure plotting methods can be found in plotutils.py + """ + def plot_cumu_rewards(self, horizon=None, other_results=None, semilogx=False, save_fig=False, save_data=False): + #other_results are used for comparison with other algorithms + if other_results is not None: + #the other results should have the same player/arm numbers + for idx in range(len(other_results)): + nbPlayer = other_results[idx].nbPlayer + nbArm = other_results[idx].nbArm + + if nbPlayer != self.nbPlayer or nbArm != self.nbArm: + raise Exception("environment does not match!") + + nbCurves = self.nbPlayer * (1 + len(other_results)) + else: + nbCurves = self.nbPlayer + + """Plot the decentralized rewards, for each player.""" + fig = plt.figure(figsize=FIGURE_SIZE) + ymin = 0 + colors = make_palette(nbCurves) + markers = make_markers(nbCurves) + + if horizon is None: + horizon = self.horizon + + X = np.arange(start=0, stop=horizon, step=1) + + #plot the locally stored values + cumu_rewards = np.cumsum(self.sampled_rewards, axis=1) + + curve_idx = 0 + for playerId in range(self.nbPlayer): + label = '{}: Player {:>2}'.format(self.alg_name, playerId + 1) + Y = cumu_rewards[playerId, :horizon] + Y = Y / (X+1) + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + curve_idx = curve_idx + 1 + + if other_results is not None: + for idx in range(len(other_results)): + cumu_rewards = np.cumsum(other_results[idx].sampled_rewards, axis=1) + for playerId in range(other_results[idx].nbPlayer): + label = '{}: Player {:>2}'.format(other_results[idx].alg_name, playerId + 1) + Y = cumu_rewards[playerId, :horizon] + Y = Y / (X+1) + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + curve_idx = curve_idx + 1 + + display_legend() + plt.xlabel("Number of rounds", fontsize=10) + plt.ylabel("Average reward over time", fontsize=10) + +# plt.title("Individual Average Rewards Over Time", fontsize=10) + if save_data: + print("saving figure...") + self.dump2disk() + + if save_fig: + self.save_figure(file_name = "indv_avg_result", fig=fig) + + return fig + + def plot_avg_reward(self, horizon=None, other_results=None, semilogx=False, save_fig=False, save_data=False): + #other_results are used for comparison with other algorithms + if other_results is not None: + #the other results should have the same player/arm numbers + nbCurves = 1 + len(other_results) + else: + nbCurves = 1 + + """Plot the average rewards, for each player in each algorithm.""" + fig = plt.figure(figsize=FIGURE_SIZE) + ymin = 0 + colors = make_palette(nbCurves) + markers = make_markers(nbCurves) + + if horizon is None: + horizon = self.horizon + + X = np.arange(start=0, stop=horizon, step=1) + + #plot the locally stored values + curve_idx = 0 + cumu_rewards = np.cumsum(self.total_rewards[:horizon]) + + label = '{}'.format(self.alg_name) + Y = cumu_rewards 
/ (X+1) / self.nbPlayer + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + if other_results is not None: + for idx in range(len(other_results)): + curve_idx = curve_idx + 1 + cumu_rewards = np.cumsum(other_results[idx].total_rewards[:horizon]) + + label = '{}'.format(other_results[idx].alg_name) + Y = cumu_rewards / (X+1) / other_results[idx].nbPlayer + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + display_legend() + plt.xlabel("Number of rounds", fontsize=10) + plt.ylabel("Average reward over time", fontsize=10) +# plt.title("Individual Average Rewards Over Time", fontsize=10) + + if save_data: + print("saving figure data...") + self.dump2disk() + + if save_fig: + print("saving figure...") + self.save_figure(file_name = "avg_result", fig=fig) + + return fig + + def save_figure(self, file_name=None, formats={'pdf', 'png'}, fig=None): + now = datetime.now() + + for form in formats: + path = prepare_file_name(file_name, self.alg_name, form) + try: + current_time = now.strftime("%H:%M:%S") + plt.savefig(path, bbox_inches="tight") + print("Figure saved! {} at {} ...".format(path, current_time)) + + except Exception as exc: + print("Could not save figure to {} due to error {}!".format(path, exc)) # DEBUG + \ No newline at end of file diff --git a/Players.py b/Players.py new file mode 100644 index 0000000..fa00380 --- /dev/null +++ b/Players.py @@ -0,0 +1,852 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +# This file defines the player behavior for a series of MP-MAB algorithms + +__author__ = "Wenbo Wang" + +import numpy as np + +from loggingutils import info_logger + +if __name__ == '__main__': + print("Warning: this script 'Player.py' is NOT executable..") # DEBUG + exit(0) + +class Player(object): + """ Base class for a player class.""" + + def __init__(self, param): + """ + Base class for a player class. + For clarity, we require each child class to re-implement completely the __init__() method. + """ + self.horizon = param["horizon"] #: if the horizon is not known in advance, set it to None. 
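+        # Note: if "T0" is not supplied in param, get_optimalT0() below is used to set the
+        # exploration length, which requires a finite horizon (it raises an exception when
+        # horizon is None).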
+ self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.context = param["context"] + self.playerID = param["playerID"] + + self.arm_estimate = np.zeros(self.nbArm) + + # --- Printing + def __str__(self): + return self.__class__.__name__ + + # --- functionalities + def explore(self, context = None, time = None): + print("decision() should be implemented for agent adopting a particular algorithm.") + + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + print("learn_arm_value() should be implemented for agent adopting a particular algorithm.") + + def exploit(self, context = None, time=None): + print("exploit() should be implemented for agent adopting a particular algorithm.") + + def reset(self): + print("reset() should be implemented for agent adopting a particular algorithm.") + +class MusicChairPlayer(Player): + """ + Class MusicChairPlayer for a player (agent) adopting the Music Chair algorithm. + Implemented based on the paper "Multi-Player Bandits – a Musical Chairs Approach", by Jonathan Rosenski and Ohad Shamir @2015 [Rosenski2015] + (https://arxiv.org/abs/1512.02866). + Note that this algorithm is designed for multi-player only and for contextual bandit it adapts to the condition of unobservable context. + """ + + def __init__(self, param): + self.horizon = param["horizon"] #: if the horizon is not known in advance, set it to None. + self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.context = None # not used by the player + self.playerID = param["playerID"] + + if "epsilon" in param: + self.epsilon = param["epsilon"] + else: + self.epsilon = 0.1 + + if "delta" in param: + self.delta = param["delta"] + else: + self.delta = 0.05 + + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) # \tilde{\mu}_i in [Rosenski2015] + self.nb_collision = 0 # number of observed collision, C_{T_0} in [Rosenski2015] + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff, o_i in [Rosenski2015] + + if "T0" in param.keys() and param["T0"] > 0: + self.T0 = param["T0"] + else: + self.T0 = self.get_optimalT0(self.nbArm, self.horizon, self.epsilon, self.delta) + + self.time = 0 + + self.sorted_chair = None + self.selected_arm = 0 + + self.flag_seated = False + self.selected_chair = 0 + self.estimated_nbPlayer = 0 + + def reset(self): + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) # \tilde{\mu}_i in [Rosenski2015] + self.nb_collision = 0 # number of observed collision, C_{T_0} in [Rosenski2015] + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff, o_i in [Rosenski2015] + + self.time = 0 + + self.sorted_chair = None + self.selected_arm = 0 + + self.flag_seated = False + self.selected_chair = 0 + self.estimated_nbPlayer = 0 + + + def get_optimalT0(self, nbArms, horizon=None, epsilon=0.1, delta=0.05): + """ + Estimate T0 for an error probability delta and a bound of gap between the rewards of N-th best arm and the (N+1)-th best arm. + The method is based on Theorem 1 of [Rosenski2015], which requires knowing the number of arms in the game. + + Equation: + \begin{equation} + T_0 = \ceil{\max (\frac{K}{2})\ln(\frac{2K^2}{\delta}), \frac{16K}{\epsilon^2}\ln(\frac{4K^2}{\delta}, \frac{K^2\log(\frac{2}{\delta})}{0.02}) } + \end{equation} + + Remark: note that the last term \frac{K^2\log(\frac{2}{\delta})}{0.02} was written in [Rosenski2015] as \frac{K^2\log(\frac{2}{\delta_2})}{0.02}, which is a typo. 
+ $\delta_2$ should be $\delta$, since $\frac{K^2\log(\frac{2}{\delta_2})}{0.02}$ is derived from $t\ge \frac{\log(2/delta)}{2\epsilon_1^2}$, where + $\epsilon_1^2\ge \frac{0.01}{K^2}$. + + Examples: + + - For K arms, in order to have a constant regret with error probability delta, with the gap condition epsilon, we have + (1) optimalT0(2, None, 0.1, 0.05) = 18459 + (2) optimalT0(6, None, 0.01, 0.05) = 76469 + (3) optimalT0(17, None, 0.01, 0.05) = 273317 + """ + + T0_1 = (nbArms / 2.) * np.log(2 * nbArms**2 / delta) + T0_2 = ((16 * nbArms) / (epsilon**2)) * np.log(4 * nbArms**2 / delta) + T0_3 = (nbArms**2 * np.log(2 / delta)) / 0.02 # delta**2 or delta_2 ? Typing mistake in their paper + T0 = max(T0_1, T0_2, T0_3) + + if horizon is None: + raise Exception("the total number of rounds is not known.") + elif T0>= horizon: + raise Exception("the total number of rounds is too small for exploration.") + + return int(np.ceil(T0)) + + def explore(self, context = None, time = None): + if time is None or time != self.time: + raise Exception("Playing round does not match.") + + #update time + self.time = time + 1 + + if self.time <= self.T0: + #pahse of exploration + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # context is not used in this algorithm + # must be called after explore + if len(arm_values) != self.nbArm or len(collisions) != self.nbArm: + raise Exception("inputs are invalid.") + + if self.time <= self.T0: + # get the reward of exploration phase + if collisions[self.selected_arm] > 1: + #selects an arm with collision + self.nb_collision = self.nb_collision + 1 + else: + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + def exploit(self, context = None, time=None): + if time is None or time != self.time: + raise Exception("Playing round does not match.") + + #update time + self.time = time + 1 + + if self.time > self.T0 and self.time <=self.horizon: + if self.sorted_chair is None: + # prepare only once + for armID in range(self.nbArm): + if self.nb_observation[armID] != 0: + self.arm_estimate[armID] = self.accumulated_value[armID] / self.nb_observation[armID] + + # if the estimated player nubmer is not obtained, calculate it first + # Equation for N^* is given in Alg. 1 of [Rosenski2015] + self.estimated_nbPlayer = int(round(1 + np.log((self.T0 - self.nb_collision) / self.T0) / np.log(1. - 1. / self.nbArm))) + if self.estimated_nbPlayer > self.nbArm: + self.estimated_nbPlayer = self.nbArm # force the number of players to be less than the number of arms + + # sort their index by empirical arm values (means) in decreasing order + sorted_arms = np.argsort(-self.arm_estimate) # FIXED among the best M arms! 
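+                # keep only the estimated_nbPlayer arms with the largest empirical means;
+                # these form the set of "chairs" that the player keeps trying to occupy at
+                # random until it observes a collision-free round and gets seated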
+ self.sorted_chair = sorted_arms[:self.estimated_nbPlayer] + + if self.estimated_nbPlayer == 0: + raise Exception("estimated arm number is invalid.") + + if self.flag_seated == False: + self.selected_chair = np.random.randint(self.estimated_nbPlayer) + self.selected_arm = self.sorted_chair[self.selected_chair] + else: + pass + + return self.selected_arm + + def update_musical_chair(self, time = None, collisions = None): + if time is None or time <= self.T0: + raise Exception("Playing round does not match.") + + if self.flag_seated == False and collisions[self.selected_arm] == 1: + self.flag_seated = True + + +STATE_EXPLORE = 0 +STATE_LEARN = 1 +STATE_EXPLOIT = 2 + +STATE_CONTENT = 0 +STATE_HOPEFUL = 1 +STATE_WATCHFUL = 2 +STATE_DISCONTENT = 3 + +class TnEPlayer(Player): + """ + Class TnEPlayer for a player (agent) adopting the trial-and-error algorithm. + Implemented for the paper "Distributed Learning for Interference Avoidance as aContextual Multi-player Multi-armed Bandit Game", + by Wenbo Wang et al. [Wang2019] + """ + def __init__(self, param): + if "context_set" not in param.keys(): + raise Exception("context set is not given") + else: + self.context_set = param["context_set"] # has to be larger than or equal to 1 + + self.horizon = param["horizon"] if "horizon" in param.keys() else 0 + + #for arm of a specific context-player + self.playerID = param["playerID"] + self.nbArm = param["nbArm"] + + #used in Eq.(6) in [Wang2019] + self.xi = param["xi"] + #used in Eq. (10) and Eq. (11) in [Wang2019] + self.epsilon = param["epsilon"] + + self.rho = param["rho"] #no longer used in the new algorithm + + #log-linear function parameters, adopted from Young's paper "learning efficient Nash equilibrium in distributed systems" + self.alpha11 = -0.001 if param['alpha11'] is None else param['alpha11']# F(u)<1/2M + self.alpha12 = 0.1 if param['alpha12'] is None else param['alpha12'] + + self.alpha21 = -0.01 if param['alpha21'] is None else param['alpha21']# G(u)<1/2 + self.alpha22 = 0.5 if param['alpha22'] is None else param['alpha22'] + + # Initialization + self.nb_observation = {} + self.accumulated_value = {} + self.arm_estimate = {} + + self.learning_state = {} +# self.visit_frequency = {} + self.ptbd_arm_value = {} + self.selected_arm = 0 + + self.nb_state_visit = {} + self.nb_state_aligned = {} + + self.current_state = {} + self.reference_reward = {} + + self.best_policy = {} + + for context in self.context_set: + # for arm-value estimation + self.nb_observation[context] = np.zeros(self.nbArm) + self.accumulated_value[context] = np.zeros(self.nbArm) + # the static game is formulated on arm_estimate + self.arm_estimate[context] = np.zeros(self.nbArm) + + self.learning_state[context] = STATE_EXPLORE + + self.ptbd_arm_value[context] = np.zeros(self.nbArm) # perturbed arm values + + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) # for debugging purpose + self.nb_state_aligned[context] = np.zeros(self.nbArm) + """ + One example of the intermediate states: + --- for a game of 2 arms, we have that for a given context (payoff is stored in self.reference_reward) + (0, 0, 0): Content, arm 0, payoff = 0, + (1, 0, 0): Hopeful, arm 0, payoff = 0, + (2, 0, 0): Watchful, arm 0, payoff = 0, + (3, 0, 0): Discontent, arm 0, payoff = 0, + + (0, 0, 1): Content, arm 0, payoff = arm-value, + (1, 0, 1): Hopeful, arm 0, payoff = arm-value, + (2, 0, 1): Watchful, arm 0, payoff = arm-value, + (3, 0, 1): Discontent, arm 0, payoff = arm-value, + + (0, 1, 0): Content, arm 1, payoff = 0, + (1, 1, 0): Hopeful, arm 
1, payoff = 0, + (2, 1, 0): Watchful, arm 1, payoff = 0, + (3, 1, 0): Discontent, arm 1, payoff = 0, + + (0, 1, 1): Content, arm 1, payoff = arm-value, + (1, 1, 1): Hopeful, arm 1, payoff = arm-value, + (2, 1, 1): Watchful, arm 1, payoff = arm-value, + (3, 1, 1): Discontent, arm 1, payoff = arm-value, + + """ + + self.current_state[context] = [STATE_DISCONTENT, 0] #set as a default 3-tuple: (mood, reference action, reference payoff = 0) + self.reference_reward[context] = 0# record the real reference reward of the state + + self.best_policy[context] = 0 + + def reset(self): + for context in self.context_set: + # for arm-value estimation + self.nb_observation[context] = np.zeros(self.nbArm) + self.accumulated_value[context] = np.zeros(self.nbArm) + # the static game is formulated on arm_estimate + self.arm_estimate[context] = np.zeros(self.nbArm) + + self.learning_state[context] = STATE_EXPLORE + self.ptbd_arm_value[context] = np.zeros(self.nbArm) # perturbed arm values + + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) + self.nb_state_aligned[context] = np.zeros(self.nbArm) + + #set as a default 3-tuple: (mood, reference action, reference payoff = 0 or none-zero) + self.current_state[context] = [STATE_DISCONTENT, 0] + self.reference_reward[context] = 0 # record the real reference reward of the state + + self.best_policy[context] = 0 + + + # --- functionalities + def explore(self, context=None, time=None): + """ + explore() only update when no collision occurs on the selected arm, see Eq. (5) of [Wang2019] + will update the value in learn_arm_value() + """ + assert self.learning_state[context] == STATE_EXPLORE, "learning state does not match"#debug + + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context=None, arm_values=None, collisions=None): + # must be called after explore + assert self.learning_state[context] == STATE_EXPLORE, "learning state does not match"#debug + assert len(arm_values) == self.nbArm and len(collisions) == self.nbArm, "inputs are invalid." + assert collisions[self.selected_arm] != 0, "arm selection error." + + if collisions[self.selected_arm] == 1: + armID = self.selected_arm + self.nb_observation[context][armID] = self.nb_observation[context][armID] + 1 # obtain a new valid arm-value observation + self.accumulated_value[context][armID] = self.accumulated_value[context][armID] + arm_values[armID] + + self.arm_estimate[context][armID] = self.accumulated_value[context][armID] / self.nb_observation[context][armID] + else: + pass # do not update + + return self.arm_estimate[context] + + def set_internal_state(self, context=None, input_state=STATE_EXPLORE): + # input_state: 0 --explore, 1 -- trial-and-error, 2 -- exploitation + if input_state < STATE_EXPLORE or input_state > STATE_EXPLOIT: + raise Exception("input state is invalid") + + if input_state == STATE_EXPLORE: + pass + elif input_state == STATE_LEARN: + self.ptbd_arm_value[context][:] = 0 + elif input_state == STATE_EXPLOIT: + # do it once for all + self.get_best_policy(context) + else: + raise Exception("input is not valid.") + + self.learning_state[context] = input_state + + + def perturb_estimated_payoff(self, context=None, epoch=None): + """ + The perturbation of estimated arm values guarantees that there is a unique social optimal equialibrium for the static game. 
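+        (Illustrative numbers, not taken from the paper: with xi = 0.001 and epoch = 10,
+        each arm estimate is shifted by an independent perturbation drawn uniformly from
+        [0, xi/epoch) = [0, 1e-4), which breaks ties between arms with identical estimates.)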
+ See Proposition 3 in [Wang2019] + """ + assert epoch is not None and epoch > 0, "the epoch index is invalid" + + #get a perturbation, which is only computed at the beginning of the learning phase in each each + perturbation = np.random.random_sample(self.nbArm) * self.xi/epoch + assert len(perturbation) == self.nbArm, "the dimension of perturbation is invalid" + + self.ptbd_arm_value[context] = self.arm_estimate[context] + perturbation +# self.init_tne_states(context) + + return self.ptbd_arm_value[context] + + def init_tne_states(self, context=None, starting_state=None): + """ + We have 4 states: Content (C), Hopeful (H), Watchful (W) and Discontent (D). + For each agent in a given context, the total # of local intermediate states is 4 * nbArm + + """ + # if we turn (1) on, in each exploration phase the learning algorithm will only use the outcomes of game play in this epoch. + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) # (1): tracks the frequency state visits + self.nb_state_aligned[context] = np.zeros(self.nbArm) + + # set as a default 3-tuple: (mood=discontent, reference action (arm)=0, reference payoff = 0 or zero) + if starting_state is None: + self.current_state[context] = [STATE_DISCONTENT, 0] + + # reference_reward records the real reference reward of the state, + # initialization sets all players to select arm 0 so the reward is 0 due to collision + self.reference_reward[context] = 0 + else: + self.current_state[context] = starting_state + self.reference_reward[context] = 0 + + def learn_policy(self, context=None, time=None): + #note that here time is not used + assert context is not None, "context is not given" #debug + assert self.learning_state[context] == STATE_LEARN, "learning state does not match" #debug + + self.selected_arm = self.update_static_game_action(context, self.current_state[context]) + + return self.selected_arm + + def update_static_game_action(self, context=None, current_state=None): + """ + Update action in the static game according to Eq.(9) + """ + if current_state[0] == STATE_CONTENT: # if content + #content, Eq. (9), experiment with prob. epsilon + seed = np.random.random_sample() + if seed > self.epsilon: + action = current_state[1] + else: + remaining_actions = list(range(self.nbArm)) + remaining_actions.pop(current_state[1]) + action_id = np.random.randint(self.nbArm - 1) + action = remaining_actions[action_id] + assert action != current_state[1], "sampled action is invalid." + +# print("player {} taking action arm {}".format(self.playerID, action)) #debug + + elif current_state[0] == STATE_HOPEFUL or current_state[0] == STATE_WATCHFUL: # if hopeful or watchful + #hopeful or watchful + action = current_state[1] # do not change + elif current_state[0] == STATE_DISCONTENT: # if discontent + #discontent + action = np.random.randint(self.nbArm) + assert action >=0 and action < self.nbArm, "sampled action is invalid." + else: + raise Exception("the mood of the current state is invalid") + + return action + + def update_game_state(self, context, collisions): + """ + Update the state of agent in the static game according to Alg. 2 in [Wang2019]. 
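+        Summary of the mood transitions implemented below:
+          - Content: when experimenting with a new arm and obtaining a higher payoff, the new
+            (arm, payoff) pair is adopted as the reference with probability epsilon^G(delta_u);
+            when playing the reference arm, the mood turns Hopeful/Watchful if the payoff
+            rises/drops with respect to the reference payoff.
+          - Hopeful: payoff >= reference -> Content (the reference payoff is raised if larger);
+            payoff < reference -> Watchful.
+          - Watchful: payoff > reference -> Hopeful; payoff == reference -> Content;
+            payoff < reference -> Discontent.
+          - Discontent: a zero payoff keeps the player Discontent; a positive payoff makes it
+            Content with the new (arm, payoff) pair with probability epsilon^F(u).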
+ Note that self.current_state[context] is in the form of (mood, arm, value) + """ + current_reward = 0 # this is the reward of the static game + if collisions[self.selected_arm] == 1: + current_reward = self.ptbd_arm_value[context][self.selected_arm] + + if self.current_state[context][0] == STATE_CONTENT:# if content + # the current mood is content + if self.selected_arm != self.current_state[context][1]: + if current_reward > self.reference_reward[context]: + G_delta_u = (self.alpha21 * (current_reward - self.reference_reward[context]) + self.alpha22) + threshold = self.epsilon ** G_delta_u + + #update according to Eq. (10) with probability + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[context][1] = self.selected_arm #update reference action + self.reference_reward[context] = current_reward + else: + pass + else: + pass + else: # no experimenting + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_HOPEFUL # hopeful + elif current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_WATCHFUL # watchful + else: # current_reward == self.reference_reward[context]: + pass # do nothing + + elif self.current_state[context][0] == STATE_HOPEFUL: # if hopeful + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT # set to content + self.reference_reward[context] = current_reward + elif current_reward == self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT + else:# current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_WATCHFUL # set to watchful + + elif self.current_state[context][0] == STATE_WATCHFUL: # if watchful + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_HOPEFUL # set to hopeful + elif current_reward == self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT + else:# current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_DISCONTENT # set to discontent + + elif self.current_state[context][0] == STATE_DISCONTENT: + if current_reward == 0: + pass# remain discontent, keep exploring + else: + F_u = self.alpha11 * current_reward + self.alpha12 # update with the probability in Eq. 
(11) + threshold = self.epsilon ** F_u + + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + if sampled_result == 0: + self.current_state[context][0] = STATE_CONTENT + self.current_state[context][1] = self.selected_arm #update reference action + + self.reference_reward[context] = current_reward + else: + pass #stay with the same state + + else: + raise Exception("unexpected state.") + + #update the number of visited states + id_mood = self.current_state[context][0] + id_action = self.current_state[context][1] + + self.nb_state_visit[context][id_mood][id_action] = 1 + self.nb_state_visit[context][id_mood][id_action] + + if id_mood == STATE_CONTENT and self.reference_reward[context] == current_reward: + self.nb_state_aligned[context][id_action] = 1 + self.nb_state_aligned[context][id_action] + + def exploit(self, context = None, time=None): + assert context is not None, "context is None" + assert self.learning_state[context] == STATE_EXPLOIT, "learning state does not match" + assert time is not None, "time is None" + +# self.selected_arm = self.get_best_policy(context) # if turning this on, we'll compute the best policy each time + + self.selected_arm = self.best_policy[context] + return self.selected_arm #return the action + + def get_best_policy(self, context = None): + assert context is not None, "context is None" + + mat_frequency = self.nb_state_aligned[context] # only count the Content mood + + id_max = np.argmax(mat_frequency) #over the remaining action/arm axis + + self.best_policy[context] = id_max + +# print("TnE - {}: Player {}: arm {}".format(context, self.playerID, id_max)) # debug + + return id_max + +""" +Implemented based on the method proposed in the paper, [Bistritz2019] +"Game of Thrones: Fully Distributed Learning for Multi-Player Bandits", by Ilai Bistritz and Amir Leshem, +NeurIPS2019 +""" +class GoTPlayer(Player): # with almost the same structure of TnE + def __init__(self, param): + self.horizon = param["horizon"] if "horizon" in param.keys() else 0 + + #for arm of a specific context-player + self.playerID = param["playerID"] + self.nbArm = param["nbArm"] + self.nbPlayer = param["nbPlayer"] # used for determining the probaibliy of intermediate state switching + + #used in Eq. (10) and Eq. 
(11) in [Wang2019] + self.epsilon = param["epsilon"] + + # Initialization + self.nb_observation = np.zeros(self.nbArm) + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) + + self.learning_state = STATE_EXPLORE + + self.selected_arm = 0 + self.nb_state_visit = np.zeros((2, self.nbArm)) + + self.current_state = [STATE_DISCONTENT, 0] + + self.max_u = 1 + self.best_policy = 0 + + # requirement from [Bistritz2019], the discrepancy of sum of maximum value and the social-optimal value + self.c = 1.2 # this is an estimation + self.pert_factor = self.c * self.nbPlayer +# self.reference_reward = 0 # the current version of GoT doesn't need a reference reward + + def reset(self): + self.nb_observation = np.zeros(self.nbArm) + self.accumulated_value = np.zeros(self.nbArm) + + # the static game is formulated on arm_estimate + self.arm_estimate = np.zeros(self.nbArm) + + self.learning_state = STATE_EXPLORE + + self.selected_arm = 0 + self.nb_state_visit = np.zeros((2, self.nbArm)) + + #set as a default 3-tuple: (mood, reference action, reference payoff = 0 or none-zero) + self.current_state = [STATE_DISCONTENT, 0] +# self.reference_reward = 0 + + self.max_u = 1 + self.best_policy = 0 + + # --- functionalities + def explore(self, context = None, time = None): + """ + we will update the estimated arm values in function learn_arm_value() + context and time are not used for this version + """ + assert self.learning_state == STATE_EXPLORE, "learning state does not match"#debug + + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # must be called after explore + assert self.learning_state == STATE_EXPLORE, "learning state does not match"#debug + assert len(arm_values) == self.nbArm and len(collisions) == self.nbArm, "inputs are invalid" + assert collisions[self.selected_arm] != 0, "arm selection error" + + if collisions[self.selected_arm] == 1: + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 # obtain a new valid arm-value observation + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + self.arm_estimate[armID] = self.accumulated_value[armID] / self.nb_observation[armID] + else: + pass # do nothing + + return self.arm_estimate + + def set_internal_state(self, context=None, input_state=STATE_EXPLORE): + # GoT does not use context information + # input_state: 0 --explore, 1 -- trial-and-error, 2 -- exploitation + if input_state < STATE_EXPLORE or input_state > STATE_EXPLOIT: + raise Exception("input state is invalid") + + if input_state == STATE_EXPLORE: + pass + elif input_state == STATE_LEARN: + pass + elif input_state == STATE_EXPLOIT: + self.get_best_policy() # calculate once far all + else: + raise Exception("input is not valid.") + + self.learning_state = input_state + + def initalize_static_game(self, epoch=None, context=None): + """ + State initialization is done in init_got_states, + this function is to be removed in the future + """ + id_max_u = np.argmax(self.arm_estimate) + + self.max_u = self.arm_estimate[id_max_u] + +# print("id {} - max u {}".format(id_max_u, self.max_u))# debug + + def init_got_states(self, context=None, starting_state=None): + """ + We have 2 states: Content (C) and Discontent (D). 
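+        (Unlike TnEPlayer above, the Hopeful and Watchful moods are not used by GoT.)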
+ For each agent in each context, the total # of local intermediate state is 2 * nbArm + + + starting_state is used for initializing the state at the beginnning of the epoch + """ + # if we turn (1) on, in each exploration phase the learning algorithm will only use the outcomes of game play in this epoch. + self.nb_state_visit = np.zeros((2, self.nbArm)) # (1): tracks the frequency of state visits + + if starting_state is None: + # set as a default 3-tuple: (mood=discontent, reference action (arm)=0, reference payoff = 0 or zero) + self.current_state = [STATE_DISCONTENT, 0] + + # reference_reward records the real reference reward of the state, + # initialization sets all players to select arm 0 so the reward is 0 due to collision +# self.reference_reward = 0 + else: + self.current_state = starting_state +# self.reference_reward = 0 # need to learn and update the reference reward for the new static game + + + def learn_policy(self, context=None, time=None): + #note that here time is not used + assert self.learning_state == STATE_LEARN, "learning state does not match" #debug + + self.selected_arm = self.update_static_game_action(None, self.current_state) + + return self.selected_arm + + + def update_static_game_action(self, context=None, current_state=None): + """ + Update action in the static game + """ + if current_state[0] == STATE_CONTENT: # if content + #content, Eq. (8) Alg.2 of [Bistritz2019], experiment with prob. epsilon + tmp_factor = self.pert_factor # perturbation factor + + # sampling method 1 + prob_no_change = 1 - self.epsilon**(tmp_factor) + prob_rand_action = self.epsilon**(tmp_factor) / (self.nbArm - 1) + + action_array = list(range(self.nbArm)) + prob_array = np.zeros(self.nbArm) + prob_array[:] = prob_rand_action + prob_array[current_state[1]] = prob_no_change + + action = np.random.choice(action_array, size=None, p=prob_array) + + # sampling method 2 +# seed = np.random.random_sample() +# if seed <= 1 - self.epsilon**(tmp_factor): +# # at content state a player does not experiment frequently +# action = current_state[1] +# else: +# remaining_actions = list(range(self.nbArm)) +# remaining_actions.pop(current_state[1]) +# action_id = np.random.randint(self.nbArm - 1) +# action = remaining_actions[action_id] +# assert action != current_state[1], "sampled action is invalid." + + elif current_state[0] == STATE_DISCONTENT: # if discontent + #discontent + action = np.random.randint(self.nbArm) + assert action >=0 and action < self.nbArm, "sampled action is invalid." + else: + raise Exception("the mood of the current state is invalid") + + return action + + def update_game_state(self, context, collisions, flag_record_frequency=False): + """ + Ignore any context. 
The GoT algorithm is designed for the MP-MAB in stochastic environment w/o context + """ + current_reward = 0 # this is the reward of the static game + if collisions[self.selected_arm] == 1: + current_reward = self.arm_estimate[self.selected_arm] + elif collisions[self.selected_arm] == 0: + raise Exception("the collision is not correctly computed.") + else: + current_reward = 0 # if there is a collision + + if self.current_state[0] == STATE_CONTENT:# if content + # the current mood is content + # check the current reward first + if current_reward <= 0: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + # current_reward > 0 + if self.selected_arm == self.current_state[1]: + # If the current action is the same as the reference action, + # and utility > 0, then a content player remains content with probability 1 + pass # stay at the same state, w/ probability 1 + elif self.selected_arm != self.current_state[1]: + # set the probability + threshold = current_reward / self.max_u * (self.epsilon**(self.max_u - current_reward)) + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[0] = STATE_CONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} remains CONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold)) #debug + else: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} transit to DISCONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold))#debug + + elif self.current_state[0] == STATE_DISCONTENT: + if current_reward <= 0: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + threshold = current_reward / self.max_u * (self.epsilon**(self.max_u - current_reward)) + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[0] = STATE_CONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} transit to CONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold)) #debug + else: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + raise Exception("unexpected state.") + + # only the last few rounds are considered to count toward the optimal policy + if flag_record_frequency == True: + #update the number of visited states + id_mood = 0 if self.current_state[0] == STATE_CONTENT else 1 + id_action = self.current_state[1] + + self.nb_state_visit[id_mood][id_action] = 1 + self.nb_state_visit[id_mood][id_action] + + def exploit(self, context = None, time=None): + assert time is not None, "time is None" + assert self.learning_state == STATE_EXPLOIT, "learning state does not match at iteration {}".format(time) + +# self.selected_arm = self.get_best_policy(context) # if turning this line on, we'll compute the best policy each time + + self.selected_arm = self.best_policy + return self.selected_arm #return the action + + def get_best_policy(self, context = None): + mat_frequency = self.nb_state_visit[0,:] # over the mood axis, over CONTENT + assert np.shape(mat_frequency) == (self.nbArm,), "shape of frequency is wrong." 
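+        # the arm most frequently occupied while the player was CONTENT (counted only in the
+        # rounds where flag_record_frequency was set in update_game_state()) becomes the
+        # fixed policy used throughout the exploitation phase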
+ + id_max = np.argmax(mat_frequency) #over the remaining action/arm axis + + self.best_policy = id_max + +# info_logger().log_info("GoT - Player {}: frequency {} arm {}".format(self.playerID, mat_frequency, id_max)) #debug + + return id_max \ No newline at end of file diff --git a/Players2.py b/Players2.py new file mode 100644 index 0000000..e2fdc67 --- /dev/null +++ b/Players2.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +# This file defines the player behavior for the specific SOC-MP-MAB algorithms (SOC in MABAlgorithms2.py) +# see also Players.py for other algorithms + +__author__ = "Wenbo Wang" + +import numpy as np +from Players import Player + +if __name__ == '__main__': + print("Warning: this script 'PlayerResult2.py' is NOT executable..") # DEBUG + exit(0) + + +class SOCPlayer(Player): + + def __init__(self, param): + """ + SOCPlayer is the player for the algorithm "stable orthogonal allocation (SOC)" proposed in + "Multi-player multi-armed bandits for stable allocation in heterogeneous ad-hoc networks", IEEE JSAC oct. 2019, + Sumit J. Darak and Manjesh K. Hanawal [Sumit2019]. + + The algorithm is featured by a protocol explicitly resolving collisions with channel switching, + and the channel statistics (index) is learned based on upper confidence bound (UCB). + + Channel allocation is obtained through a master-slave allocation process. Social optimality is not guaranteed. 
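+
+        Each slot is split into a channel transmit (CT) sub-slot and a channel switch (CS)
+        sub-slot (see decide_switching()): the master proposes a channel to switch to, the
+        addressed slave signals agreement or refusal through its own channel occupancy, and
+        the master reads the outcome from the collision feedback in update_policy().
+
+        Minimal construction example (illustrative parameter values):
+            param = {"nbArm": 4, "playerID": 0}
+            player = SOCPlayer(param)
+            arm = player.explore()   # random hopping until the player locks on a channel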
+ """ + self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.playerID = param["playerID"] + + self.flag_master = False + self.master_collision = np.zeros(2) + + self.flag_lock = False + + self.selected_arm = 0 + self.policy = -1 # set to an invalid value + + self.time = 0 + self.accumulated_value = np.zeros(self.nbArm) + self.arm_score = np.zeros(self.nbArm) # for UCB score computation + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff + self.ranked_armIDs = np.array(list(range(0, self.nbArm))) #ranked according to UCB score + + self.flag_agree_switching = 0 # a 3-state flag, -1: not agree, 0: irrelavent, 1: agree + + def reset(self): + self.flag_master = False + self.flag_lock = False + self.flag_agree_switching = 0 + + self.selected_arm = 0 + self.policy = -1 # set to an invalid value + + self.time = 0 + + self.accumulated_value = np.zeros(self.nbArm) + self.arm_score = np.zeros(self.nbArm) # for UCB score computation + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff + self.ranked_armIDs = np.array(list(range(0, self.nbArm))) #ranked according to UCB score + self.master_collision[:] = 0 + + # --- functionalities + def explore(self, context = None, time = None): + """ + explore() is equivalent to the algorithm "Random Hopping" in [Sumit2019], + it allows users to orthogonalize on channels through uniformly drawing action samples at random + + flag_lock has to be set after observing the collision feedback + """ + if self.flag_lock == True: + # choose the same action, do nothing + if self.policy == -1: + self.policy = self.selected_arm + else: + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # UCB score + if self.flag_agree_switching == -1: + # no arm is selected, this case happens when a slave node evacuates a channel + # to notify the master that it won't switch + pass + elif collisions[self.selected_arm] == 1: + # no collisions + self.flag_lock = True + + self.time = self.time + 1 # only increment when a good sample is obtained + + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + # update UCB Scores + self.arm_score = self.accumulated_value / (self.nb_observation+1e-9) + np.sqrt(2*self.time / (self.nb_observation+1e-9)) + # get the preference + self.ranked_armIDs = np.argsort(-self.arm_score) + + def exploit(self, context=None, time=None): + # SOC doesn't have a clear phase of exploitation, the players uses a collision avoidance-like + # protocol to explicitly allocate the channels among players + assert self.policy != -1, "policy is not obtained" + + self.selected_arm = self.policy + return self.selected_arm + + def set_master(self, MB_id): + assert self.flag_lock == True, "the channel is not locked yet" + + # check if the MB_id is currently self.selected_arm + if self.policy == MB_id: +# print("set_master(): master node ID {} at MB {}".format(self.playerID, MB_id)) # debugging + self.flag_master = True + # reset the recorder + self.master_collision[:] = 0 + else: +# print("set_master: slave node ID {} at MB {}".format( self.playerID, MB_id)) # debugging + self.flag_master = False + + return self.flag_master + + def set_master_action(self, SB_id): + """ + set the action of the master node (as the channel indicated by the current block ID) + """ + assert 
self.flag_lock == True, "the channel is not locked yet" + assert self.flag_master == True, "not a master node" + assert self.policy == self.selected_arm, "action not aligned to policy" + + # get the ranked_arms without self.selected_arm + tmp_arm_rank = np.ndarray.tolist(self.ranked_armIDs) + + # see footnote 1 of [Sumit2019] + current_arm_rank = tmp_arm_rank.index(self.selected_arm) + tmp_arm_rank.pop(current_arm_rank) + + if SB_id - 1 < current_arm_rank: +# print("Master ID-{}: av-{:.2} ---> av-{:.2}".format(self.playerID, self.arm_score[self.selected_arm], +# self.arm_score[tmp_arm_rank[SB_id - 1]])) # debugging + master_arm_choice = tmp_arm_rank[SB_id-1] + else: + master_arm_choice = self.selected_arm + + # set policy to the currently reserved channel, signal over the new channel + self.policy = self.selected_arm + self.selected_arm = master_arm_choice + + return self.selected_arm, self.policy # new, old (MB) + + def decide_switching(self, subslot_id, target_arm=None): + # has to be called by a slave + assert self.flag_lock == True, "the channel is not locked yet" + assert self.flag_master == False, "not a slave node." + assert self.policy != -1, "policy is not set" + + if subslot_id == 0: + # it is in a channel transmit (CT) sub-slot + assert target_arm is not None, "master arm choice not set" + + if target_arm != self.selected_arm: + # not requested and do nothing + self.flag_agree_switching = 0 # not requested + +# print("Slave ID-{}: not requested {} ---> {}".format(self.playerID, self.selected_arm, target_arm)) # debugging + else: + arm_rank_list = np.ndarray.tolist(self.ranked_armIDs) + current_arm_rank = arm_rank_list.index(self.selected_arm) + requested_arm_rank = arm_rank_list.index(target_arm) + +# print("Slave ID-{}: av-{:.2} ---> av-{:.2}".format(self.playerID, self.arm_score[self.selected_arm], +# self.arm_score[target_arm])) # debugging + + if requested_arm_rank < current_arm_rank: + # if master_arm_choice has a higher score, switch + self.flag_agree_switching = 1 # agreed + + self.selected_arm = self.policy # choose the currently preferred arm + self.policy = target_arm # update policy + +# print("UE-{} agrees: CH-{} to CH-{} w/ scores: {} to {}".format(self.playerID, +# self.selected_arm, target_arm, arm_rank_list[current_arm_rank], arm_rank_list[requested_arm_rank])) # debugging + else: + # if master_arm_choice is worse than the current arm, refuse switching + self.flag_agree_switching = -1 # refused + # no change to policy + self.selected_arm = self.policy + else: + # it is in a channel switch (CS) sub-slot + if self.flag_agree_switching == -1: + # refuse swithcing, leave the channel for one slot + self.selected_arm = -1 + else: + # if self.flag_agree_switching == 1: # agree to switch, stay on the channel to collide + # if self.flag_agree_switching == 0: # not requested to switch, stay on the channel + # transmit on the same channel or not affected + pass + + return self.selected_arm + + def update_policy(self, subslot_id, collisions): + # the original paper does not specify when to stop updating the arm-value estimation + # so we aussme that it never stops + assert self.flag_lock == True, "the channel is not locked yet" + + # update actions + if subslot_id == 0: + # only update the master in CS slot + if self.flag_master == True: + self.master_collision[0] = collisions[self.selected_arm] + else: + pass + elif subslot_id == 1: + if self.flag_master == True: + self.master_collision[1] = collisions[self.selected_arm] + + #update policy and action, according to Fig.2 
[Sumit2019] + if self.master_collision[0] > 1 and self.master_collision[1] > 1: # senario 1 (colliding twice): + # switching allowed + self.policy = self.selected_arm + +# print("Master ID-{}: policy updated w/ switching".format(self.playerID)) # debugging + elif self.master_collision[0] == 1 and self.master_collision[1] == 1: # senario 3 (no collision, twice) + self.policy = self.selected_arm + +# print("Master ID-{}: policy updated for vacant channel".format(self.playerID)) # debugging + else: + # roll back +# print("Master ID-{}: policy rolled back".format(self.playerID)) # debugging + pass + + # reset the recorder + self.master_collision[:] = 0 + else: + # reset flag to "not requested" + self.flag_agree_switching = 0 + else: + raise Exception("invalid sub-slot ID.") + + self.selected_arm = self.policy \ No newline at end of file diff --git a/envutils.py b/envutils.py new file mode 100644 index 0000000..1d97069 --- /dev/null +++ b/envutils.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +""" + +# This file defines the class Struct used in simu_config.py, +# and the automation method for arm parameter generation + +__author__ = "Wenbo Wang" + +import numpy as np + +if __name__ == '__main__': + print("Warning: this script 'envutils.py' is NOT executable..") # DEBUG + exit(0) + + +class Struct(object): + """ + Simple class for instantiating objects to add arbitrary attributes as variables. + Used for serializing configurations parameters. + Reference: + https://stackoverflow.com/questions/6198372/most-pythonic-way-to-provide-global-configuration-variables-in-config-py/43941592 + """ + def __init__(self, *args): + self.__header__ = str(args[0]) if args else None + + def __repr__(self): + if self.__header__ is None: + return super(Struct, self).__repr__() + return self.__header__ + + def next(self): + """ Fake iteration functionality. + """ + raise StopIteration + + def __iter__(self): + """ Fake iteration functionality. + We skip magic attribues and Structs, and return the rest. + """ + ks = self.__dict__.keys() + for k in ks: + if not k.startswith('__') and not isinstance(k, Struct): + yield getattr(self, k) + + def __len__(self): + """ Don't count magic attributes or Structs. + """ + ks = self.__dict__.keys() + return len([k for k in ks if not k.startswith('__')\ + and not isinstance(k, Struct)]) + + +def uniform_means(nbContext=2, nbPlayers=2, nbArms=4, delta=0.05, lower=0., upper=1.): + """ + Return a dictionary of lower and upper bounds of arm values, + well spaced (needed for some algorithms that requires arm-values to be distrigushed) for uniform distribution: + + - in [lower, upper], + - starting from lower + (upper-lower) * delta, up to lower + (upper-lower) * (1 - delta), + - and there is nbArms arms. 
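+    - one shuffled arrangement of these nbArms means is appended per player, so the
+      returned value is a list with nbPlayers entries.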
+ + >>> np.array(uniformMeans(2, 0.1)) + array([0.1, 0.9]) + >>> np.array(uniformMeans(3, 0.1)) + array([0.1, 0.5, 0.9]) + >>> np.array(uniformMeans(9, 1 / (1. + 9))) + array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + """ + assert nbPlayers >= 1, "Error: 'nbPlayers' = {} has to be >= 1.".format(nbPlayers) # DEBUG + assert nbArms >= 1, "Error: 'nbArms' = {} has to be >= 1.".format(nbArms) # DEBUG + assert nbArms >= nbPlayers, "Error: 'nbArms' has to be larger than 'nbPlayers'." + assert upper - lower > 0, "Error: 'upper - lower' = {:.3g} has to be > 0.".format(upper - lower) # DEBUG + assert 0. < delta < 1., "Error: 'delta' = {:.3g} has to be in (0, 1).".format(delta) # DEBUG + mus = lower + (upper-lower) * np.linspace(delta, 1 - delta, nbArms) + + means = []; + for idPlayer in range(nbPlayers): + np.random.shuffle(mus) + means.append(mus) + return means + + +def randomMeans(nbPlayers=2, nbArms=4, mingap=None, lower=0., upper=1.): + """Return a list of means of arms, randomly sampled uniformly in [lower, lower + amplitude], with a min gap >= mingap. + + - All means will be different, except if ``mingap=None``, with a min gap > 0. + + """ + assert nbArms >= 1, "Error: 'nbArms' = {} has to be >= 1.".format(nbArms) # DEBUG + assert upper - lower > 0, "Error: 'upper - lower' = {:.3g} has to be > 0.".format(upper - lower) # DEBUG + mus = np.random.rand(nbArms) + if mingap is not None and mingap > 0: + assert (nbArms * mingap) < (upper - lower / 2.), "Error: 'mingap' = {:.3g} is too large, it might be impossible to find a vector of means with such a large gap for {} arms.".format(mingap, nbArms) # DEBUG + + means = [] + for idPlayer in range(nbPlayers): + while np.min(np.abs(np.diff(mus))) <= mingap: # Ensure a min gap > mingap + mus = np.random.rand(nbArms) + + mus = lower + (upper - lower) * mus + means.append(mus) + + return means \ No newline at end of file diff --git a/loggingutils.py b/loggingutils.py new file mode 100644 index 0000000..88403c5 --- /dev/null +++ b/loggingutils.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +""" +This file implement the logging module as the wrapper of the standard logging API provided by python. +Please use the format, e.g., "info_logger().log_info("...")" to record the information of interest in a log file +stored in the path "$PWD/results" +""" + +__author__ = "Wenbo Wang" + +import logging +import os +import functools + +from datetime import datetime + + +def __singleton(class_): + """ + Make a singleton class with only one single instance. 
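+    Every call to the decorated class (e.g. info_logger()) returns the same instance.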
+ Note that it cannot prevent instantiation in multiple processes + """ + @functools.wraps(class_) + def wrapper_singleton(*args, **kwargs): + if wrapper_singleton.instance is None: +# print("wrapper_singleton.instance") + wrapper_singleton.instance = class_(*args, **kwargs) + + return wrapper_singleton.instance + + wrapper_singleton.instance = None + + return wrapper_singleton + +@__singleton +class info_logger(object): + def __init__(self): + log_file_name = 'log' + # the logging module may be used by different process in the parallel mode + # for each process we create a single log file + process_id = os.getpid() + + now = datetime.now() + current_date = now.strftime("(%Y-%m-%d-%H-%M-%S)") + cwd = os.getcwd() # current directory + logFilePath = "{}\{}\{}-{}-{}.log".format(cwd, "results", log_file_name, process_id, current_date) + + # get the instance of logger + self.logger = logging.getLogger(log_file_name) + self.logger.setLevel(logging.DEBUG) + + #define the output format + logging_format = logging.Formatter("[%(threadName)s, %(levelname)s] %(message)s") +# logging_format = logging.Formatter('%(name)s %(asctime)s %(levelname)-8s:%(message)s') + + # file handler + file_handler = logging.FileHandler(logFilePath, mode='w') + file_handler.setFormatter(logging_format) + file_handler.setLevel(logging.DEBUG) + + self.logger.addHandler(file_handler) + + print("logger created @ {}".format(logFilePath)) + self.log_info("logger created") + + # for different levels of messages, we can also call the logger member directly + def log_info(self, msg): + self.logger.info(msg) + + def log_debug(self, msg): + self.logger.debug(msg) + + def log_error(self, msg): + self.logger.error(msg) + +if __name__ == '__main__': + print("Warning: this script 'loggingutils.py' is NOT executable..") # DEBUG + exit(0) +else: + # turn it on then we create one log file for each process before it is really needed +# fileLogger = info_logger() + pass \ No newline at end of file diff --git a/main_MPMAB.py b/main_MPMAB.py new file mode 100644 index 0000000..dd5ef3c --- /dev/null +++ b/main_MPMAB.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# Used for the simulations in the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed +# Bandwidth as a Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. +# This file is the main entrance of all the simulations except that for those w.r.t. network sizes. 
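+#
+# Example invocation (assuming the configuration IDs defined in simu_config.py):
+#     python main_MPMAB.py -id 7
+# selects CONFIG.CONFIGURATION_DICT[7]; when -id is omitted, the default configuration
+# ENV_SCENARIO_7 chosen at the bottom of this file is used.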
+ + +__author__ = "Wenbo Wang" + +import numpy as np +import pandas as pd + +import time +import datetime +import argparse + +from GameEvaluator import AlgEvaluator +from plotutils import plot_data_frame, plot_repeated_simu_results + +import simu_config as CONFIG + +def simulation_execution(game_config): + """ + simulation_execution() is the main body of the MP-MAP algorithm simulations + """ + print("MAB game with configuration '{}' starts to play...".format(game_config.__repr__())) + + game_horizon = game_config.game_horizon + alg_engine = AlgEvaluator(game_config.env_config) + + #add algorithms + for alg_id in range(len(game_config.alg_types)): + alg_engine.add_algorithm(algo_type=game_config.alg_types[alg_id], + custome_params=game_config.alg_configs[alg_id]) + + print("MAB game prepares the environment for arm type '{}' of {} rounds".format(game_config.env_config['env_type'], game_horizon)) + alg_engine.prepare_arm_samples() + + # simulation 1: reward plotting to compare the efficiency of the algorithms + if "enable_efficiency_simulation" in game_config.__dict__ and game_config.enable_efficiency_simulation: + start_time_oneshot = time.time() + + ####################################################################### + # + if game_config.flag_parallel != True: +# print("starting single-process simulation...") + alg_engine.play_game(flag_progress_bar=game_config.flag_progress_bar) + else: +# print("starting parallel simulation...") + alg_engine.play_game_parallel(flag_progress_bar=game_config.flag_progress_bar) + # + ####################################################################### + + alg_engine.plot_rewards(save_fig = game_config.flag_save_figure, save_data = game_config.save_data) + + # printing + running_time = time.time() - start_time_oneshot + print("Single-shot simulation completes in {} for {} iterations.".format( \ + datetime.timedelta(seconds=running_time), game_horizon)) + + # simulation 2/3/4: plotting regret or total rewards over horizon + if ("enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation) or \ + ("enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation) or \ + ("enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation): + start = game_config.T_start + nb_point = game_config.T_step + + horizon_list = np.exp(np.linspace(np.log(start), np.log(game_horizon), nb_point)) + simu_rounds = game_config.T_simu_rounds + + start_time_repeated = time.time() + + ####################################################################### + # + if game_config.flag_parallel != True: +# print("starting single-process simulation...") + simulation_results = alg_engine.play_repeated_game(horizon_list, simulation_rounds=simu_rounds, + flag_progress_bar=game_config.flag_progress_bar) + else: +# print("starting parallel simulation...") + simulation_results = alg_engine.play_repeated_game_parallel(horizon_list, simulation_rounds=simu_rounds, + flag_progress_bar=game_config.flag_progress_bar) + # + ####################################################################### + + # printing + running_time = time.time() - start_time_repeated + print("Repeated simulation completes in {} with maximum horizon {} in {} rounds of plays...".format(\ + datetime.timedelta(seconds=running_time), game_horizon, simu_rounds)) + + # virtualization for simulation 2 + if "enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation: + # locate the reference algorithm + 
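+        # (index 0 is the first algorithm added via add_algorithm(); its reward curve is
+        # used as the benchmark when computing the average regret of the other algorithms)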
optimal_alg_id = 0 + + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + avg_regret_series = np.empty((0, len_horizon)) + for alg_id in range(len(simulation_results['algorithm_name'])): + if alg_id != optimal_alg_id: + # the returned value simulation_results['reward_series'] is organized as an array: + # (len(algorithm_ids), simulation_rounds*len(horizon_list)) + horizon_series = simulation_results['horizon'][alg_id,:] + avg_regret = (simulation_results['reward_series'][optimal_alg_id,:] - + simulation_results['reward_series'][alg_id,:]) / horizon_series + + avg_regret_series = np.append(avg_regret_series, avg_regret) # flatten + time_series = np.append(time_series, horizon_series) + + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Average regret'] = avg_regret_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + simu_data_frame = pd.DataFrame(prepared_results) + + # plot and save the figure + file_name = "monte_carlo_regret" if game_config.flag_save_figure==True else None + sns_figure_unused, repeated_play_data_name = plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Average regret", huelabel='Algorithms', + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + # post processing, add the theoretical bound to the figure + flag_bound = False + if hasattr(game_config, 'flag_regret_bound'): + flag_bound = game_config.flag_regret_bound + else: + flag_bound = False + + plot_repeated_simu_results(start=start, horzion=game_horizon, nbPoints=nb_point, flag_bound=flag_bound, + data_file_name=repeated_play_data_name) + + # virtualization for simulation 3 + if "enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation: + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + reward_series = np.array([]) + for alg_id in range(len(simulation_results['algorithm_name'])): + horizon_series = simulation_results['horizon'][alg_id,:] + avg_rewards = simulation_results['reward_series'][alg_id, :] / horizon_series + + reward_series = np.append(reward_series, avg_rewards) # flatten + time_series = np.append(time_series, horizon_series) + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Average sum of rewards'] = reward_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + simu_data_frame = pd.DataFrame(prepared_results) + + #plot and save the figure + file_name = "monte_carlo_rewards" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Average sum of rewards", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + # virtualization for simulation 4 + if "enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation: + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + switching_series = np.array([]) + collision_series = np.array([]) + + for alg_id in 
range(len(simulation_results['algorithm_name'])): + horizon_series = simulation_results['horizon'][alg_id,:] + switching = simulation_results['switching_count_series'][alg_id, :] + collisions = simulation_results['collision_series'][alg_id, :] + + switching_series = np.append(switching_series, switching) # flatten + collision_series = np.append(collision_series, collisions) # flatten + + time_series = np.append(time_series, horizon_series) + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Accumulated switching counts'] = switching_series + prepared_results['Accumulated collision counts'] = collision_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + assert len(switching_series) == len(collision_series), "switching array must be of the same length: {}, {}".format( + len(switching_series), len(collision_series)) + + simu_data_frame = pd.DataFrame(prepared_results) + + #plot and save the figure: 1 + file_name = "monte_carlo_switching" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Accumulated switching counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + #plot and save the figure: 2 + file_name = "monte_carlo_collision" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Accumulated collision counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + +if __name__ == '__main__': + """ + Parallel processing is suggested to be turned on for repeated simulations (see simu_config.py) + It is approximately 2X to 4X faster in terms of the total time than the single-process simulation + """ + arg_parser = argparse.ArgumentParser(description='Select a configuration set in \'simu_config.py\' to run the simulations') + # Add the arguments + arg_parser.add_argument('-id', metavar='ID', type=int, + help='Choose the configuration ID between [1-13], see the summary of simu_config.py') + args = arg_parser.parse_args() + + if args.id is None: + # default choice of configuration for a simulation + game_config = CONFIG.ENV_SCENARIO_7 # + else: + if args.id in CONFIG.CONFIGURATION_DICT.keys(): + game_config = CONFIG.CONFIGURATION_DICT[args.id] + else: + raise Exception('the input configuration ID is not valid') + + # beginning of the game + start_time = time.time()# record the starting time of the simulation, start simulations + + simulation_execution(game_config) + + #end of the game + running_time = time.time() - start_time + print("Simulation completes in {}.".format(datetime.timedelta(seconds=running_time))) diff --git a/main_MPMAB_IoT_Simu.py b/main_MPMAB_IoT_Simu.py new file mode 100644 index 0000000..74513c2 --- /dev/null +++ b/main_MPMAB_IoT_Simu.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# Used for the simulations in the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed +# Bandwidth as a Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. +# This file is the main entrance of the simulations regarding the network performance vs. network scale. + +__author__ = "Wenbo Wang" + +# This file is the main entrance of the simulations for algorithm performance w.r.t. network sizes. + +__author__ = "Wenbo Wang" + +import numpy as np +import pandas as pd + +import time +import datetime +import sys +#import argparse + +from GameEvaluator import AlgEvaluator +from plotutils import plot_data_frame + +from envutils import Struct as Section + +def simulation_execution(alg_engine, game_config, player_number, game_horizon, simu_rounds, flag_parallel=False): + """ + simulation_execution() is the main body of the MP-MAP algorithm simulations + """ + +# print("number of arms: {}, number of players: {}".format(alg_engine.nbArms, alg_engine.nbPlayers)) + + #add algorithms + for alg_id in range(len(game_config.alg_types)): + alg_engine.add_algorithm(algo_type=game_config.alg_types[alg_id], + custome_params=game_config.alg_configs[alg_id]) + + if flag_parallel == True: + simulation_results = alg_engine.play_repeated_game_parallel([game_horizon], simulation_rounds=simu_rounds, + flag_progress_bar=True) + else: + # for large network, we use seuqnecial processing in order to avoid overwhelming the memory + simulation_results = alg_engine.play_repeated_game([game_horizon], simulation_rounds=simu_rounds, + flag_progress_bar=True) + + + network_size_indicator_series = [] + alg_indicator_series = [] + reward_series = np.array([]) + switching_series = np.array([]) + collision_series = np.array([]) + +# print("size of simulation results, rewards: {}".format(np.shape(simulation_results['reward_series']))) +# print("length of simulation_results: {}".format(len(simulation_results['algorithm_name']))) + + for alg_id in range(len(simulation_results['algorithm_name'])): + avg_rewards = simulation_results['reward_series'][alg_id, :] / game_horizon + switching = simulation_results['switching_count_series'][alg_id, :] + collisions = simulation_results['collision_series'][alg_id, :] + + network_sizes = np.zeros(avg_rewards.shape) + network_sizes[:] = player_number + + reward_series = np.append(reward_series, avg_rewards) # flatten + switching_series = np.append(switching_series, switching) # flatten + collision_series = np.append(collision_series, collisions) # flatten + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * simu_rounds) + network_size_indicator_series.extend(network_sizes) + + + prepared_results = {} + prepared_results['Sum of rewards'] = reward_series + prepared_results['Node Number'] = network_size_indicator_series + prepared_results['Accumulated switching counts'] = switching_series + prepared_results['Accumulated collision counts'] = collision_series + prepared_results['Algorithms'] = alg_indicator_series + +# print("length: {}, {}, {}, {}, {}".format(len(reward_series), len(network_size_indicator_series), +# len(switching_series), len(collision_series), len(alg_indicator_series))) + + simu_data_frame = pd.DataFrame(prepared_results) + + + return simu_data_frame + + +def 
simulation_plot_results(input_data_frame): + #plot and save the figure: 1 + file_name = "network_switching" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Accumulated switching counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + + #plot and save the figure: 2 + file_name = "network_collision" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Accumulated collision counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + + file_name = "network_rewards" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Sum of rewards", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + +if __name__ == '__main__': + """ + Parallel processing is turned off by default. + Unless the machine memory is sufficiently large, we may have a risk of running out of memory + for a large network scale. + """ + yes = {'yes','y', 'ye', 'Y'} + no = {'no','n', 'N'} + + print("This simulation takes more than 10 hrs. \nDo you want to continue? [y/n]") + while True: + input_choice = input().lower() + if input_choice in yes: + break + elif input_choice in no: + print('execution is terminated.') + sys.exit() + else: + print("Please respond with 'yes' or 'no'") + + game_horizon = 400000 + simu_rounds = 40 + + max_player_no = 30 # the more nodes we have, the longer horizon we need for find a social-optimal allocation. + + player_numbers = np.linspace(5, max_player_no, 6) #Example: [max_player_no, 25, 20, 15, 10, 5] + max_arm_number = max_player_no + 1 # to save some memory + + env_config = {'horizon': game_horizon, + 'arm number': max_arm_number, + 'player number': max_player_no, + 'context set': {"context 1", "context 2", "context 3"},# + 'env_type': 'HetNet simulator', # change the underlying distribution here + 'enabel mmWave': True, + 'cell range': 250, + 'context_prob': {'context 1': 2, 'context 2': 1, 'context 3': 1}, + 'los_prob': {'context 1': 1.5, 'context 2': 2, 'context 3': 1} + } + + # generate the arm-value sequence for only once + alg_engine = AlgEvaluator(env_config) + alg_engine.prepare_arm_samples() + + game_config = Section("Simulation of HetNet: reward evolution for 4 algorithms") + game_config.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Thrones'] #, + + # beginning of the game + start_time = time.time()# record the starting time of the simulation, start simulations + data_frame = [] + for player_no in player_numbers: + num_players = int(player_no) + + # be sure that the value of the two constant variables satisfiy the condition in Theorem 2 of [Wang2020] + alpha11 = -0.40/num_players + alpha12 = 0.45/num_players + + game_config.alg_configs = [None, + {"delta": 0.02, "exploration_time": 4000}, + {"c1": 2000, "c2": 10000,"c3":3000, "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": alpha11, "alpha12": alpha12, "alpha21": -0.39, "alpha22": 0.4,}, + {"c1": 2000, "c2": 10000,"c3":3000, "epsilon": 0.01, "delta": 1.5}, + ] + + #set the arm number to be used in the simulation + alg_engine.reset_player_number(num_players) + alg_engine.reset_arm_number(num_players + 1) + alg_engine.clear_algorithms() + + if player_no >= 10: + temp_simu_data_frame = simulation_execution(alg_engine, game_config, num_players, game_horizon, simu_rounds) + else: + # There is always a risk of overwhelming the memory capacity with parallel processing, especially when num_players > 15 + # Set the last parameter to 
True to enable parallel processing + temp_simu_data_frame = simulation_execution(alg_engine, game_config, num_players, game_horizon, simu_rounds, False) + + data_frame.append(temp_simu_data_frame) + + #end of the numerical simulation + input_data = pd.concat(data_frame) + running_time = time.time() - start_time + print("Simulation completes in {}.".format(datetime.timedelta(seconds=running_time))) + + #plotting figures + simulation_plot_results(input_data) \ No newline at end of file diff --git a/obsolete/UniformArm.py b/obsolete/UniformArm.py new file mode 100644 index 0000000..cd370f3 --- /dev/null +++ b/obsolete/UniformArm.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Nov 27 14:15:17 2019 + +Partially inspired by the project SMPyBandits. This file defines the running framework of the bandit simulation. +""" + +""" +Uniformly distributed arm in [0, 1], or [lower, upper]_context, for each context. + +Example of creating an arm: + +>>> import random; import numpy as np +>>> random.seed(0); np.random.seed(0) +>>> Unif01 = UniformArm(0, 1) +>>> Unif01 +U(0, 1) +>>> Unif01.mean +0.5 + +Examples of sampling from an arm: + +>>> Unif01.draw() # doctest: +ELLIPSIS +0.8444... +>>> Unif01.draw_nparray(20) # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE +array([0.54... , 0.71..., 0.60..., 0.54..., 0.42... , + 0.64..., 0.43..., 0.89... , 0.96..., 0.38..., + 0.79..., 0.52..., 0.56..., 0.92..., 0.07..., + 0.08... , 0.02... , 0.83..., 0.77..., 0.87...]) +""" +from __future__ import division, print_function # Python 2 compatibility + +__author__ = "Wenbo Wang" +__version__ = "0.6" + +from random import random +from numpy.random import random as nprandom + +# Local imports +try: + from .Arm import Arm +except ImportError: + from Arm import Arm + + +class UniformArm(Arm): + """ Uniformly distributed arm, default in [0, 1], + + - default to (mini, maxi), + - or [lower, lower + amplitude], if (lower=lower, amplitude=amplitude) is given. + + >>> arm_0_1 = UniformArm() + >>> arm_0_10 = UniformArm(0, 10) # maxi = 10 + >>> arm_2_4 = UniformArm(2, 4) + >>> arm_m10_10 = UniformArm(-10, 10) # also UniformArm(lower=-10, amplitude=20) + """ + + def __init__(self, lower=0., upper=1., context_set): + """New arm.""" + self.lower = lower #: Lower value of rewards, corresponding to array of states + self.upper = upper #: Upper value of rewards + self.amplitude = upper - lower #: Amplitude of value of rewards + self.context_set = context_set + + self.amplitude = upper - lower #: Amplitude of rewards + self.mean = (self.lower + self.upper) / 2.0 #: Mean for this UniformArm arm + + # --- Random samples + + def draw(self, t=None): + """ Draw one random sample. The parameter t is ignored in this Arm.""" + shape = (1, len(self.context_set)) + return self.lower + (nprandom(shape) * self.amplitude) + + # --- Printing + + def __str__(self): + return "UniformArm" + + def __repr__(self): + return "U({:.3g}, {:.3g})".format(self.lower, self.upper) + + # --- Lower bound + + @staticmethod + def kl(x, y): + """ The kl(x, y) to use for this arm.""" + return klBern(x, y) + + @staticmethod + def oneLR(mumax, mu): + """ One term of the Lai & Robbins lower bound for UniformArm arms: (mumax - mu) / KL(mu, mumax). """ + return (mumax - mu) / klBern(mu, mumax) + + +__all__ = ["UniformArm"] + + +# --- Debugging + +if __name__ == "__main__": + # Code for debugging purposes. 
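    # Note on the class above (descriptive only): draw() returns one uniform sample per
    # context, i.e. an array of shape (1, len(context_set)) with values in [lower, upper),
    # obtained by rescaling numpy.random.random (imported as nprandom) by `amplitude` and
    # shifting by `lower`. Since `context_set` has no default value in __init__, it must be
    # supplied at construction, unlike the older scalar UniformArm(lower, upper) interface
    # illustrated in the module docstring above.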
+ from doctest import testmod + print("\nTesting automatically all the docstring written in each functions of this module :") + testmod(verbose=True) \ No newline at end of file diff --git a/obsolete/collision_models.py b/obsolete/collision_models.py new file mode 100644 index 0000000..c0867b5 --- /dev/null +++ b/obsolete/collision_models.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 26 20:31:54 2019 + +@author: wenbo2017 +""" + +""" +Partially inspired by the project SMPyBandits. This file defines the running framework of the bandit simulation. +This file defines the reward generation and collision resolution method "collision_models". +to be extended to other types of collisions, currently only the non-colliding player is rewarded with non-zero value +""" + +__author__ = "Wenbo Wang" + +import numpy as np + +def onlyRewardNoCollision(t, arms, players, choices, pulls, collisions): + """ Simple collision model where only the players alone on one arm samples it and receives the reward. + + - The numpy array 'choices' is the choices of players choosing arms + - Collision should be rewarded 0 + """ + + nb_collisions = np.bincount(choices, minlength=len(arms)) + + for i, player in enumerate(players): # Loop over the player set + # pulls counts the number of selection, not the number of successful selection. + pulls[i, choices[i]] += 1 + if nb_collisions[choices[i]] <= 1: # No collision + player.getReward(choices[i]) # Observing reward + else: + collisions[choices[i]] += 1 # Should be counted here, onlyUniqUserGetsReward + # handleCollision_or_getZeroReward(player, choices[i]) # NOPE + player.getCollisionReward(choices[i]) + +# Default collision model to use +defaultCollisionModel = onlyRewardNoCollision + + +#: List of possible collision models +collision_models = [ + onlyRewardNoCollision, +] \ No newline at end of file diff --git a/obsolete/plotsettings.py b/obsolete/plotsettings.py new file mode 100644 index 0000000..1bfa6fb --- /dev/null +++ b/obsolete/plotsettings.py @@ -0,0 +1,458 @@ +# -*- coding: utf-8 -*- +""" plotsettings: use it like this, in the Environment folder: + +>>> import sys; sys.path.insert(0, '..') +>>> from .plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, wraptext, wraplatex, legend, show_and_save, nrows_ncols +""" +from __future__ import division, print_function # Python 2 compatibility + +__author__ = "Lilian Besson" +__version__ = "0.9" + +from textwrap import wrap +import os.path + +import matplotlib as mpl +# mpl.use('Agg') # XXX is it a good idea? Nope, use "export MPLBACKEND='Agg'" in your bashrc ... Cf. http://stackoverflow.com/a/4935945/ and http://matplotlib.org/faq/usage_faq.html#what-is-a-backend +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick + +import numpy as np +import seaborn as sns + +# Customize here if you want a signature on the titles or xlabel, of each plot +from datetime import datetime +import locale # See this bug, http://numba.pydata.org/numba-doc/dev/user/faq.html#llvm-locale-bug +locale.setlocale(locale.LC_TIME, 'C') +monthyear = "{:%b.%Y}".format(datetime.today()).title() #: Month.Year date + +from os import getenv + +# Backup figure objects +from pickle import dump as pickle_dump + +if getenv('DEBUG', 'False') == 'True': + signature = "\n(By Lilian Besson, {}, cf. 
SMPyBandits.GitHub.io - MIT Licensed)".format(monthyear) #: A small string to use as a signature +else: + signature = "" + +DPI = 120 #: DPI to use for the figures +# FIGSIZE = (19.80, 10.80) #: Figure size, in inches! +FIGSIZE = (16, 9) #: Figure size, in inches! +# FIGSIZE = (12.4, 7) #: Figure size, in inches! +# FIGSIZE = (8, 6) #: Figure size, in inches! +# FIGSIZE = (8, 4.5) #: Figure size, in inches! + +# Customize the colormap +HLS = True #: Use the HLS mapping, or HUSL mapping +VIRIDIS = False #: Use the Viridis colormap + +# Bbox in inches. Only the given portion of the figure is saved. If 'tight', try to figure out the tight bbox of the figure. +BBOX_INCHES = "tight" #: Use this parameter for bbox +BBOX_INCHES = None + +if __name__ != '__main__': + # use a clever color palette, eg http://seaborn.pydata.org/api.html#color-palettes + sns.set(context="talk", style="whitegrid", palette="hls" if HLS else "husl", font="sans-serif", font_scale=0.95) + + # Use tex by default http://matplotlib.org/2.0.0/users/dflt_style_changes.html#math-text + # mpl.rcParams['text.usetex'] = True # XXX force use of LaTeX + mpl.rcParams['font.family'] = "sans-serif" + mpl.rcParams['font.sans-serif'] = "DejaVu Sans" + mpl.rcParams['mathtext.fontset'] = "cm" + mpl.rcParams['mathtext.rm'] = "serif" + + # Configure size for axes and x and y labels + # Cf. https://stackoverflow.com/a/12444777/ + mpl.rcParams['axes.labelsize'] = "small" + mpl.rcParams['xtick.labelsize'] = "x-small" + mpl.rcParams['ytick.labelsize'] = "x-small" + mpl.rcParams['figure.titlesize'] = "small" + + # Configure the DPI of all images, once and for all! + mpl.rcParams['figure.dpi'] = DPI + # print(" - Setting dpi of all figures to", DPI, "...") # DEBUG + + # Configure figure size, even of if saved directly and not displayed, use HD screen + # cf. https://en.wikipedia.org/wiki/Computer_display_standard + mpl.rcParams['figure.figsize'] = FIGSIZE + # print(" - Setting 'figsize' of all figures to", FIGSIZE, "...") # DEBUG + + # XXX Set up a discrete version of the Viridis map for axes.prop_cycle + + +def palette(nb, hls=HLS, viridis=VIRIDIS): + """ Use a smart palette from seaborn, for nb different plots on the same figure. + + - Ref: http://seaborn.pydata.org/generated/seaborn.hls_palette.html#seaborn.hls_palette + + >>> palette(10, hls=True) # doctest: +ELLIPSIS + [(0.86..., 0.37..., 0.33...), (0.86...,.65..., 0.33...), (0.78..., 0.86...,.33...), (0.49..., 0.86...,.33...), (0.33..., 0.86...,.46...), (0.33..., 0.86...,.74...), (0.33..., 0.68..., 0.86...) (0.33..., 0.40..., 0.86...) (0.56..., 0.33..., 0.86...) 
(0.84..., 0.33..., 0.86...)] + >>> palette(10, hls=False) # doctest: +ELLIPSIS + [[0.96..., 0.44..., 0.53...], [0.88..., 0.52..., 0.19...], [0.71..., 0.60..., 0.19...], [0.54..., 0.65..., 0.19...], [0.19..., 0.69..., 0.34...], [0.20..., 0.68..., 0.58...],[0.21..., 0.67..., 0.69...], [0.22..., 0.65..., 0.84...], [0.55..., 0.57..., 0.95...], [0.85..., 0.44..., 0.95...]] + >>> palette(10, viridis=True) # doctest: +ELLIPSIS + [(0.28..., 0.13..., 0.44...), (0.26..., 0.24..., 0.52...), (0.22..., 0.34..., 0.54...), (0.17..., 0.43..., 0.55...), (0.14..., 0.52..., 0.55...), (0.11..., 0.60..., 0.54...), (0.16..., 0.69..., 0.49...), (0.31..., 0.77..., 0.41...), (0.52..., 0.83..., 0.28...), (0.76..., 0.87..., 0.13...)] + + - To visualize: + + >>> sns.palplot(palette(10, hls=True)) # doctest: +SKIP + >>> sns.palplot(palette(10, hls=False)) # use HUSL by default # doctest: +SKIP + >>> sns.palplot(palette(10, viridis=True)) # doctest: +SKIP + """ + if viridis: + return sns.color_palette('viridis', nb) + else: + return sns.hls_palette(nb + 1)[:nb] if hls else sns.husl_palette(nb + 1)[:nb] + + +def makemarkers(nb): + """ Give a list of cycling markers. See http://matplotlib.org/api/markers_api.html + + .. note:: This what I consider the *optimal* sequence of markers, they are clearly differentiable one from another and all are pretty. + + Examples: + + >>> makemarkers(7) + ['o', 'D', 'v', 'p', '<', 's', '^'] + >>> makemarkers(12) + ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>', 'o', 'D'] + """ + allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] + longlist = allmarkers * (1 + int(nb / float(len(allmarkers)))) # Cycle the good number of time + return longlist[:nb] # Truncate + + +#: Default parameter for legend(): if True, the legend is placed at the right side of the figure, not on it. +#: This is almost mandatory for plots with more than 10 algorithms (good for experimenting, bad for publications). +PUTATRIGHT = True +PUTATRIGHT = False + +#: Shrink factor if the legend is displayed on the right of the plot. +#: +#: .. warning:: I still don't really understand how this works. Just manually decrease if the legend takes more space (i.e., more algorithms with longer names) +SHRINKFACTOR = 0.60 +SHRINKFACTOR = 0.65 +SHRINKFACTOR = 0.70 +SHRINKFACTOR = 0.75 + +#: Default parameter for maximum number of label to display in the legend INSIDE the figure +MAXNBOFLABELINFIGURE = 8 + + +def legend(putatright=PUTATRIGHT, fontsize="xx-small", + shrinkfactor=SHRINKFACTOR, maxnboflabelinfigure=MAXNBOFLABELINFIGURE, + fig=None, title=None + ): + """plt.legend() with good options, cf. http://matplotlib.org/users/recipes.html#transparent-fancy-legends. + + - It can place the legend to the right also, see https://stackoverflow.com/a/4701285/. + """ + try: + len_leg = len(plt.gca().get_legend_handles_labels()[1]) + putatright = len_leg > maxnboflabelinfigure + if len_leg > maxnboflabelinfigure: print("Warning: forcing to use putatright = {} because there is {} items in the legend.".format(putatright, len_leg)) # DEBUG + except (ValueError, AttributeError, IndexError) as e: + # print(" e =", e) # DEBUG + pass + if fig is None: + # fig = plt.gcf() + fig = plt # HACK + if putatright: + try: + # Shrink current axis by 20% on xaxis and 10% on yaxis + delta_rect = (1. - shrinkfactor)/6.25 + # XXX rect = [left, bottom, right, top] in normalized (0, 1) figure coordinates. 
+ fig.tight_layout(rect=[delta_rect, delta_rect, shrinkfactor, 1 - 2*delta_rect]) + # Put a legend to the right of the current axis + fig.legend(loc='center left', numpoints=1, fancybox=True, framealpha=0.8, bbox_to_anchor=(1, 0.5), title=title, fontsize=fontsize) + except: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + else: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + + +def maximizeWindow(): + """ Experimental function to try to maximize a plot. + + - Tries as well as possible to maximize the figure. + - Cf. https://stackoverflow.com/q/12439588/ + + .. warning:: This function is still experimental, but "it works on my machine" so I keep it. + """ + # plt.show(block=True) + # plt.tight_layout() + figManager = plt.get_current_fig_manager() + try: + figManager.window.showMaximized() + except Exception: + try: + figManager.frame.Maximize(True) + except Exception: + try: + figManager.window.state('zoomed') # works fine on Windows! + except Exception: + try: + figManager.full_screen_toggle() + except Exception: + print(" Note: Unable to maximize window...") + # plt.show() + + +#: List of formats to use for saving the figures, by default. +#: It is a smart idea to save in both a raster and vectorial formats +FORMATS = ('png', 'pdf') +# FORMATS = ('png', 'pdf', 'eps') +# FORMATS = ('png', 'pdf', 'eps', 'svg') + + +def show_and_save(showplot=True, savefig=None, formats=FORMATS, pickleit=False, fig=None): + """ Maximize the window if need to show it, save it if needed, and then show it or close it. + + - Inspired by https://tomspur.blogspot.fr/2015/08/publication-ready-figures-with.html#Save-the-figure + """ + if showplot: + maximizeWindow() + if savefig is not None: + if pickleit and fig is not None: + form = "pickle" + path = "{}.{}".format(savefig, form) + print("Saving raw figure with format {}, to file '{}'...".format(form, path)) # DEBUG + with open(path, "bw") as f: + pickle_dump(fig, f) + print(" Saved! '{}' created of size '{}b', at '{:%c}' ...".format(path, os.path.getsize(path), datetime.fromtimestamp(os.path.getatime(path)))) + for form in formats: + path = "{}.{}".format(savefig, form) + print("Saving figure with format {}, to file '{}'...".format(form, path)) # DEBUG + try: + plt.savefig(path, bbox_inches=BBOX_INCHES) + print(" Saved! '{}' created of size '{}b', at '{:%c}' ...".format(path, os.path.getsize(path), datetime.fromtimestamp(os.path.getatime(path)))) + except Exception as exc: + print("Error: could not save current figure to {} because of error {}... Skipping!".format(path, exc)) # DEBUG + try: + plt.show(block=True) if showplot else plt.close() + except (TypeError, AttributeError): + print("Failed to show the figure for some unknown reason...") # DEBUG + + +def add_percent_formatter(which="xaxis", amplitude=1.0, oldformatter="%.2g%%", formatter="{x:.1%}"): + """ Small function to use a Percentage formatter for xaxis or yaxis, of a certain amplitude. + + - which can be "xaxis" or "yaxis", + - amplitude is a float, default to 1. + + - More detail at http://stackoverflow.com/a/36320013/ + - Not that the use of matplotlib.ticker.PercentFormatter require matplotlib >= 2.0.1 + - But if not available, use matplotlib.ticker.StrMethodFormatter("{:.0%}") instead + """ + # Which axis to use ? 
+ if which == "xaxis": + ax = plt.axes().xaxis + elif which == "yaxis": + ax = plt.axes().yaxis + else: + raise ValueError("Unknown value '{}' for 'which' in function add_percent_formatter() : only xaxis,yaxis are accepted...".format(which)) + # Which formatter to use ? + try: + my_frmt = mtick.StrMethodFormatter(formatter) # Use new format string + except Exception: + my_frmt = mtick.FormatStrFormatter(oldformatter) # Use old format string, better looking but not correctly scaled + if hasattr(mtick, 'PercentFormatter'): + my_frmt = mtick.PercentFormatter(amplitude) + # Use it! + ax.set_major_formatter(my_frmt) + + +#: Default value for the ``width`` parameter for :func:`wraptext` and :func:`wraplatex`. +WIDTH = 95 + + +def wraptext(text, width=WIDTH): + """ Wrap the text, using ``textwrap`` module, and ``width``.""" + return "\n".join(wrap(text, width=width)) + + +def wraplatex(text, width=WIDTH): + """ Wrap the text, for LaTeX, using ``textwrap`` module, and ``width``.""" + return "$\n$".join(wrap(text, width=width)) + + +def nrows_ncols(N): + """Return (nrows, ncols) to create a subplots for N plots of the good size. + + >>> for N in range(1, 22): + ... nrows, ncols = nrows_ncols(N) + ... print("For N = {:>2}, {} rows and {} cols are enough.".format(N, nrows, ncols)) + For N = 1, 1 rows and 1 cols are enough. + For N = 2, 2 rows and 1 cols are enough. + For N = 3, 2 rows and 2 cols are enough. + For N = 4, 2 rows and 2 cols are enough. + For N = 5, 3 rows and 2 cols are enough. + For N = 6, 3 rows and 2 cols are enough. + For N = 7, 3 rows and 3 cols are enough. + For N = 8, 3 rows and 3 cols are enough. + For N = 9, 3 rows and 3 cols are enough. + For N = 10, 4 rows and 3 cols are enough. + For N = 11, 4 rows and 3 cols are enough. + For N = 12, 4 rows and 3 cols are enough. + For N = 13, 4 rows and 4 cols are enough. + For N = 14, 4 rows and 4 cols are enough. + For N = 15, 4 rows and 4 cols are enough. + For N = 16, 4 rows and 4 cols are enough. + For N = 17, 5 rows and 4 cols are enough. + For N = 18, 5 rows and 4 cols are enough. + For N = 19, 5 rows and 4 cols are enough. + For N = 20, 5 rows and 4 cols are enough. + For N = 21, 5 rows and 5 cols are enough. + """ + nrows = int(np.ceil(np.sqrt(N))) + ncols = N // nrows + while N > nrows * ncols: + ncols += 1 + nrows, ncols = max(nrows, ncols), min(nrows, ncols) + return nrows, ncols + + +def addTextForWorstCases(ax, n, bins, patches, rate=0.85, normed=False, fontsize=8): + """Add some text labels to the patches of an histogram, for the last 'rate'%. + + Use it like this, to add labels for the bins in the 65% largest values n:: + + >>> n, bins, patches = plt.hist(...) 
+ >>> addTextForWorstCases(ax, n, bins, patches, rate=0.65) + """ + # DONE add an automatic detection of the cases where a regret was found to not be O(log(T)) to display on the histogram the count of bad cases + assert 0 <= rate <= 1, "Error: 'rate' = {:.3g} should be in [0, 1].".format(rate) # DEBUG + if not isinstance(n, list) and not isinstance(n, np.ndarray): + n = [n] + if hasattr(patches, 'patches'): + # assert isinstance(patches, mpl.container.BarContainer) # DEBUG + patches = patches.patches + if not isinstance(patches, list): + patches = [patches] + max_x = max(p.xy[0] for p in patches) + for nx, p in zip(n, patches): + text = "{:.3%}".format(nx) if normed else "{:.3g}".format(nx) + x, y = p.xy[0], 1.015 * nx # 1.5% higher than the top of the patch rectangle + # Simple detection can be if a box is for a regret larger than some fraction of T + if nx > 0 and x > (rate * max_x): + # print("Writing text =", text, "at x =", x, "and y =", y) # DEBUG + ax.text(x, y, text, fontsize=fontsize) + + +def myviolinplot(*args, nonsymmetrical=False, **kwargs): + try: + return sns.violinplot(*args, nonsymmetrical=nonsymmetrical, cut=0, inner="stick", **kwargs) + except (TypeError, NameError): + return sns.violinplot(*args, cut=0, inner="stick", **kwargs) + + +def violin_or_box_plot(data=None, labels=None, boxplot=False, **kwargs): + """ Automatically add labels to a box or violin plot. + + .. warning:: Requires pandas (https://pandas.pydata.org/) to add the xlabel for violin plots. + """ + if boxplot: + return plt.boxplot(data, labels=labels, showmeans=True, meanline=True, **kwargs) + if labels is not None: + try: + import pandas as pd + dict_of_data = { + label: column + for label, column in zip(labels, data) + } + df = pd.DataFrame(dict_of_data) + return myviolinplot(nonsymmetrical="left", data=df, orient="v", **kwargs) + except ImportError: + return violin_or_box_plot(data, boxplot=boxplot, **kwargs) + return myviolinplot(nonsymmetrical="left", data=data, orient="v", **kwargs) + + +MAX_NB_OF_LABELS = 50 #: If more than MAX_NB_OF_LABELS labels have to be displayed on a boxplot, don't put a legend. + + +def adjust_xticks_subplots(ylabel=None, labels=(), maxNbOfLabels=MAX_NB_OF_LABELS): + """Adjust the size of the xticks, and maybe change size of ylabel. + + - See https://stackoverflow.com/a/37708190/ + """ + if len(labels) >= maxNbOfLabels: + return + max_length_of_labels = max([len(label) for label in labels]) + locs, xticks_labels = plt.xticks() # XXX don't name xticks_labels, labels or it erases the argument of the function and labels are not correctly displayed. + plt.xticks(locs, labels, rotation=80, verticalalignment="top", fontsize="xx-small") + if max_length_of_labels >= 50: + plt.subplots_adjust(bottom=max_length_of_labels/135.0) + if ylabel is not None: plt.ylabel(ylabel, fontsize="x-small") + else: + plt.subplots_adjust(bottom=max_length_of_labels/90.0) + + +def table_to_latex(mean_data, std_data=None, + labels=None, fmt_function=None, name_of_table=None, + filename=None, erase_output=False, + *args, **kwargs + ): + """ Tries to print the data from the input array or collection of array or :class:`pandas.DataFrame` to the stdout and to the file ``filename`` (if it does not exist). 
+ + - Give ``std_data`` to print ``mean +- std`` instead of just ``mean`` from ``mean_data``, + - Give a list to ``labels`` to use a header of the table, + - Give a formatting function to ``fmt_function``, like :func:`IPython.core.magics.execution._format_time` to print running times, or :func:`memory_consumption.sizeof_fmt` to print memory usages, or ``lambda s: "{:.3g}".format(s)`` to print ``float`` values (default), + - Uses :func:`tabulate.tabulate` (https://bitbucket.org/astanin/python-tabulate/) or :func:`pandas.DataFrame.to_latex` (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_latex.html#pandas.DataFrame.to_latex). + + .. warning:: FIXME this is still experimental! And useless, most of the time we simply do a copy/paste from the terminal to the LaTeX in the article... + """ + if fmt_function is None: fmt_function = lambda s: "{:.3g}".format(s) + output_string = None + input_data = mean_data + if std_data is not None: + format_data = np.vectorize(lambda xi, yi: r"{} \pm {}".format(fmt_function(xi), fmt_function(yi))) + input_data = format_data(mean_data, std_data) + else: + format_data = np.vectorize(fmt_function) + input_data = format_data(mean_data) + print("Using input_data of shape = {} and size = {}\n{}".format(np.shape(input_data), np.size(input_data), input_data)) # DEBUG + # 1. try with pandas module + try: + import pandas as pd + if labels is not None: + df = pd.DataFrame(input_data, columns=labels) + else: + df = pd.DataFrame(input_data) + output_string = df.to_latex(*args, **kwargs) + except ImportError: + print("Error: the pandas module is not available, install it with 'pip install pandas' or 'conda install pandas'.") # DEBUG + # 2. if pandas failed, try with tabulate + if output_string is None: + try: + import tabulate + if labels is not None: + output_string = tabulate.tabulate(input_data, tablefmt="latex_raw", headers=labels, *args, **kwargs) + else: + output_string = tabulate.tabulate(input_data, tablefmt="latex_raw", *args, **kwargs) + except ImportError: + print("Error: the tabulate module is not available, install it with 'pip install tabulate' or 'conda install tabulate'.") # DEBUG + if filename is not None and not erase_output and os.path.exists(filename): + print("Error: the file named '{}' already exists, and option 'erase_output' is False.".format(filename)) + return -1 + if name_of_table is not None: + output_string = r"""%% LaTeX code for a table, produced by SMPyBandits.Environment.plotsetting.table_to_latex() +\begin{table} +%s +\caption{%s} +\end{table}""" % (output_string, name_of_table) + print("\nThe data from object (shape = {} and size = {}) can be pretty printed in a LaTeX table looking like this one:".format(np.shape(input_data), np.size(input_data))) # DEBUG + print(output_string) + if filename is not None: + print("\nThe data from object (shape = {} and size = {}) will be saved to the file {}...".format(np.shape(input_data), np.size(input_data), filename)) # DEBUG + with open(filename, 'w') as open_file: + print(output_string, file=open_file) + return 0 + + +# --- Debugging + +if __name__ == "__main__": + # Code for debugging purposes. 
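    # A small, self-contained check of the pure helpers defined above (palette, makemarkers,
    # nrows_ncols); it only prints their outputs and creates no figure, so it can run before
    # the doctests below.
    print("palette(5)     :", palette(5))
    print("makemarkers(5) :", makemarkers(5))
    print("nrows_ncols(7) :", nrows_ncols(7))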
+ from doctest import testmod + print("\nTesting automatically all the docstring written in each functions of this module :") + testmod(verbose=True) diff --git a/obsolete/simu_config_network_only.py b/obsolete/simu_config_network_only.py new file mode 100644 index 0000000..f6be446 --- /dev/null +++ b/obsolete/simu_config_network_only.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# This file provides the configurations for each simulation. + +__author__ = "Wenbo Wang" + +from envutils import Struct as Section + +if __name__ == '__main__': + print("Warning: this script 'simu_config.py' is NOT executable..") # DEBUG + exit(0) + +############################################################################### +# Section 1: +# Define the algorithms that are used in the simulation +############################################################################### +ENV_ALG_SETTING_1 = Section("Simulation of HetNet: reward evolution for 5 algorithms") +ENV_ALG_SETTING_1.game_horizon = 200000 + + +# Disable simulation for reward evolution in a single shot +ENV_ALG_SETTING_1.enable_reward_simulation = True +ENV_ALG_SETTING_1.enable_switching_simulation = True + +ENV_ALG_SETTING_1.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Throne', 'TnE Nonobservable'] #, +ENV_ALG_SETTING_1.alg_configs = [None, + {"delta": 0.02, "exploration_time": 4000}, + {"c1": 100, "c2": 200,"c3":100, "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.39, "alpha22": 0.4,}, + {"c1": 100, "c2": 300,"c3":200, "epsilon": 0.025, "delta": 1.5}, + {"c1": 100, "c2": 200,"c3":100, "epsilon": 0.025, "delta": 1.5, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4, "observable": 0} + ] + +# Experiment parameters +ENV_ALG_SETTING_1.flag_save_figure = True +ENV_ALG_SETTING_1.save_data = False + +# Experiment parameters +ENV_ALG_SETTING_1.T_repr_rounds = 40 + +ENV_ALG_SETTING_1.repeated_play_data_name = 'reward_data_4_alg_HetNet' + +# Enable parallel processing +ENV_ALG_SETTING_1.flag_parallel = True +ENV_ALG_SETTING_1.flag_progress_bar = True \ No newline at end of file diff --git a/obsolete/test_PPP.py b/obsolete/test_PPP.py new file mode 100644 index 0000000..737605f --- /dev/null +++ b/obsolete/test_PPP.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 18 11:16:15 2019 + +@author: wenbo2017 +""" + +import scipy +import numpy as np +import matplotlib.pyplot as plt + +import simu_config as CONFIG +#import matlab.engine + +#from HetNetSimulator import HomeBrewedHetNetEnv +import argparse +import os +import sys + +from loggingutils import info_logger + +def PoissonPP( rt, Dx, Dy=None ): + ''' + Determines the number of events `N` for a rectangular region, + given the rate `rt` and the dimensions, `Dx`, `Dy`. + Returns a <2xN> NumPy array. 
+ ''' + if Dy == None: + Dy = Dx + N = scipy.stats.poisson( rt*Dx*Dy ).rvs() + x = scipy.stats.uniform.rvs(0,Dx,((N,1))) + y = scipy.stats.uniform.rvs(0,Dy,((N,1))) + P = np.hstack((x,y)) + return P + +if __name__ == '__main__': +# rate, Dx = 10, 1 +# P = PoissonPP( rate, Dx ).T +# fig, ax = plt.subplots() +# ax = fig.add_subplot(111) +# ax.scatter( P[0], P[1], edgecolor='b', facecolor='none', alpha=0.5 ) +# # lengths of the axes are functions of `Dx` +# plt.xlim(0,Dx) ; plt.ylim(0,Dx) +# # label the axes and force a 1:1 aspect ratio +## plt.xlabel('X') ; plt.ylabel('Y') ; ax.set_aspect(1) +# plt.title('Poisson Process {}'.format(rate)) +## savefig( 'poisson_lambda_0p2.png', fmt='png', dpi=100 ) + + epsilon = 0.02 + nbArm = 10 + tmp_factor = 0.1 + + current_action = 3 + + for ii in range(10): + prob_no_change = 1 - epsilon**(tmp_factor) + prob_rand_action = epsilon**(tmp_factor) / (nbArm - 1) + + action_array = list(range(nbArm)) + prob_array = np.zeros(nbArm) + prob_array[:] = prob_rand_action + prob_array[current_action] = prob_no_change + + action = np.random.choice(action_array, size=None, p=prob_array) + + print("new action: {}; prob_stay: {:.2}, prob_rnd_change: {:.2}".format(action, prob_no_change, prob_rand_action)) + +# test_simulator = HomeBrewHetNetEnv({'context 1'}, 10, 10) +# test_simulator.initialize_UE(10, distance = 200, dist_mode = 0) +# +# test_simulator.helper_plot_ue_posiiton() +# bs_position = [1,2] +# bs_position = np.broadcast_to(bs_position, (10,2)) +# +# print(bs_position) +# eng = matlab.engine.connect_matlab() +# eng.sqrt(4.0) + + C_set = {"context 1", "context 2", "context 3"} + + my_logger = info_logger() + my_logger.logger.debug("test message.") + + record_series = np.empty((0,4)) + + record1 = np.array([1, 2, 3, 4]) + record2 = np.array([0, 9, 8, 7]) + + record_series = np.append(record_series, [record1], axis=0) + record_series = np.append(record_series, [record2], axis=0) + print(record_series) + print(record_series.shape) + + ret_rand = np.random.uniform(low=0.5, high=1.0, size=3) + print(ret_rand) + +# game_config = CONFIG.ENV_SCENARIO_3 +# print("MAB game with configuration '{}' starts to play...".format(game_config.__repr__())) + my_parser = argparse.ArgumentParser(description='Select the configuration type to run the simulations') + + # Add the arguments + my_parser.add_argument('-id', + metavar='ID', + type=int, + help='Choose the configuration ID between [1-6]') + + # Execute the parse_args() method + args = my_parser.parse_args() + + if args.id is not None: + print ("id has been set to {}".format(args.id)) + else: + args.id = 1 + print ("id has been set to {}".format(args.id)) \ No newline at end of file diff --git a/obsolete/test_main_MPMAB.py b/obsolete/test_main_MPMAB.py new file mode 100644 index 0000000..8e1702a --- /dev/null +++ b/obsolete/test_main_MPMAB.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. 
+ + +This file tests the running framework of the bandit simulation +""" + +__author__ = "Wenbo Wang" + +import numpy as np + +from MPMAB import MP_MAB +from HetNetSimulator import HomeBrewedHetNetEnv +from PlayResult import ResultMultiPlayers +from MABAlgorithms import Hungarian, MusicalChairs, TrialandError, GameofThrone +from Arms import * + +import time +from tqdm import tqdm + +if __name__ == '__main__': + # test code + horizon = 10000# should not be less than 100000 for MC + + context_set = {"context 1", "context 2", "context 3"} + +# nb_player = 2 +# nb_arms = 3 +# dic_lower = {("context 1", 0): np.array([0., 0.5, 0.3]), ("context 2", 0): np.array([0.1, 0.2, 0.2]), ("context 3", 0): np.array([0., 0.2, 0.25]), +# ("context 1", 1): np.array([0.1, 0.6, 0.2]), ("context 2", 1): np.array([0., 0., 0.]), ("context 3", 1): np.array([0.2, 0.1, 0.45])} +# dic_upper = {("context 1", 0): np.array([0.5, 0.8, 0.6]), ("context 2", 0): np.array([1., 1., 0.4]), ("context 3", 0): np.array([1, 0.3, 0.65]), +# ("context 1", 1): np.array([0.81, 0.96, 0.52]), ("context 2", 1): np.array([0.5, 0.4, 0.9]), ("context 3", 1): np.array([0.62, 0.21, 0.95])} + + nb_player = 5 + nb_arms = 6 + + """ + (1) Create an environment instance (e.g., with uniform arms) of the MPMAB + """ + hetnet_params = {'enabel mmWave': True, + 'horizon': horizon, + 'cell range': 200, + 'context_prob': {'context 1':2, 'context 2':1, 'context 3':1}, + 'los_prob': {'context 1':2, 'context 2':1, 'context 3':1} + } + multi_player_MAB = HomeBrewedHetNetEnv.HetNet_mab(context_set, nb_arms, nb_player, hetnet_params) +# multi_player_MAB = MP_MAB.gaussian_mab(context_set, nb_arms, nb_player, dic_lower, dic_upper) + + multi_player_MAB.prepare_samples(horizon) + multi_player_MAB.save_environment() + + start_time = time.time() + + """ + (2) Create Musical Chairs algorithm + """ + alg_param_mc = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "T0": 3000 + } + alg_MC = MusicalChairs(alg_param_mc) + + # to record the learning results of alg_MC + result_MC = ResultMultiPlayers("Musical Chair", context_set, nb_player, nb_arms, horizon) + + """ + (3) Create Hungarian algorithm + """ + alg_param_hungarian = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set + } + + alg_hungarian = Hungarian(alg_param_hungarian) + +# dic_pulls_on_means = dict() +# dic_total_rewards_on_means = dict() +# dic_sampled_rewards_on_means = dict() +# #get static allocation w.r.t. 
the means in each context +# for context in context_set: +# lower, upper, means, variance = multi_player_MAB.get_param(context) +# static_pulls, static_total_reward, static_sampled_rewards = alg_hungarian.learn_policy(means) +# +# dic_pulls_on_means[context] = static_pulls +# dic_total_rewards_on_means[context] = static_total_reward +# dic_sampled_rewards_on_means[context] = static_sampled_rewards +# +# #recorder of learning results +# # to store the centralized algorithm result of alg_hungarian + result_hungarian = ResultMultiPlayers("Instant Hungarian", context_set, nb_player, nb_arms, horizon) +# result_hungarian_mean = ResultMultiPlayers("Hungarian", context_set, nb_player, nb_arms, horizon) + + """ + (4) Create trial-and-error algorithm + """ + alg_param_tne = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "c1": 100, "c2": 200, "c3": 100, + "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4 + } + alg_TnE = TrialandError(alg_param_tne) + # to store the centralized algorithm result of alg_hungarian + result_TnE = ResultMultiPlayers("Trial-n-Error", context_set, nb_player, nb_arms, horizon) + + """ + (5) Create game-of-throne algorithm + """ + alg_param_got = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "c1": 100, "c2": 200, "c3": 100, + "epsilon": 0.01, "delta": 2, "xi": 0.001, + } + + alg_GoT = GameofThrone(alg_param_got) + + result_GoT = ResultMultiPlayers("Game of Throne", context_set, nb_player, nb_arms, horizon) + + # Main loop of learning + for t in tqdm(range(horizon)): + context, arm_values = multi_player_MAB.draw_sample(t) + + # Hungarian algoirthm over the instantaneous samples and results + pulls, total_reward, sampled_rewards = alg_hungarian.learn_policy(arm_values) + choices = alg_hungarian.pulls2choices(pulls) + result_hungarian.store(t, context, choices, sampled_rewards, total_reward, pulls) + + # Hungarian algoirthm over the mean samples and results +# static_pulls = dic_pulls_on_means[context] +# static_choices = alg_hungarian.pulls2choices(static_pulls) +# static_reward = dic_sampled_rewards_on_means[context] +# static_total_reward = dic_total_rewards_on_means[context] +# result_hungarian_mean.store(t, context, static_choices, static_reward, static_total_reward, static_pulls) + + # Musical-chair algorithm over the instantaneous samples and the learning results + pulls, total_reward, sampled_rewards = alg_MC.learn_policy(arm_values, context, t) + choices = alg_MC.pulls2choices(pulls) + collisions = alg_MC.resolve_collision(pulls) + result_MC.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + # Trial-and-error algorithm over the instantaneous samples and the learning results + pulls, total_reward, sampled_rewards = alg_TnE.learn_policy(arm_values, context, t) + choices = alg_TnE.pulls2choices(pulls) + collisions = alg_TnE.resolve_collision(pulls) + result_TnE.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + # Game of Throne + pulls, total_reward, sampled_rewards = alg_GoT.learn_policy(arm_values, context, t) + choices = alg_GoT.pulls2choices(pulls) + collisions = alg_GoT.resolve_collision(pulls) + result_GoT.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + #end of play + running_time = time.time() - start_time + print("Simulation completes in {}s for {} rounds".format(running_time, horizon)) + + # for debugging + 
print("Trial-and-error Algorithm: {} exploration rounds, {} learning rounds, {} exploitation rounds".format(alg_TnE.nbExploration, + alg_TnE.nbTnE, alg_TnE.nbExploitation)) + + print("Context 1: {}, Context 2: {}, Context 3: {}".format(result_MC.context_history.count("context 1"), + result_MC.context_history.count("context 2"), + result_MC.context_history.count("context 3")) ) + +# result_hungarian.plot_cumu_rewards(other_results=[result_MC, result_TnE], save_fig=True, save_data=False) + result_hungarian.plot_avg_reward(other_results=[result_MC, result_GoT, result_TnE], save_fig=True, save_data=False) + + + + + + + + + \ No newline at end of file diff --git a/obsolete/test_parallel.py b/obsolete/test_parallel.py new file mode 100644 index 0000000..ad3b65f --- /dev/null +++ b/obsolete/test_parallel.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Dec 20 12:36:37 2019 + +@author: wenbo2017 +""" + +#import multiprocessing as mp +#import time +# +#class someClass(object): +# +# def __init__(self): +# self.var = 1 +# +# def test(self): +# print(self) +# print ("Variable value: {}".format(self.var)) +# self.var += 1 +# +# def apply_async_with_callback(self): +# pool = mp.Pool(processes = 3) +# for i in range(10): +# pool.apply_async(self.test) #, callback = self.log_result +# +# pool.close() +# pool.join() +# +# +#if __name__ == '__main__': +# sc = someClass() +# +# sc.apply_async_with_callback() + + +#from multiprocessing import Pool +#import time +#from tqdm import * +# +#def _foo(my_number): +# square = my_number * my_number +# time.sleep(1) +# return square +# +#if __name__ == '__main__': +# with Pool(processes=2) as p: +# max_ = 30 +# with tqdm(total=max_) as pbar: +# for i, _ in tqdm(enumerate(p.imap_unordered(_foo, range(0, max_)))): +# pbar.update() + + +import numpy as np + +import multiprocessing as mp +from tqdm import tqdm +from time import sleep + +SENTINEL = 1 + +def test(q=None): + for i in range(1000): + sleep(0.01) + + if q is not None: + q.put(SENTINEL) + +def listener(q, nbProcess): + pbar = tqdm(total = 1000*nbProcess) + for item in iter(q.get, None): + pbar.update() + +if __name__ == '__main__': +# pool = mp.Pool(processes=5) +# manager = mp.Manager() +# queue = manager.Queue() +# +# proc = mp.Process(target=listener, args=(queue, 5)) +# +# for ii in range(5): +# pool.apply_async(test, args=(queue, )) +# +# proc.start() +# pool.close() +# pool.join() +# queue.put(None) +# proc.join() +# +# print("process is done") +# c = np.array([]) + c = None + + d = np.array([1, 2, 0, 4, 0]) + + idx = np.where(d != 0) + + d[idx] = -1 + + print(d) + + +# a = [0, 0, 0, 0, 0] +# +# arm_selected = np.nonzero(a) +# +# print(arm_selected[0]) +## print(arm_selected[1]) +# +# indx = np.where(a == 6) +# +# print(indx) +# print(indx[0].ndim) +# print(indx[0].shape) +# +# aa = np.array(a) +# +# aa[:] = 0 +# +# b = np.array(list(range(0, 10))) +# print(b) +# +# print(aa) +# q = mp.Queue() +# proc = mp.Process(target=listener, args=(q,)) +# proc.start() +# workers = [mp.Process(target=test, args=(q,)) for i in range(5)] +# for worker in workers: +# worker.start() +# for worker in workers: +# worker.join() +# q.put(None) +# proc.join() \ No newline at end of file diff --git a/obsolete/test_plot.py b/obsolete/test_plot.py new file mode 100644 index 0000000..8d721f8 --- /dev/null +++ b/obsolete/test_plot.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 11 10:06:46 2019 + +@author: wenbo2017 +""" + +# testing plotting methods in +import seaborn as sns +import numpy 
as np +import pandas as pd + +#import matplotlib.pyplot as plt +from plotutils import plot_data_frame, prepare_file_name, read_data_frame, plot_repeated_simu_results +#from matplotlib.lines import Line2D +import simu_config as CONFIG + +flag_test1 = False + # start the simulation +if flag_test1 == True: + horizon_list = np.linspace(5000, 50000, 20) + + timepoints = [] + alg_len = 3 + regret_series = [] + #Monte Carlo Simulation + simu_rounds = 300 + + alg_types = [ii for ii in range(alg_len)] + + type_series = [] + + for simu_index in range(simu_rounds): + print("Simulation round {} of total rounds {}...".format(simu_index, simu_rounds)) + # 2d array of payoff for a single simulation round + learned_total_payoff = np.zeros((alg_len, len(horizon_list))) + + horizon_index = 0 + for horizon_index in range(len(horizon_list)): + # example: for 3 algorithms, len(tmp_total_payoff) == 3 + tmp_total_payoff = np.random.rand(alg_len) + + for alg_index in range(alg_len): + learned_total_payoff[alg_index][horizon_index] = tmp_total_payoff[alg_index] #/ horizon_list[horizon_index] + + type_series.extend(alg_types) + + tmp_time = [horizon_list[horizon_index]]*alg_len + + timepoints.extend(tmp_time) + + regret_series.extend(tmp_total_payoff) + + recorded_data = {} + + recorded_data['signal'] = regret_series + + recorded_data['time'] = timepoints + + recorded_data['algorithms'] = type_series + + my_data = pd.DataFrame(recorded_data) + +# sns.relplot(x="time", y="signal", hue = 'algorithms', +# kind="line", data=my_data, height=5, aspect=1.25 ); + plot_data_frame(my_data, xlabel="time", ylabel="signal", huelabel='algorithms', save_file_name='test') + +flag_test2=False +if flag_test2 == True: + T = np.linspace(start=25000, stop = 10000, num=20) + X = (5*2*np.log(T+2)**2 + 100*2*np.log(T+2))/T + 0.1 + Label = ['$0.1+200\log(T+2)+10\log^2(T+2)$']*len(T) + + recorded_data = {} + + recorded_data['Total number of plays'] = T + + recorded_data['Average regret over time'] = X + + recorded_data['Algorithm'] = Label + + my_data = pd.DataFrame(recorded_data) + + colors = ["#4374B3"] + # Set your custom color palette + sns.set_palette(sns.color_palette(colors)) + + file_name = "bound_regret" + plot_data_frame(my_data, xlabel="Total number of plays", + ylabel="Average regret over time", huelabel='Algorithm', + save_file_name=file_name, save_data_Name='test_data') + +flag_test3=False +if flag_test3==True: + plot_average_regret(start=25000, horzion=200000, nb_points=20) + + + regret_data = read_data_frame('regret_data_3_alg') + + T = np.exp(np.linspace(start=np.log(45000), stop = np.log(300000), num=20)) + X = (25*2*(np.log2(T+2)**2) + 100*2*np.log2(T+2)+10000)/T + Label = ['$O(M\log_2^{\delta}(T))$']*len(T) + + Dash = [1]*len(T) + + T = np.append(regret_data['Total number of plays'], T) + X = np.append((regret_data['Average regret over time']), X) + Label = np.append((regret_data['Algorithm']), Label) + Dash = np.append([0]*len(regret_data['Algorithm']), Dash) + + recorded_data = {} + recorded_data['Total number of plays'] = T + recorded_data['Average regret over time'] = X + recorded_data['Algorithms'] = Label + recorded_data['Dash'] = Label + + bound_data = pd.DataFrame(recorded_data) + + g = plot_data_frame(bound_data, xlabel="Total number of plays", + ylabel="Average regret over time", huelabel='Algorithms') + + g.ax.lines[3].set_linestyle("--") + g.ax.lines[3].set_color("grey") + + g.ax.set(xscale="log") + + le = g.ax.legend() + le.get_lines()[4].set_color('grey') + le.get_lines()[4].set_linestyle("--") + 
le.get_frame().set_facecolor('none') + le.get_frame().set_edgecolor('none') + + file_path = prepare_file_name(file_name="monte_carlo_regret", ext_format='pdf', add_timestamp=False) + g.savefig(file_path) + +flag_test4=True +if flag_test4==True: + game_config = CONFIG.ENV_SCENARIO_3 + start = game_config.T_start + nb_point = game_config.T_step + game_horizon = game_config.game_horizon + + plot_repeated_simu_results(start=start, horzion=game_horizon, nbPoints=nb_point, flag_bound=True, + data_file_name=game_config.repeated_play_data_name) \ No newline at end of file diff --git a/plotutils.py b/plotutils.py new file mode 100644 index 0000000..b34e54c --- /dev/null +++ b/plotutils.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines the plotting methods for the simulation. +# The configuration for pallette creation are partially inspired by the ones in the SMPyBandits project, +# see plotsettings.py in SMPyBandits (https://github.com/SMPyBandits/SMPyBandits) + + +__author__ = "Wenbo Wang" + +from datetime import datetime + +import matplotlib as mpl +#from matplotlib.ticker import FuncFormatter + +import os, errno + +import matplotlib.pyplot as plt +import matplotlib.ticker as mticker + +#import numpy as np +import seaborn as sns +import pandas as pd + +import numpy as np + +#from pickle import dump as pickle_dump # alternative choice of dumping files + +DPI = 120 #: DPI to use for the figures +FIGSIZE = (4,3) #: Figure size, in inches +#FIGSIZE = (5,4) #: Figure size, in inches + +# Customize the colormap +HLS = True #: Use the HLS mapping, or HUSL mapping +VIRIDIS = False #: Use the Viridis colormap + +# Bbox in inches. Only the given portion of the figure is saved. If 'tight', try to figure out the tight bbox of the figure. +BBOX_INCHES = "tight" #: Use this parameter for bbox +BBOX_INCHES = None + +if __name__ != '__main__': + # use a clever color palette, eg http://seaborn.pydata.org/api.html#color-palettes + sns.set(context="talk", style="whitegrid", palette="hls", font="sans-serif", font_scale=0.95) + + # Use tex by default http://matplotlib.org/2.0.0/users/dflt_style_changes.html#math-text + # mpl.rcParams['text.usetex'] = True # XXX force use of LaTeX + mpl.rcParams['font.family'] = "sans-serif" + mpl.rcParams['font.sans-serif'] = "DejaVu Sans" + mpl.rcParams['mathtext.fontset'] = "cm" + mpl.rcParams['mathtext.rm'] = "serif" + + # Configure size for axes and x and y labels + # Cf. https://stackoverflow.com/a/12444777/ + mpl.rcParams['axes.labelsize'] = "x-small" + mpl.rcParams['xtick.labelsize'] = "x-small" + mpl.rcParams['ytick.labelsize'] = "x-small" + mpl.rcParams['figure.titlesize'] = "x-small" + + # Configure the DPI of all images, once for all! 
+ mpl.rcParams['figure.dpi'] = DPI + # print(" - Setting dpi of all figures to", DPI, "...") # DEBUG + + # Configure figure size, even of if saved directly and not displayed, use HD screen + # cf. https://en.wikipedia.org/wiki/Computer_display_standard + mpl.rcParams['figure.figsize'] = FIGSIZE + # print(" - Setting 'figsize' of all figures to", FIGSIZE, "...") # DEBUG + +def prepare_file_name(file_name = None, alg_name = None, ext_format = None, add_timestamp=True): + now = datetime.now() + current_date = now.strftime("%Y-%m-%d-%H-%M-%S") + + cwd = os.getcwd() # current directory + target_directory = "{}\{}".format(cwd, "results") + + if not os.path.exists(target_directory): + try: + os.makedirs(target_directory) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + file_name_no_ext = "" + file_path = "" + if alg_name is None and add_timestamp == False: + file_name_no_ext = "{}".format(file_name if file_name is not None else "-") + + file_path = "{}\{}.{}".format(target_directory, file_name_no_ext, + ext_format if ext_format is not None else "") + else: + file_name_no_ext = "{}-{}-{}".format(file_name if file_name is not None else "", + alg_name if alg_name is not None else "", + current_date if add_timestamp else "") + + file_path = "{}\{}.{}".format(target_directory, file_name_no_ext, + ext_format if ext_format is not None else "") + + + return file_path, file_name_no_ext + +def read_data_frame(file_name, ext_format='pkl'): + """ + Read a DataFrame from the default path with file name identified as 'file_name' + """ + file_path, file_name = prepare_file_name(file_name=file_name, ext_format=ext_format, add_timestamp=False) + df = pd.read_pickle(file_path) + + return df + +def make_palette(nbColors, hls=HLS, viridis=False): + """ + Use the seaborn palette to create nbColors different curves on the same figure. + See also http://seaborn.pydata.org/generated/seaborn.hls_palette.html#seaborn.hls_palette + """ + if viridis: + return sns.color_palette('viridis', nbColors) + else: + return sns.hls_palette(nbColors + 1)[:nbColors] if hls else sns.husl_palette(nbColors + 1)[:nbColors] + + +def make_markers(nbMarkers): + """ + Give a list of cycling markers. See also https://matplotlib.org/3.1.1/api/markers_api.html + List of markers in SMPyBandits (as an example): + allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] + + """ + allmarkers = ['o', 'D', 'v', 'X', 'P', '^', 'p', '<', 's', '^', '*', 'h', '>'] + marker_list = allmarkers * (1 + int(nbMarkers / float(len(allmarkers)))) # Cycle the good number of time + return marker_list[:nbMarkers] # Truncate + + +#: Shrink factor if the legend is displayed on the right of the plot. +SHRINKFACTOR = 0.60 + +#: Default parameter for maximum number of label to display in the legend INSIDE the figure +MAXNBOFLABELINFIGURE = 8 + +def display_legend(putatright=False, fontsize="xx-small", shrinkfactor=SHRINKFACTOR, + maxnboflabelinfigure=MAXNBOFLABELINFIGURE, fig=None, title=None): + """plt.legend() with good options, cf. http://matplotlib.org/users/recipes.html#transparent-fancy-legends. + - For the purpose of generating figures for papers, it is not recommended to place it at the right-side. 
+ """ + try: + len_leg = len(plt.gca().get_legend_handles_labels()[1]) + putatright = len_leg > maxnboflabelinfigure + if len_leg > maxnboflabelinfigure: + print("Warning: forcing to use putatright = {} because there is {} items in the legend.".format(putatright, len_leg)) # DEBUG + except (ValueError, AttributeError, IndexError) as e: + print("error =", e) # DEBUG + + if fig is None: + fig = plt + if putatright: + try: + # Shrink current axis by 20% on xaxis and 10% on yaxis + delta_rect = (1. - shrinkfactor)/6.25 + fig.tight_layout(rect=[delta_rect, delta_rect, shrinkfactor, 1 - 2*delta_rect]) + # Put a legend to the right of the current axis + fig.legend(loc='center left', numpoints=1, fancybox=True, framealpha=0.8, bbox_to_anchor=(1, 0.5), title=title, fontsize=fontsize) + except: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + else: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + + +def plot_data_frame(input_dframe, xlabel, ylabel, huelabel, stylelabel=None, height=5, aspect=1.25, flag_semilogx=False, + save_file_name=None, sav_file_ext=None, save_data_name=None): + """ + plot_data_frame() takes 'input_dframe' as the payload data. \ + It also tries to plot the repeated simulation results with the labels of x, y axis and + the huelabel identified by the keys of 'input_dframe' as 'xlabel', 'ylabel' and 'huelabel'. + """ +# sns.set(font_scale=1.0) + sns_figure = sns.relplot(x=xlabel, y=ylabel, hue = huelabel, style=stylelabel, + kind="line", data=input_dframe, height=height, aspect=aspect); + + if flag_semilogx == True: + sns_figure.ax.set(xscale="log") + + # force scientific notations on x-axis + formatter = mticker.ScalarFormatter(useOffset=False, useMathText=True) + formatter_func = lambda x,pos : "${}$".format(formatter._formatSciNotation('%1.10e' % x)) + + sns_figure.ax.get_xaxis().set_major_formatter(mticker.FuncFormatter(formatter_func)) + sns_figure.ax.get_yaxis().set_major_formatter(mticker.FuncFormatter(formatter_func)) + + if save_file_name is not None: + sav_file_ext = sav_file_ext if sav_file_ext is not None else 'pdf' + figure_file_path, figure_file_name = prepare_file_name(file_name=save_file_name, ext_format=sav_file_ext) + sns_figure.savefig(figure_file_path) + + data_file_name = None + if save_data_name is not None: + data_file_path, data_file_name = prepare_file_name(file_name=save_data_name, ext_format='pkl', add_timestamp=True) + input_dframe.to_pickle(data_file_path) + + return sns_figure, data_file_name + + +""" +Specifically used for plotting regret data, with theoretical bound +""" +def plot_repeated_simu_results(start, horzion, nbPoints, + nbArm=2, c1=100, c2=20, flag_bound = False, + key_x='Total number of plays', key_y='Average regret', key_alg='Algorithms', + data_file_name='regret_data', save_fig_name="monte_carlo_regret"): + #plot key_x, key_y with huelable as key_alg + repeated_play_data = read_data_frame(data_file_name) + + if flag_bound: + T = np.linspace(start=4*start, stop = horzion, num=nbPoints) + + # This formula is heuristic, and for different parameter sets (context-arm numbers) + # we need to obtain the proper parameters of a tight bound with manually testing. 
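        # The reference curve computed below is the time-averaged heuristic bound
        # (c2 * M * log2(T+2)^2 + c1 * M * log2(T+2)) / T with M = nbArm; it is the
        # dashed grey '$O(M\log_2^{\delta}(T))$' line drawn alongside the measured
        # average regret further down in this function.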
+        X = (c2 * nbArm * (np.log2(T + 2)**2) + c1 * nbArm * np.log2(T + 2)) / T
+        Label = [r'$O(M\log_2^{\delta}(T))$'] * len(T)
+
+        Dash = [1] * len(T)
+
+        T = np.append(repeated_play_data[key_x], T)
+        X = np.append((repeated_play_data[key_y]), X)
+        Label = np.append((repeated_play_data[key_alg]), Label)
+        Dash = np.append([0] * len(repeated_play_data[key_alg]), Dash)
+
+        recorded_data = {}
+        recorded_data[key_x] = T
+        recorded_data[key_y] = X
+        recorded_data[key_alg] = Label
+        recorded_data['Dash'] = Dash
+
+        final_data = pd.DataFrame(recorded_data)
+
+        g, data_file_name = plot_data_frame(final_data, xlabel=key_x, ylabel=key_y, huelabel=key_alg)
+
+        nbLines = len(set(final_data[key_alg]))
+        # print(nbLines)  # DEBUG
+
+#        # force scientific notation on the x-axis
+#        g.ax.get_xaxis().get_major_formatter().set_scientific(True)
+        # draw the bound curve (the last hue group) as a grey dashed line
+        g.ax.lines[nbLines - 1].set_linestyle("--")
+        g.ax.lines[nbLines - 1].set_color("grey")
+
+        le = g.ax.legend()
+        le.get_lines()[nbLines].set_color('grey')
+        le.get_lines()[nbLines].set_linestyle("--")
+        le.get_frame().set_facecolor('none')
+        le.get_frame().set_edgecolor('none')
+    else:
+        final_data = repeated_play_data
+        g, data_file_name = plot_data_frame(final_data, xlabel=key_x, ylabel=key_y, huelabel=key_alg)
+#        # force scientific notation on the x-axis
+#        g.ax.get_xaxis().get_major_formatter().set_scientific(True)
+
+    # force scientific notation on both axes
+    formatter = mticker.ScalarFormatter(useOffset=False, useMathText=True)
+    formatter_func = lambda x, pos: "${}$".format(formatter._formatSciNotation('%1.10e' % x))
+
+    g.ax.get_xaxis().set_major_formatter(mticker.FuncFormatter(formatter_func))
+    g.ax.get_yaxis().set_major_formatter(mticker.FuncFormatter(formatter_func))
+
+    file_path, file_name = prepare_file_name(file_name=save_fig_name, ext_format='pdf', add_timestamp=False)
+    g.savefig(file_path)
\ No newline at end of file
diff --git a/results/tmp_plot_postprocess.py b/results/tmp_plot_postprocess.py
new file mode 100644
index 0000000..c1a916d
--- /dev/null
+++ b/results/tmp_plot_postprocess.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Mar 3 13:28:25 2020
+
+@author: wenbo2017
+"""
+
+# Remember to change the names of the data source files below to match the generated result files.
+
+import sys
+import os.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
+
+import pandas as pd
+from plotutils import plot_data_frame, plot_repeated_simu_results
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-00.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Average sum of rewards", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
+
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-03.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Accumulated switching counts", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-03.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Accumulated collision counts", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
\ No newline at end of file
diff --git a/simu_config.py b/simu_config.py
new file mode 100644
index 0000000..e10ca44
--- /dev/null
+++ b/simu_config.py
@@ -0,0 +1,380 @@
+# -*- coding: utf-8 -*-
+"""
+@author: Wenbo Wang
+
+[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel
+Allocation in IoT Networks over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game"
+
+License:
+This program is licensed under the GPLv2 license. If you in any way use this code for research
+that results in publications, please cite our original article listed above.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+"""
+
+# This file provides the configuration of each simulation scenario.
+
+__author__ = "Wenbo Wang"
+
+import copy  # used to derive independent parallel variants of the base scenarios
+import numpy as np
+from envutils import Struct as Section
+
+if __name__ == '__main__':
+    print("Warning: 'simu_config.py' is a configuration module and is not meant to be run directly.")
+    exit(0)
+
+# Keys are (context, player ID) pairs; values are per-arm vectors.
+# The first dictionary holds the lower bounds of the rewards, the second one the upper bounds.
+initial_data = [{("context 1", 0): np.array([0., 0.5, 0.3]), ("context 2", 0): np.array([0.1, 0.2, 0.2]), ("context 3", 0): np.array([0., 0.2, 0.25]),
+                 ("context 1", 1): np.array([0.1, 0.6, 0.2]), ("context 2", 1): np.array([0., 0., 0.]), ("context 3", 1): np.array([0.2, 0.1, 0.45])},
+                {("context 1", 0): np.array([0.5, 0.8, 0.6]), ("context 2", 0): np.array([1., 1., 0.4]), ("context 3", 0): np.array([1, 0.3, 0.65]),
+                 ("context 1", 1): np.array([0.81, 0.96, 0.52]), ("context 2", 1): np.array([0.5, 0.4, 0.9]), ("context 3", 1): np.array([0.62, 0.21, 0.95])}
+                ]
+
+initial_data_2 = [{("context 1", 0): np.array([0.0, 0.5, 0.3, 0.1]), ("context 2", 0): np.array([0.1, 0.2, 0.2, 0.5]), ("context 3", 0): np.array([0.0, 0.2, 0.25, 0.4]),
+                   ("context 1", 1): np.array([0.1, 0.6, 0.2, 0.44]), ("context 2", 1): np.array([0.0, 0.0, 0.0, 0.2]), ("context 3", 1): np.array([0.2, 0.1, 0.45, 0.36]),
+                   ("context 1", 2): np.array([0.24, 0.11, 0.3, 0.14]), ("context 2", 2): np.array([0.2, 0.0, 0.1, 0.2]), ("context 3", 2): np.array([0.32, 0.21, 0.25, 0.59])},
+
+                  {("context 1", 0): np.array([0.5, 0.8, 0.6, 0.7]), ("context 2", 0): np.array([1.0, 1.0, 0.4, 1.0]), ("context 3", 0): np.array([1.0, 0.3, 0.65, 0.9]),
+                   ("context 1", 1): np.array([0.81, 0.96, 0.52, 1.0]), ("context 2", 1): np.array([0.5, 0.4, 0.9, 0.6]), ("context 3", 1): np.array([0.62, 0.31, 0.95, 0.79]),
+                   ("context 1", 2): np.array([0.81, 0.78, 0.67, 1.0]), ("context 2", 2): np.array([0.3, 0.95, 0.9, 0.6]), ("context 3", 2): np.array([0.75, 0.63, 1.0, 0.99])}
+                  ]
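+
+# Layout illustration (values taken from the tables above): initial_data[0] holds the lower
+# bounds and initial_data[1] the upper bounds of the per-arm reward supports, indexed by
+# (context, player ID). For example, for player 0 in "context 1":
+#
+#     lower = initial_data[0][("context 1", 0)]   # array([0. , 0.5, 0.3])
+#     upper = initial_data[1][("context 1", 0)]   # array([0.5, 0.8, 0.6])
+#
+# i.e. arm 1 of player 0 yields rewards in [0.5, 0.8] under "context 1".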
+
+###############################################################################
+# Section 1:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_1 = Section("2-player-3-context-3-uniform-arm MAB: regret evolution")
+ENV_SCENARIO_1.game_horizon = 200000
+ENV_SCENARIO_1.env_config = {'horizon': ENV_SCENARIO_1.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+# Enable the repeated simulation of the regret and reward evolution
+ENV_SCENARIO_1.enable_regret_simulation = True
+ENV_SCENARIO_1.enable_reward_simulation = True
+
+ENV_SCENARIO_1.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error']
+ENV_SCENARIO_1.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4}]
+
+# Experiment parameters
+ENV_SCENARIO_1.T_start = 5000
+ENV_SCENARIO_1.T_step = 20
+ENV_SCENARIO_1.T_simu_rounds = 20
+
+ENV_SCENARIO_1.flag_save_figure = True
+ENV_SCENARIO_1.repeated_play_data_name = 'regret_data'
+
+# Enable parallel processing
+ENV_SCENARIO_1.flag_parallel = False
+ENV_SCENARIO_1.flag_progress_bar = True
+
+###############################################################################
+# Section 1:
+# Parallel version
+###############################################################################
+# A deep copy keeps the base scenario unchanged (flag_parallel = False) when the
+# parallel variant is enabled.
+ENV_SCENARIO_1_PARALLEL = copy.deepcopy(ENV_SCENARIO_1)
+ENV_SCENARIO_1_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 2:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_2 = Section("2-player-3-context-3-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_2.game_horizon = 80000
+ENV_SCENARIO_2.env_config = {'horizon': ENV_SCENARIO_2.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_2.enable_efficiency_simulation = True
+
+ENV_SCENARIO_2.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error']
+ENV_SCENARIO_2.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4}]
+
+# Experiment parameters
+ENV_SCENARIO_2.flag_save_figure = True
+ENV_SCENARIO_2.save_data = True
+
+# Enable parallel processing
+ENV_SCENARIO_2.flag_parallel = False
+ENV_SCENARIO_2.flag_progress_bar = True
+
+###############################################################################
+# Section 2:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_2_PARALLEL = copy.deepcopy(ENV_SCENARIO_2)
+ENV_SCENARIO_2_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 3:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_3 = Section("2-player-3-context-3-uniform-arm MAB: regret evolution")
+ENV_SCENARIO_3.game_horizon = 200000
+ENV_SCENARIO_3.env_config = {'horizon': ENV_SCENARIO_3.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the repeated simulation of the regret evolution
+ENV_SCENARIO_3.enable_regret_simulation = True
+
+ENV_SCENARIO_3.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_3.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_3.flag_save_figure = True
+ENV_SCENARIO_3.save_data = True
+
+# Experiment parameters
+ENV_SCENARIO_3.T_start = 5000
+ENV_SCENARIO_3.T_step = 20
+ENV_SCENARIO_3.T_simu_rounds = 200
+
+ENV_SCENARIO_3.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_3.flag_parallel = False
+ENV_SCENARIO_3.flag_progress_bar = True
+
+###############################################################################
+# Section 3:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_3_PARALLEL = copy.deepcopy(ENV_SCENARIO_3)
+ENV_SCENARIO_3_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 4:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms; used to test parallel simulation.
+# For a single round of this 4-algorithm example, multiprocessing gives a
+# speed-up of about 1/3.
+###############################################################################
+ENV_SCENARIO_4 = Section("2-player-3-context-3-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_4.game_horizon = 200000
+ENV_SCENARIO_4.env_config = {'horizon': ENV_SCENARIO_4.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_4.enable_efficiency_simulation = True
+
+ENV_SCENARIO_4.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_4.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_4.flag_save_figure = True
+ENV_SCENARIO_4.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_4.T_start = 5000
+ENV_SCENARIO_4.T_step = 20
+ENV_SCENARIO_4.T_simu_rounds = 200
+
+ENV_SCENARIO_4.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_4.flag_parallel = False
+ENV_SCENARIO_4.flag_progress_bar = True
+
+###############################################################################
+# Section 4:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_4_PARALLEL = copy.deepcopy(ENV_SCENARIO_4)
+ENV_SCENARIO_4_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 5:
+# MAB environment in a HetNet, with 12 random arms/channels and 10 randomly placed
+# users, 3 contexts (MUE transmission in the underlying macro cells).
+# Multiprocessing support for this example is still to be implemented.
+###############################################################################
+ENV_SCENARIO_5 = Section("10-UE-12-Channel HetNet: regret evolution")
+ENV_SCENARIO_5.game_horizon = 80000
+ENV_SCENARIO_5.env_config = {'horizon': ENV_SCENARIO_5.game_horizon,
+                             'arm number': 12,
+                             'player number': 10,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'HetNet simulator',  # change the underlying distribution here
+                             'enabel mmWave': True,
+                             'cell range': 200,
+                             'context_prob': {'context 1': 1, 'context 2': 1, 'context 3': 1},
+                             'los_prob': {'context 1': 1, 'context 2': 1, 'context 3': 1}
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_5.enable_efficiency_simulation = True
+
+ENV_SCENARIO_5.alg_types = ['Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_5.alg_configs = [None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                     "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.39, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_5.flag_save_figure = True
+ENV_SCENARIO_5.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_5.T_start = 5000
+ENV_SCENARIO_5.T_step = 20
+ENV_SCENARIO_5.T_simu_rounds = 200
+
+ENV_SCENARIO_5.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_5.flag_parallel = False
+ENV_SCENARIO_5.flag_progress_bar = True
+
+###############################################################################
+# Section 5:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_5_PARALLEL = copy.deepcopy(ENV_SCENARIO_5)
+ENV_SCENARIO_5_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 6:
+# MAB environment in a HetNet, with 12 random arms/channels and 10 randomly placed
+# users, 3 contexts (MUE transmission in the underlying macro cells)
+###############################################################################
+ENV_SCENARIO_6 = Section("10-UE-12-Channel HetNet: reward evolution")
+ENV_SCENARIO_6.game_horizon = 200000
+ENV_SCENARIO_6.env_config = {'horizon': ENV_SCENARIO_6.game_horizon,
+                             'arm number': 12,
+                             'player number': 10,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'HetNet simulator',  # change the underlying distribution here
+                             'enabel mmWave': True,
+                             'cell range': 250,
+                             'context_prob': {'context 1': 2, 'context 2': 1, 'context 3': 1},
+                             'los_prob': {'context 1': 1.5, 'context 2': 2, 'context 3': 1}
+                             }
+
+# Disable the single-shot efficiency and the repeated regret simulations;
+# enable the repeated reward and switching-count simulations
+ENV_SCENARIO_6.enable_efficiency_simulation = False
+ENV_SCENARIO_6.enable_regret_simulation = False
+ENV_SCENARIO_6.enable_reward_simulation = True
+ENV_SCENARIO_6.enable_switching_simulation = True
+
+ENV_SCENARIO_6.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_6.alg_configs = [None,
+                              {"delta": 0.02, "exploration_time": 10000},
+                              {"c1": 1000, "c2": 3000, "c3": 3000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+                               "alpha11": -0.04, "alpha12": 0.05, "alpha21": -0.035, "alpha22": 0.04, "observable": 1},
+                              {"c1": 1000, "c2": 3000, "c3": 3000, "epsilon": 0.01, "delta": 1.5},
+                              ]
+
+# Experiment parameters
+ENV_SCENARIO_6.flag_save_figure = True
+ENV_SCENARIO_6.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_6.T_start = 40000
+ENV_SCENARIO_6.T_step = 12
+ENV_SCENARIO_6.T_simu_rounds = 200
+
+ENV_SCENARIO_6.repeated_play_data_name = 'reward_data_4_alg_HetNet'
+
+# Enable parallel processing
+ENV_SCENARIO_6.flag_parallel = False
+ENV_SCENARIO_6.flag_progress_bar = True
+
+###############################################################################
+# Section 6:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_6_PARALLEL = copy.deepcopy(ENV_SCENARIO_6)
+ENV_SCENARIO_6_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 7:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 3 players, 4 arms
+###############################################################################
+ENV_SCENARIO_7 = Section("3-context-3-player-4-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_7.game_horizon = 100000
+ENV_SCENARIO_7.env_config = {'horizon': ENV_SCENARIO_7.game_horizon,
+                             'arm number': 4,
+                             'player number': 3,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data_2
+                             }
+
+# Algorithms to compare
+ENV_SCENARIO_7.alg_types = ['Musical Chairs', 'SOC', 'Game of Thrones', 'Trial and Error']  # optionally add 'TnE Nonobservable'
+ENV_SCENARIO_7.alg_configs = [None,
+                              {"delta": 0.02, "exploration_time": 10000},
+                              {"c1": 500, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5},
+                              {"c1": 500, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+                               "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+#                              {"c1": 300, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+#                               "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4, "observable": 0}
+                              ]
+
+# Disable the single-shot efficiency and the repeated regret simulations;
+# enable the repeated reward and switching-count simulations
+ENV_SCENARIO_7.enable_efficiency_simulation = False
+ENV_SCENARIO_7.enable_regret_simulation = False
+ENV_SCENARIO_7.enable_reward_simulation = True
+ENV_SCENARIO_7.enable_switching_simulation = True
+
+# Experiment parameters
+ENV_SCENARIO_7.T_start = 20000
+ENV_SCENARIO_7.T_step = 10
+ENV_SCENARIO_7.T_simu_rounds = 20
+
+ENV_SCENARIO_7.repeated_play_data_name = 'congfig_7_5_algs_uniform'
+
+# Experiment parameters
+ENV_SCENARIO_7.flag_save_figure = True
+ENV_SCENARIO_7.save_data = False
+
+# Enable parallel processing
+ENV_SCENARIO_7.flag_parallel = True
+ENV_SCENARIO_7.flag_progress_bar = True
+
+###############################################################################
+# All configurations are stored in the following dictionary:
+###############################################################################
+CONFIGURATION_DICT = {1: ENV_SCENARIO_1,
+                      2: ENV_SCENARIO_2,
+                      3: ENV_SCENARIO_3,
+                      4: ENV_SCENARIO_4,
+                      5: ENV_SCENARIO_5,
+                      6: ENV_SCENARIO_6,
+                      7: ENV_SCENARIO_1_PARALLEL,
+                      8: ENV_SCENARIO_2_PARALLEL,
+                      9: ENV_SCENARIO_3_PARALLEL,
+                      10: ENV_SCENARIO_4_PARALLEL,
+                      11: ENV_SCENARIO_5_PARALLEL,
+                      12: ENV_SCENARIO_6_PARALLEL,
+                      13: ENV_SCENARIO_7
+                      }
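+
+# Example (illustrative only; the actual driver script is outside this file): a simulation
+# entry point is expected to select a scenario by its key, e.g.
+#
+#     from simu_config import CONFIGURATION_DICT
+#     config = CONFIGURATION_DICT[6]     # 10-UE-12-Channel HetNet, reward evolution
+#     print(config.game_horizon, config.alg_types)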