diff --git a/Arms.py b/Arms.py
new file mode 100644
index 0000000..24a922f
--- /dev/null
+++ b/Arms.py
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+"""
+@author: Wenbo Wang
+
+[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel
+Allocation in IoT Networks over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game"
+
+License:
+This program is licensed under the GPLv2 license. If you in any way use this
+code for research that results in publications, please cite our original
+article listed above.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+"""
+
+__author__ = "Wenbo Wang"
+
+#from random import random
+from numpy.random import random as nprandom
+
+import scipy.stats as stats
+
+class Arm(object):
+    """ Base class for an arm class."""
+
+    def __init__(self, param):
+        """ Base class for an arm class."""
+        self.lower = param["lower_val"] #: Lower value of rewards, array[context]
+        self.upper = param["upper_val"] #: Upper value of rewards
+        self.amplitude = self.upper - self.lower #: Amplitude of value of rewards
+
+        # for arm of a specific context-player
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Printing
+
+    # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property
+    @property
+    def lower_amplitude(self):
+        """(lower, amplitude)"""
+        if hasattr(self, 'lower') and hasattr(self, 'amplitude'):
+            return self.lower, self.amplitude
+        else:
+            raise NotImplementedError("This method lower_amplitude() has to be implemented in the class inheriting from Arm.")
+
+    @property
+    def current_context(self):
+        """Current context of the arm."""
+        if hasattr(self, 'context'):
+            return self.context
+        else:
+            raise NotImplementedError("This method current_context() has to be implemented in the class inheriting from Arm.")
+
+    # --- Printing
+
+    def __str__(self):
+        return self.__class__.__name__
+
+    def __repr__(self):
+        return "{}({})".format(self.__class__.__name__, self.__dir__)
+
+    # --- Random samples
+
+    def draw_sample(self, t=None):
+        """ Draw one random sample."""
+        raise NotImplementedError("This method draw_sample(t) has to be implemented in the class inheriting from Arm.")
+
+    def prepare_samples(self, horizon):
+        raise NotImplementedError("This method prepare_samples(horizon) has to be implemented in the class inheriting from Arm.")
+
+"""
+Uniform distribution arms
+"""
+class UniformArm(Arm):
+    """ Uniformly distributed arm, default in [0, 1].
+    """
+
+    def __init__(self, param):
+        """New arm."""
+        self.lower = param["lower_val"] #: Lower value of rewards, array[context]
+        self.upper = param["upper_val"] #: Upper value of rewards
+        self.amplitude = self.upper - self.lower #: Amplitude of value of rewards
+        if self.amplitude <= 0:
+            raise Exception("The upper bound must be larger than the lower bound")
+
+        self.mean = (self.lower + self.upper) / 2.0 #: Mean for this UniformArm arm
+        self.variance = self.amplitude**2 / 12.0 #: Variance for this UniformArm arm
+
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Random samples
+
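+    # A minimal usage sketch (illustrative only; the parameter values below are
+    # placeholders, not values taken from any configuration in this repository):
+    #
+    #   param = {"lower_val": 0.0, "upper_val": 1.0,
+    #            "context": "context 1", "playerID": 0, "armID": 0}
+    #   arm = UniformArm(param)
+    #   arm.prepare_samples(horizon=1000)           # pre-draw samples indexed by t
+    #   value = arm.draw_sample("context 1", t=10)  # or draw_sample(context) for a fresh sample
+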
+    def draw_sample(self, context, t=None):
+        """ Draw one random sample."""
+        if self.context != context:
+            raise Exception("the arm corresponding to a different context is called")
+
+        if t is None:
+            # The parameter t is ignored in this case. Do sampling right away.
+            return self.lower + (nprandom() * self.amplitude)
+        else:
+            if t >= self.horizon:
+                raise Exception("the time instance is beyond the horizon")
+            else:
+                return self.prepared_samples[t]
+
+    def prepare_samples(self, horizon):
+        if horizon <= 0:
+            raise Exception("the input horizon is invalid")
+        else:
+            self.horizon = horizon
+            self.prepared_samples = self.lower + (nprandom(self.horizon) * self.amplitude)
+
+    # --- Printing
+
+    def __str__(self):
+        return "UniformArm"
+
+    def __repr__(self):
+        return "U({:.3g}, {:.3g})".format(self.lower, self.upper)
+
+"""
+Gaussian distribution arms
+"""
+class GaussianArm(Arm):
+    """
+    Gaussian distributed arm, possibly truncated.
+    - The default setting is to truncate into [0, 1] (so draw_sample() returns values in [0, 1]).
+    """
+
+    def __init__(self, param):
+        """New arm."""
+        self.mu = param["mu"]
+        if "sigma" not in param.keys():
+            self.sigma = 0.05
+        else:
+            self.sigma = param["sigma"]
+        assert self.sigma > 0, "The parameter 'sigma' for a Gaussian arm has to be > 0."
+
+        self.lower = 0  # used to truncate the sampled value
+        self.upper = 1  # used to truncate the sampled value
+
+        # For the truncated normal distribution, see:
+        # "Simulation of truncated normal variables", https://arxiv.org/pdf/0907.4010.pdf
+        # Section "Two-sided truncated normal distribution"
+
+        alpha = (self.lower - self.mu) / self.sigma
+        beta = (self.upper - self.mu) / self.sigma
+
+        self.sampler = stats.truncnorm(alpha, beta, loc=self.mu, scale=self.sigma)
+
+        self.mean, self.variance = self.sampler.stats(moments='mv')
+
+        self.context = param["context"]
+        self.playerID = param["playerID"]
+        self.armID = param["armID"]
+
+        # prepare samples
+        self.horizon = 0
+        self.prepared_samples = []
+
+    # --- Random samples
+
+    def draw_sample(self, context, t=None):
+        """
+        Draw one random sample. If t is given, return the pre-sampled value at index t.
+        """
+        if self.context != context:
+            raise Exception("the arm corresponding to a different context is called")
+
+        if t is None:
+            # The parameter t is ignored in this case. Do sampling right away.
+            return self.sampler.rvs(1)
+        else:
+            if t >= self.horizon:
+                raise Exception("the time instance is beyond the horizon")
+            else:
+                return self.prepared_samples[t]
+
+    def prepare_samples(self, horizon):
+        """
+        The truncated normal distribution takes much more time to produce a single sample each time,
+        so we pre-sample an array and then retrieve the entries with the index t.
+        """
+        if horizon <= 0:
+            raise Exception("the input horizon is invalid")
+        else:
+            self.horizon = horizon
+            self.prepared_samples = self.sampler.rvs(self.horizon)
+
+    # --- Printing
+    def __str__(self):
+        return "Gaussian"
+
+    def __repr__(self):
+        return "N({:.3g}, {:.3g})".format(self.mean, self.sigma)
+
+"""
+Other types of distribution should be implemented here.
+""" + +if __name__ == '__main__': + print("Warning: this script 'Arms.py' is NOT executable..") # DEBUG + exit(0) + +__all__ = ["UniformArm", "GaussianArm"] \ No newline at end of file diff --git a/GameEvaluator.py b/GameEvaluator.py new file mode 100644 index 0000000..5d0a734 --- /dev/null +++ b/GameEvaluator.py @@ -0,0 +1,829 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# This file defines the evaluation and virtualization mechanisms of the simulations. +# class AlgEvaluator +# +# For each simulation there are two versions: single-process and multi-process (parallel). +# Note that the parallel version is usually 2X to 4X faster than the single-process version, depending on the +# simulation configuration. However, it is at the cost of consuming the same folds of memory. +# It may use up the machine memory and result in a program collapse when the horizon/player nunber/arm number +# is too large + +__author__ = "Wenbo Wang" + +from tqdm import tqdm +import multiprocessing as mp +import numpy as np + +#import environemnt generators +from MPMAB import MP_MAB +from HetNetSimulator import HomeBrewedHetNetEnv + +#import algorithms +from MABAlgorithms import Hungarian, StaticHungarian, MusicalChairs, TrialandError, GameofThrone +from MABAlgorithms2 import SOC + +from loggingutils import info_logger + +# result recorder +from PlayResult import ResultMultiPlayers + +if __name__ == '__main__': + print("Warning: this script 'GameEvaluator.py' is NOT executable..") # DEBUG + exit(0) + +class AlgEvaluator: + def __init__(self, configuration): + self.horizon = configuration['horizon'] + + self.nbArms = configuration['arm number'] + self.nbPlayers = configuration['player number'] + + self.context_set = configuration['context set'] + self.nbContext = len(self.context_set) + + # for loaded values or when calling the prepare() methods, set true + self.flag_pre_prepare = False + self.flag_simulation_done = False + + # we only have a unique bandit game, but may have more than one algorithms + self.mp_mab_env = None + # to be extended + if configuration['env_type'] == 'uniform': + self.mp_mab_env = MP_MAB.uniform_mab(self.context_set, self.nbArms, self.nbPlayers, + dic_lower = configuration['initial data'][0], + dic_upper = configuration['initial data'][1]) + + # 'context probabilites' is used for a differernt purpose in HetNet simulator + if 'context probabilites' in configuration.keys(): + # set arbitrary probabilities for discrete context distribution + context_probabilites = configuration['context probabilites'] + self.mp_mab_env.set_discrete_context_prob(context_probabilites) + elif configuration['env_type'] == 'gaussian': + self.mp_mab_env = MP_MAB.uniform_mab(self.context_set, self.nbArms, self.nbPlayers, + dic_mean = configuration['initial data'][0], + dic_sigma = configuration['initial data'][1]) + + # 'context probabilites' is used for a differernt purpose in 
HetNet simulator + if 'context probabilites' in configuration.keys(): + # set arbitrary probabilities for discrete context distribution + context_probabilites = configuration['context probabilites'] + self.mp_mab_env.set_discrete_context_prob(context_probabilites) + elif configuration['env_type'] == 'HetNet simulator': + hetnet_params = {'enabel mmWave': configuration['enabel mmWave'], + 'horizon': self.horizon, + 'cell range': configuration['cell range'], + 'context_prob': configuration['context_prob'], + 'los_prob': configuration['los_prob'] + } + + self.mp_mab_env = HomeBrewedHetNetEnv.HetNet_mab(self.context_set, self.nbArms, self.nbPlayers, + hetnet_params) +# print("showing UE and MUE positions") #debugging +# self.mp_mab_env.helper_plot_ue_posiiton() #debugging + + elif configuration['env_type'] == 'load data': + #TODO: load the series of arm values from a existing file +# self.flag_pre_prepare = True + pass + + self.algorithms = [] # a list of algorithms + self.result_recorders = [] # a list of result recorder for each algorithm + self.alg_names = [] + + def prepare_arm_samples(self, horizon = None): + if horizon is not None: + self.horizon = horizon + + self.mp_mab_env.prepare_samples(self.horizon) + + self.flag_pre_prepare = True + + def reset_player_number(self, nbPlayer=None): + # it is allowed only to be done after the samples are prepared + if nbPlayer is None or self.flag_pre_prepare == False: + return False + else: + self.nbPlayers = nbPlayer + self.mp_mab_env.nbPlayers = nbPlayer + + return True + + def reset_arm_number(self, nbArm=None): + # it is allowed only be done after the samples are prepared + # we are not goning to change the real record of the arm values + if nbArm is None or self.flag_pre_prepare == False: + return False + else: + self.nbArms = nbArm + self.mp_mab_env.nbArms = nbArm + + return True + + def clear_algorithms(self): + # clear all existing algorithms and their corresponding recorders + self.algorithms = [] + self.result_recorders = [] + self.alg_names = [] + + def add_algorithm(self, algo_type = 'Trial and Error', custome_params=None): + """ Create environments.""" + alg_params = {"nbPlayer": self.nbPlayers, "nbArm": self.nbArms, "context_set": self.context_set} + + #for each algorithm, append a recorder + if algo_type == 'Trial and Error' or algo_type == 'TnE Nonobservable': + #create a trial-and-error algorithm + alg_params["horizon"] = self.horizon + alg_params["c1"] = custome_params["c1"] if custome_params is not None else 100 + alg_params["c2"] = custome_params["c2"] if custome_params is not None else 5 + alg_params["c3"] = custome_params["c3"] if custome_params is not None else 1 + + alg_params["epsilon"] = custome_params["epsilon"] if custome_params is not None else 0.1 + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 2 + + if "alpha11" in custome_params.keys(): + alg_params["alpha11"] = custome_params["alpha11"] + + if "alpha12" in custome_params.keys(): + alg_params["alpha12"] = custome_params["alpha12"] + + if "alpha21" in custome_params.keys(): + alg_params["alpha21"] = custome_params["alpha21"] + + if "alpha22" in custome_params.keys(): + alg_params["alpha22"] = custome_params["alpha22"] + + alg_TnE = TrialandError(alg_params) + + if "observable" in custome_params.keys(): + alg_TnE.set_context_observability(custome_params["observable"]==1) + + self.algorithms.append(alg_TnE) + + if algo_type == 'Trial and Error': + result_TnE = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, 
self.nbArms, self.horizon) + self.result_recorders.append(result_TnE) + self.alg_names.append(algo_type) + else: + result_TnE = ResultMultiPlayers('Non-Contextual TnE', + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_TnE) + self.alg_names.append('Non-Contextual TnE') + + elif algo_type == 'Musical Chairs': #str(MusicalChair) + alg_params["horizon"] = self.horizon + # 3000 is hardcoded, as given by the original paper [Rosenski2015] + alg_params["T0"] = custome_params["T0"] if custome_params is not None else 3000 + + alg_MC = MusicalChairs(alg_params) + self.algorithms.append(alg_MC) + + # to record the learning results of alg_MC + result_MC = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_MC) + + self.alg_names.append(algo_type) + + elif algo_type == 'Hungarian': #str(Hungarian) + alg_Hungarian = Hungarian(alg_params) + self.algorithms.append(alg_Hungarian) + + result_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_hungarian) + + self.alg_names.append(algo_type) + + elif algo_type == 'Static Hungarian': + game_env = {} + + array_context, array_prob = self.mp_mab_env.get_discrete_context_prob() + alg_params["array_context"] = array_context + alg_params["array_prob"] = array_prob + + for context in self.context_set: + lower, upper, means, variance = self.mp_mab_env.get_param(context) + game_env[context] = means + + alg_params["mean_game_env"] = game_env + + alg_SHungarian = StaticHungarian(alg_params) + self.algorithms.append(alg_SHungarian) + + result_static_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_static_hungarian) + + self.alg_names.append(algo_type) + elif algo_type == 'Nonobservable-context Hungarian': + # when the algorithm is not able to observe the context (side information) + # the algorithm provides a optimal result in terms of normal MP-MAB + game_env = {} + game_mean = np.zeros((self.nbPlayers,self.nbArms)) + + array_context, array_prob = self.mp_mab_env.get_discrete_context_prob() + alg_params["array_context"] = array_context + alg_params["array_prob"] = array_prob + + for context_id in range(len(array_context)): + lower, upper, means, variance = self.mp_mab_env.get_param(array_context[context_id]) + game_mean = game_mean + means * array_prob[context_id] + + for context in self.context_set: + lower, upper, means, variance = self.mp_mab_env.get_param(context) + game_env[context] = game_mean + + alg_params["mean_game_env"] = game_env + + alg_SHungarian = StaticHungarian(alg_params) + self.algorithms.append(alg_SHungarian) + + result_static_hungarian = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_static_hungarian) + + self.alg_names.append(algo_type) + elif algo_type == 'Game of Thrones': + alg_params["horizon"] = self.horizon + + alg_params["c1"] = custome_params["c1"] if custome_params is not None else 100 + alg_params["c2"] = custome_params["c2"] if custome_params is not None else 5 + alg_params["c3"] = custome_params["c3"] if custome_params is not None else 1 + + alg_params["epsilon"] = custome_params["epsilon"] if custome_params is not None else 0.1 + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 2 + + alg_GoT = 
GameofThrone(alg_params) + self.algorithms.append(alg_GoT) + + result_GoT = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_GoT) + + self.alg_names.append(algo_type) + elif algo_type == "SOC": + alg_params["delta"] = custome_params["delta"] if custome_params is not None else 0.1 + + alg_SOC = SOC(alg_params) + self.algorithms.append(alg_SOC) + + result_GoT = ResultMultiPlayers(algo_type, + self.context_set, self.nbPlayers, self.nbArms, self.horizon) + self.result_recorders.append(result_GoT) + + self.alg_names.append(algo_type) # use the full name of 'Stable Orthogonal Allocation' + else: + #TODO: add other algorithms here + print("The algorithm type '{}' is not identified".format(algo_type)) + + def reset_algorithms(self, horizon = None): + """ + reset the internal states/recorders of the algorithms + """ + if horizon is not None: + if self.flag_pre_prepare: + if self.horizon < horizon: + raise Exception("horizon exceeds the maximum recorded values") + else: + self.horizon = horizon + else: + self.horizon = horizon + + for index in range(len(self.algorithms)): + self.algorithms[index].reset(horizon) + self.result_recorders[index].reset_record(horizon) + + self.flag_simulation_done = False + + #----- play the bandit game with all the registered algorithms + def play_game(self, algorithm_ids=None, horizon=None, flag_progress_bar=False): + """ + play_game() produces a single round of simulation results in a sequentail way. + It also works if there is no pre-prepared environment. + """ + self.reset_algorithms() + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + + if flag_progress_bar: + progress_range = tqdm(range(horizon)) + else: + progress_range = range(horizon) + + for t in progress_range: + # sample arms + if self.flag_pre_prepare == True: + context, arm_values = self.mp_mab_env.draw_sample(t) + else: + context, arm_values = self.mp_mab_env.draw_sample() + + # trim the arm_value array if needed + arm_values = arm_values[:self.nbPlayers, :self.nbArms] +# print("shape of arm_values: {}".format(np.shape(arm_values))) + + for alg_index in range(len(alg_list)): + pulls, total_reward, sampled_rewards = alg_list[alg_index].learn_policy(arm_values, context, t) + arm_choices = alg_list[alg_index].pulls2choices(pulls) + action_collisions = alg_list[alg_index].resolve_collision(pulls) + recorder_list[alg_index].store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + self.flag_simulation_done = True + + #----- play the bandit game with all the registered algorithms in a parallel manner + def play_game_parallel(self, algorithm_ids=None, horizon=None, flag_progress_bar=False, step=100): + """ + play_game_parallel() is restricted to work for the pre-prepared environment only. + The extral time used for pickling the data is not negligible. + Multiprocessing doesn't improve much the efficiency if len(algorithm_ids) is less than 3 for small horizons. 
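+
+        A minimal usage sketch (assuming 'configuration' is a dict with the entries
+        read in AlgEvaluator.__init__; the chosen algorithms are just examples):
+
+            evaluator = AlgEvaluator(configuration)
+            evaluator.prepare_arm_samples()
+            evaluator.add_algorithm(algo_type='Hungarian')
+            evaluator.add_algorithm(algo_type='Musical Chairs')
+            evaluator.play_game_parallel(flag_progress_bar=True)
+            evaluator.plot_rewards()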
+ """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + # for parallel computing on a sngle machine + max_nb_processes = max(mp.cpu_count()-2, 1) + task_pool = mp.Pool(processes = max_nb_processes) + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + + results = [] + + if flag_progress_bar == False: + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_simulation_work, + args = (horizon, alg_index, self.mp_mab_env, + alg_list[alg_index], recorder_list[alg_index])) + results.append(res) + + task_pool.close() + task_pool.join() + else: + manager = mp.Manager() + queue = manager.Queue() + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_simulation_work, + args = (horizon, alg_index, self.mp_mab_env, + alg_list[alg_index], recorder_list[alg_index], queue, step)) + results.append(res) + + # add the monitoring process + print("single-shot: number of iteration: {}".format(len(alg_list)*horizon)) + # add the monitoring process + proc = mp.Process(target=self.porgress_monitor, + args=(queue, len(alg_list), horizon)) + + # start the processes + proc.start() + task_pool.close() + task_pool.join() + queue.put(None) + proc.join() + + # each task do not exchange info. with each other + self.flag_simulation_done = True + + for res in results: + recorder = res.get() + recorder_list[recorder[0]] = recorder[1] + +# print("AlgEvaluator finishes parallelization") + + @staticmethod + def async_simulation_work(horizon, alg_index, env, alg, recorder, queue=None, step=100): + """ + async_simu_work() is restricted to be called in play_game_parallel() only. + To avoid passing the pool member, we make it a static method. + """ + # each task is identified by a tuple (alg_index, horizon) + progress_range = range(horizon) + + for t in progress_range: + context, arm_values = env.draw_sample(t) + + arm_values= arm_values[:env.nbPlayers, :env.nbArms] +# print("shape of arm_values: {}".format(np.shape(arm_values))) + + pulls, total_reward, sampled_rewards = alg.learn_policy(arm_values, context, t) + arm_choices = alg.pulls2choices(pulls) + action_collisions = alg.resolve_collision(pulls) + recorder.store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + if queue is not None: + if t % step == 0: + queue.put_nowait(step) + + return (alg_index, recorder) + + def play_repeated_game(self, horizon_list, algorithm_ids=None, + simulation_rounds=1, flag_progress_bar=False): + """ + Play the game repeatedly with different horizons in single-process mode. + It only works with the pre-prepared environment. + The recorder accompanying each algorithm do not work here, + since they store only the results from the last run. 
+ + play_repeated_game() return a dictionary with the keys: + {'algorithm_name', 'reward_series', 'collision_series', 'horizon'}, + where 'reward_series', 'horizon' and 'collision_series' are 2D arrays, + with the rows aligned with elements in 'algorithm_name' + """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + alg_names = self.get_alg_names(algorithm_ids) + # reward_series records the reward data for each algorithm + # in a form (len(algorithm_ids), simulation_rounds*len(horizon_list)) + # other records are defined in the same form + if algorithm_ids==None: + algorithm_ids = list(range(len(self.algorithms))) + + reward_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + collision_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + switching_count_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + horizon_series = np.zeros((len(algorithm_ids), simulation_rounds*len(horizon_list))) + + # convert types (convert ndarray to list) + if isinstance(horizon_list, list) != True: + horizon_list = np.ndarray.tolist(horizon_list) + +# print("number of algorithms: {}".format(len(algorithm_ids))) + + if flag_progress_bar: + progress_range = tqdm(range(simulation_rounds)) + else: + progress_range = range(simulation_rounds) + + for simu_index in progress_range: + if flag_progress_bar == False: + print("Simulation round {} of total rounds {}...".format(simu_index+1, simulation_rounds)) + + for horizon in horizon_list: + self.play_game(algorithm_ids, horizon=int(horizon), flag_progress_bar=False) # could set to None + + # example: for 3 algorithms, len(tmp_total_payoff) == 3 + tmp_total_payoff = self.get_total_payoff(algorithm_ids, horizon=int(horizon)) + tmp_total_collision = self.get_total_collision(algorithm_ids, horizon=int(horizon)) + tmp_total_switching = self.get_total_switching_count(algorithm_ids, horizon=int(horizon)) + + idx_horizon = horizon_list.index(horizon) + + id_plays = simu_index * len(horizon_list) + idx_horizon + # record the reward obtained in this single round, + # the following is prepared for a dataframe format + for id_alg in range(len(algorithm_ids)): + horizon_series[id_alg][id_plays] = horizon + reward_series[id_alg][id_plays] = tmp_total_payoff[id_alg] + collision_series[id_alg][id_plays] = tmp_total_collision[id_alg]# + switching_count_series[id_alg][id_plays] = tmp_total_switching[id_alg] + + simulation_results = {} + simulation_results['reward_series'] = reward_series + simulation_results['collision_series'] = collision_series + simulation_results['switching_count_series'] = switching_count_series + simulation_results['horizon'] = horizon_series + simulation_results['algorithm_name'] = alg_names + + return simulation_results + + #----- play the bandit game with (all) the registered algorithms in a parallel manner + def play_repeated_game_parallel(self, horizon_list, algorithm_ids=None, + simulation_rounds=1, flag_progress_bar=False, step=1): + """ + parallel version of repeated_game_play(). + play_repeated_game_parallel() only works with the pre-prepared environment. 
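+
+        The returned dictionary has the same keys as the one built by play_repeated_game();
+        e.g., results['reward_series'][i] holds the rewards recorded for the algorithm named
+        results['algorithm_name'][i] (an illustrative access pattern, not an additional API).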
+ """ + assert self.flag_pre_prepare == True, "the environment has to be prepared" + self.reset_algorithms() + + alg_list = [] + recorder_list = [] + if algorithm_ids is None: + alg_list = self.algorithms + recorder_list = self.result_recorders + else: + alg_list = [self.algorithms[index] for index in algorithm_ids] + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + # for parallel computing on a sngle machine + max_nb_processes = max(mp.cpu_count()-2, 1) + task_pool = mp.Pool(processes = max_nb_processes) + + # add works to the task pool + results = [] + if flag_progress_bar == True: + manager = mp.Manager() + queue = manager.Queue() + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_repeated_work, + args = (self.mp_mab_env, alg_list[alg_index], + alg_index, horizon_list, recorder_list[alg_index], + simulation_rounds, queue, step)) + # append the results + results.append(res) + + # add the monitoring process + proc = mp.Process(target=self.porgress_monitor, + args=(queue, len(alg_list), simulation_rounds)) + # start the processes + proc.start() + task_pool.close() + task_pool.join() + queue.put(None) + proc.join() + else: + for alg_index in range(len(alg_list)): + res = task_pool.apply_async(self.async_repeated_work, + args = (self.mp_mab_env, alg_list[alg_index], + alg_index, horizon_list, recorder_list[alg_index], + simulation_rounds)) + # append the results + results.append(res) + # start the processes + task_pool.close() + task_pool.join() + + # each task do not exchange info. with each other + self.flag_simulation_done = True + + # reward_series records the reward data for each algorithm + # in a form (len(algorithm_ids), simulation_rounds*len(horizon_list)) + # all other records are defined in the same form + reward_series = np.empty((0, simulation_rounds*len(horizon_list))) + collision_series = np.empty((0, simulation_rounds*len(horizon_list))) + switching_count_series = np.empty((0, simulation_rounds*len(horizon_list))) + horizon_series = np.zeros((0, simulation_rounds*len(horizon_list))) + alg_indicators = [] + + # re-organize the results of each algorithm + for res in results: + alg_id, recorder, reward, collision, switching_count, horizons = res.get() + # fill the recorded data with the last-round result + self.result_recorders[alg_id] = recorder + + # add a new row + reward_series = np.append(reward_series, [reward], axis=0) + collision_series = np.append(collision_series, [collision], axis=0) + switching_count_series = np.append(switching_count_series, [switching_count], axis=0) + horizon_series = np.append(horizon_series, [horizons], axis=0) + + alg_indicators.append(alg_id) + + simulation_results = {} + simulation_results['reward_series'] = reward_series + simulation_results['collision_series'] = collision_series + simulation_results['switching_count_series'] = switching_count_series + + simulation_results['horizon'] = horizon_series + simulation_results['algorithm_name'] = [self.alg_names[index] for index in alg_indicators] + +# print("len of collision_series:{}".format((collision_series.shape))) +# print("len of reward_series:{}".format((reward_series.shape))) +# print("len of switching_count_series:{}".format((switching_count_series.shape))) +# print("len of horizon_series:{}".format((horizon_series.shape))) + + return simulation_results + + @staticmethod + def async_repeated_work(env, algrithm, alg_index, horizon_list, recorder, simulation_rounds=1, queue=None, step=1): + """ + async_repeated_work() is should be 
only called by repeated_game_play_parallel(). + To avoid passing the pool member, we make it a static method. + + - a task is identified by a tuple (algrithm, horizon_list) + - 'reward_series' records the reward data for algorithm identified by 'alg_index' + in an 1-D array of len(simulation_rounds)*len(horizon_list) + - other records are defined in the same form + """ + reward_series = np.zeros(simulation_rounds*len(horizon_list)) + collision_series = np.zeros(simulation_rounds*len(horizon_list)) + switching_count_series = np.zeros(simulation_rounds*len(horizon_list)) + horizon_series = np.zeros(simulation_rounds*len(horizon_list)) + + #convert horizon type to list if it is an ndarray + if isinstance(horizon_list, list) != True: + horizon_list = np.ndarray.tolist(horizon_list) + + for simu_index in range(simulation_rounds): + for horizon in horizon_list: + idx_horizon = horizon_list.index(horizon) + + # reset the algorithm + algrithm.reset() + recorder.reset_record() + + # play the game + progress_range = range(int(horizon)) + # initialize the switching count records + tmp_total_switching = 0 + + # store the choices according to the contexts that they are in + tmp_switch_dic = {} + tmp_context_count = {} + for context in env.context_set: + tmp_switch_dic[context] = np.zeros([int(horizon), env.nbPlayers]) + tmp_context_count[context] = 0 + + for t in progress_range: + context, arm_values = env.draw_sample(t) + + arm_values = arm_values[:env.nbPlayers, :env.nbArms] + + # all in arrays + pulls, total_reward, sampled_rewards = algrithm.learn_policy(arm_values, context, t) + arm_choices = algrithm.pulls2choices(pulls) + action_collisions = algrithm.resolve_collision(pulls) + + #get collision in arrays + id_nonzero = np.where(action_collisions != 0) + action_collisions[id_nonzero] = action_collisions[id_nonzero] - 1 + + recorder.store(t, context, arm_choices, sampled_rewards, total_reward, pulls, action_collisions) + + # store choices according to contexts + tmp_switch_dic[context][tmp_context_count[context],:] = arm_choices + tmp_context_count[context] = tmp_context_count[context] + 1 + + for context in env.context_set: + # count the switching for each context +# print("Contex: {}, shape: {}".format(context, tmp_switch_dic[context].shape)) + + for tt in range(1, tmp_context_count[context]+1): + tmp_switching_count = np.sum(tmp_switch_dic[context][tt,:] != tmp_switch_dic[context][tt-1, :]) + tmp_total_switching += tmp_switching_count + + # compute directly instead of calling get_total_payoff() + tmp_total_payoff = np.sum(recorder.total_rewards[:int(horizon)]) + tmp_total_collision = np.sum(recorder.collisions[:int(horizon)]) + + id_plays = simu_index * len(horizon_list) + idx_horizon + + reward_series[id_plays] = tmp_total_payoff + collision_series[id_plays] = tmp_total_collision + switching_count_series[id_plays] = tmp_total_switching + horizon_series[id_plays] = horizon + + if queue is not None: + if simu_index % step == 0: + queue.put_nowait(step) + + return (alg_index, recorder, reward_series, collision_series, switching_count_series, horizon_series) + + @staticmethod + def porgress_monitor(queue, nbAlgorithm, nbRound): + """ + porgress_monitor() is added by the monitor process for updating the simulation progress bar. 
+ nbRound represents the total number of repeatitions in case of a repeated simulation, + or the number of horizon in case of a single-shot simulation + """ + pbar = tqdm(total = nbAlgorithm*nbRound) + for item in iter(queue.get, None): + pbar.update(item) + + #----- utility functions + def get_total_payoff(self, algorithm_ids = None, horizon = None): + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_payoff = np.zeros(len(recorder_list)) + for index in range(len(recorder_list)): + array_total_payoff[index] = np.sum(recorder_list[index].total_rewards[:horizon]) + + return array_total_payoff + + def get_total_collision(self, algorithm_ids = None, horizon = None): + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_collision = np.zeros(len(recorder_list)) + for index in range(len(recorder_list)): + idx_nonzero = np.where(recorder_list[index].collisions != 0) + + recorder_list[index].collisions[idx_nonzero] = recorder_list[index].collisions[idx_nonzero] - 1 + array_total_collision[index] = np.sum(recorder_list[index].collisions[:horizon]) + + return array_total_collision + + def get_total_switching_count(self, algorithm_ids = None, horizon = None): + """ + get the action switching count of the given list of algorithms, + we do it w/r to the context + """ + assert self.flag_simulation_done == True, "no simulation record is available" + + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + if horizon is None: + horizon = self.horizon + else: + assert self.horizon >= horizon, "not enough data for the given value of horizon" + + array_total_switching_count = np.zeros(len(recorder_list)) # with a number of the algorithms + for index in range(len(recorder_list)): + total_switching_count = 0 + # we add choices into lists w/r to contexts + tmp_switch_dic = {} + tmp_context_count = {} + for context in self.context_set: + # we allocate a bit more than needed + tmp_switch_dic[context] = np.zeros([horizon, self.nbPlayers]) + tmp_context_count[context] = 0 + + # separate the action choices according to contexts + for tt in range(0, horizon): + context = self.result_recorders[index].context_history[tt] + tmp_switch_dic[context][tmp_context_count[context],:] = self.result_recorders[index].choices[:,tt] + tmp_context_count[context] = tmp_context_count[context] + 1 + + for context in self.context_set: + # count the switching for each context + for tt in range(1, tmp_context_count[context]+1): + tmp_switching_count = np.sum(tmp_switch_dic[context][tt,:] != tmp_switch_dic[context][tt-1,:]) + total_switching_count += tmp_switching_count + + array_total_switching_count[index] = total_switching_count + + return array_total_switching_count + + def get_alg_names(self, algorithm_ids = None): 
+ """ + get the name list of the given algorithms + """ + if algorithm_ids is None: + name_list = self.alg_names + else: + name_list = [self.alg_names[index] for index in algorithm_ids] + + return name_list + + #----- plotting + def plot_rewards(self, algorithm_ids = None, horizon = None, save_fig = False, save_data = False): + if self.flag_simulation_done == False: + print("No simulation results are ready") + else: + recorder_list = [] + if algorithm_ids is None: + recorder_list = self.result_recorders + else: + recorder_list = [self.result_recorders[index] for index in algorithm_ids] + + recorder_list[0].plot_cumu_rewards(horizon, other_results=recorder_list[1:], save_fig=save_fig, save_data=save_data) + recorder_list[0].plot_avg_reward(horizon, other_results=recorder_list[1:], save_fig=save_fig, save_data=save_data) \ No newline at end of file diff --git a/HetNetSimulator.py b/HetNetSimulator.py new file mode 100644 index 0000000..4699923 --- /dev/null +++ b/HetNetSimulator.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file implements a simple heterogeneous network with underlying macro-cell UEs +# working in a typical 5G cell, and the overlaying IoT devices working in a narrow-bandwidth (NB) +# mode. IoT devices are placed randomly at fixed locations and macro-cell UEs are moving +# randomly according to a Gauss—Markov model. + +__author__ = "Wenbo Wang" + +from MPMAB import MP_MAB + +import scipy +import numpy as np + +import matplotlib.pyplot as plt +from tqdm import tqdm + +from plotutils import prepare_file_name + +if __name__ == '__main__': + print("Warning: this script 'HetNetSimulator.py' is NOT executable..") # DEBUG + exit(0) + +class HomeBrewedHetNetEnv(MP_MAB): + """ + The network simulator and its interface for MP_MAB. + In the future version, we planned to incooperate existing simulators such as QuaDRiGa-5G for the channel models for the macro cell. + (see https://quadriga-channel-model.de/#Publications) + For an example of a HetNet simulator over QuaDRiGa, see https://github.com/ICT-Embedded/5GNR-HetNet_Model + Due to the time consumption of building it with matlab engine, we adopt a home-brewed HetNet simulator in this version. + """ + def __init__(self, context_set, nbArms=20, nbPlayers=10): + """""" + self.nbArms = nbArms # number of channels + + # for poisson point process it is the intensity of nodes for a square area of 1 + # for uniform distribution, it is the number of nodes + assert nbPlayers<=nbArms, "the number of channels should be no less than the number of devices." 
+ self.nbPlayers = nbPlayers + self.nbArms = nbArms + + self.context_set = context_set# + self.prob_LoS = np.zeros(len(context_set)) + self.prob_context = np.zeros(len(context_set)) + + self.current_arm_value = np.zeros((nbPlayers, nbArms)) + self.current_context = None + + self.arms = {}; + self.horizon = 0 + + self.flag_mmWave = True + + self.ue_position = [] + self.bs_position = [] + + self.nb_mue = 0 + self.mue_position = []# macro cell UE + self.mue_mean_vel = [] + self.mue_mean_dir = [] + + #basic parameters of channel, not exposed to the parameter setting yet + self.frequence = 28e9 # 28GHz + self.nb_UPBC = 4# number of unique pointing beans combined + self.wf_A = 0.07# weighting factor via MMSE for fitting BC-CI path loss model + self.ue_shadow_variance = np.zeros((nbPlayers, nbArms)) # currently based on an arbitrary value, e.g., 9 + self.ue_fading_variance = np.ones((nbPlayers, nbArms)) # currently based on an arbitrary value + self.mobile_alpha = 0.3 + + self.mue_shadow_variance = np.zeros((len(context_set))) # the same across arms + + """ + The path loss exponent model is silightly different w.r.t. to different experiments in the literature. + According to "Path Loss, Shadow Fading, and Line-Of-Sight Probability Models for 5G Urban Macro-Cellular Scenarios", [Sun2015], + PLE_LoS = 2.1 and PLE_NLoS = 2.6 for the CI model in the 28GHz-urban macro-cellular scenario + """ + self.PLE_LoS = 2 # path loss exponent LoS + self.PLE_NLoS = 3 # path loss exponent NLoS + self.mue_power = 10 * np.random.uniform(low=0.5, high=1.0, size=len(context_set)) # 40 dBm, 10w + self.ue_power = 1 # 30 dBm, 1w + self.atenna_gain = 3 #dBi + self.noise = 5e-17 # Watt + + # for beamforming, the oversampling factor is 1 + # we consider the beamforming vector to be randomly choosen, + # this project does not aim to provide mechanisms of optimizing it + self.F = np.zeros([self.nb_UPBC, self.nb_UPBC], dtype=complex) + theta = np.pi * np.arange(start=0., stop=1., step=1./self.nb_UPBC) + # Beamforming codebook F + for nn in np.arange(self.nb_UPBC): + exponent = 1j * np.pi * np.cos(theta[nn]) * np.arange(self.nb_UPBC) + bf_vec = 1. / np.sqrt(self.nb_UPBC) * np.exp(exponent) + self.F[:,nn] = bf_vec[nn] + + self.mue_cb_idx = np.random.randint(self.nb_UPBC) + # to simplify the process of computation, we consider the IoT devices are using the same ones + # it does not affect the simulation results + self.iot_cb_idx = np.random.randint(self.nb_UPBC) + + # recorder of the pre-sampled arm values + self.arm_values = {} + self.max_arm_value ={} # recording the maximum rate for normalization for each context along the time horizon + for context in self.context_set: + self.arm_values[context] = [] + self.max_arm_value[context] = [] + + self.flag_sample_prepared = False + + @classmethod + def HetNet_mab(cls, context_set, nbArms, nbPlayers, hetnet_params): + """ + A number of parameters are hardcoded for the purpose of simplification. 
+ However, they can be easily exposed to the upper layer by moving into 'hetnet_params' + + """ + hetnet_inst = cls(context_set, nbArms, nbPlayers) + + hetnet_inst.horizon = hetnet_params['horizon'] + hetnet_inst.flag_mmWave = hetnet_params['enabel mmWave'] + + cell_range = hetnet_params['cell range'] if 'cell range' in hetnet_params.keys() else 200 + hetnet_inst.bs_position = np.array([0.5 * cell_range, 0.5 * cell_range]) # always placed at the center + hetnet_inst.ue_position, new_nbPlayer = hetnet_inst.initialize_UE_position(nbPlayers=nbPlayers, distance = cell_range, + dist_model=hetnet_params['dist_model'] if 'dist_model' in hetnet_params.keys() else 0) + + hetnet_inst.mue_position, new_nbMUE = hetnet_inst.initialize_UE_position(nbPlayers=len(hetnet_inst.context_set), + distance=cell_range, dist_model=0) + + # randomly set shadowing variances of ue's, as an array of (nbUE-nbChannel) + shadow_vr_base = 2.0 if 'shadow_vr' not in hetnet_params.keys() else hetnet_params['shadow_vr'] + hetnet_inst.ue_shadow_variance = np.random.uniform(size=(nbPlayers, nbArms))*shadow_vr_base + hetnet_inst.mue_shadow_variance = np.random.uniform(size=len(context_set))*shadow_vr_base + + fading_vr_base = 1.0 if 'fading_vr' not in hetnet_params.keys() else hetnet_params['fading_vr'] + hetnet_inst.ue_fading_variance = np.random.uniform(size=(nbPlayers, nbArms))*fading_vr_base + + # assume that different context has different probability of LoS path + hetnet_inst.set_discrete_context_prob(hetnet_params['context_prob'], hetnet_params['los_prob']) + + nb_MUE = len(hetnet_inst.prob_context) + hetnet_inst.mue_mean_vel, hetnet_inst.mue_mean_dir = hetnet_inst.initialize_UE_mobile_model(nb_MUE, scale_velocity=0.1) + + hetnet_inst.mue_vel = np.zeros(nb_MUE) + hetnet_inst.mue_dir = np.zeros(nb_MUE) + + hetnet_inst.vel_base = 1.0 if 'vel_base' not in hetnet_params.keys() else hetnet_params['vel_base'] + + return hetnet_inst + + def set_discrete_context_prob(self, context_prob, los_prob): + """ + assign arbitrary probabilities to contexts + """ + if set(context_prob.keys()) != self.context_set: + raise Exception("probability values do not match the set of context") + + self.context_array = np.array(list(context_prob.keys())) + + # probability of different MUE/UE in neighbor cells transmitting + self.prob_context = np.array(list(context_prob.values())) + self.prob_context = self.prob_context / np.sum(self.prob_context) # normalize + + # probability of different MUE to the receiving AP + # this is to simulate the situation that transmissions from different MUE occupy the channels in the cell + self.prob_LoS = np.array(list(los_prob.values())) + self.prob_LoS = self.prob_LoS / np.sum(self.prob_LoS) # normalize + + + def initialize_UE_position(self, nbPlayers, distance=200, dist_model=0): + """ + initialize the positions of IoT devices and UEs + """ + if dist_model == 1:# PPP distribution + #TODO: the input number of nodes may not be equal to N according to the PPP distribution + # we need to update the player number self.nbPlayers + # do not call this branch in this version + N = scipy.stats.poisson( nbPlayers*1 ).rvs() + + else: # uniform distribution, TODO: add new distribution model here + N = nbPlayers + + x = scipy.stats.uniform.rvs(0, 1,((N,1)))*distance + y = scipy.stats.uniform.rvs(0, 1,((N,1)))*distance + + ue_position = np.hstack((x,y)).T + + return ue_position, N + + def initialize_UE_mobile_model(self, nbPlayers, scale_velocity=1): + ue_mean_vel = np.random.uniform(nbPlayers)*scale_velocity + ue_direction = 
np.random.uniform(nbPlayers)*np.pi*2 + + return ue_mean_vel, ue_direction + + """Draw samples""" + def draw_sample(self, t=None): + """ + draw a new sample + """ + context_id_array = np.arange(start=0, stop=len(self.context_array)) + id_context = np.random.choice(a=context_id_array, size=None, p=self.prob_context) # choose the ID of MUE + self.current_context = self.context_array[id_context] # get the context value + + if t == None: + # update all MUEs' positions + self.mue_position, self.mue_vel, self.mue_dir = self.update_ue_position(self.mue_position, self.mue_vel, + self.mue_dir, self.mobile_alpha, self.mue_mean_vel, self.mue_mean_dir) + + current_arm_value = self.compute_device_rate(id_context) + + # normalization + self.current_arm_value = current_arm_value / np.max(current_arm_value) + else: + if self.flag_sample_prepared == False: + raise Exception("samples are not prepared") + else: + # draw samples from the stored data + self.current_arm_value = self.arm_values[self.current_context][t] + + return self.current_context, self.current_arm_value # we only return part of the real data + + + def prepare_samples(self, horizon, flag_progress_bar=True): + """ + Prepare the samples along the time horizon in advance. + The sequential generation of UE positions would be the most significant bottleneck + for the simulation. + """ + if horizon <= 0: + raise Exception("Input horizon is not valid") + + self.horizon = horizon + + if flag_progress_bar: + progress_range = tqdm(range(horizon)) + else: + progress_range = range(horizon) + + for time in progress_range: + # update position first + self.mue_position, self.mue_vel, self.mue_dir = self.update_ue_position(self.mue_position, self.mue_vel, + self.mue_dir, self.mobile_alpha, self.mue_mean_vel, self.mue_mean_dir) + # the positions are the same w.r.t. each channel, but the shadowing/fading parameters are different + for context in self.context_set: + id_context = self.context_array.tolist().index(context) #np.where(self.context_array == context) + + rates = self.compute_device_rate(id_context) + + # normalization + current_max_rate = np.max(rates) + normalized_rate = rates / current_max_rate + # record the normalized rate matrix at "time" + self.arm_values[context].append(normalized_rate) + self.max_arm_value[context].append(current_max_rate) #added @ 2020.02.21 + + self.flag_sample_prepared = True + + """ + methods used in draw_sample() + """ + def update_ue_position(self, ue_position, ue_vel, ue_dir, mobil_alpha, ue_mean_vel, ue_mean_dir): + # Gauss—Markov mobility model, Chapter 2.5. 
Gauss—Markov "A survey of mobility models for ad hoc network research", [Camp2002] + # Calculate the new velocity and direction values using the Gauss-Markov formula: + # new_val = alpha*old_val + (1-alpha)*mean_val + sqrt(1-alpha^2)*rv + # where rv is a random number sampled from a normal (gaussian) distribution + # reference code (ns-3): https://www.nsnam.org/docs/doxygen/gauss-markov-mobility-model_8cc_source.html + one_minus_alpha = 1 - mobil_alpha + sqrt_alpha = np.sqrt(1 - mobil_alpha**2) + + rv = np.random.normal(size=len(ue_vel)) * self.vel_base # velocity + rd = np.random.normal(size=len(ue_vel)) # angle + + # random value, default parameters: mean = 0, and variance = 1 + ue_vel = mobil_alpha * ue_vel + one_minus_alpha * ue_mean_vel + sqrt_alpha * rv + ue_dir = mobil_alpha * ue_dir + one_minus_alpha * ue_mean_dir + sqrt_alpha * rd + + cos_dir = np.cos(ue_dir) + sin_dir = np.sin(ue_dir) + + x = ue_position[0,:] + ue_vel * cos_dir + y = ue_position[1,:] + ue_vel * sin_dir + + ue_position = np.vstack((x,y)) + + return ue_position, ue_vel, ue_dir + + # used for sampling channels gains + def update_pathloss_db(self, ue_pos, bs_pos, flag_LoS=False): + #update the pathloss of the IoT devices and the macrocell UE + if self.flag_mmWave == True: + if flag_LoS == True: + pl_db = self.path_loss_dB_mmWave(ue_pos, bs_pos, self.PLE_LoS) + else: + pl_db = self.path_loss_dB_mmWave(ue_pos, bs_pos, self.PLE_NLoS) + +# pl = 10 ** (pl_db / 10.) + else: + pl_db = self.path_loss_dB_cost231(ue_pos, bs_pos) +# pl = 10 ** (pl_db / 10.) + + return pl_db # path loss in dB + + # we may need to compute different ue/device-BS pairs + def path_loss_dB_mmWave(self, ue_position, bs_position, PLE): + """ + Based on IEEE TWC paper "Directional Radio Propagation Path Loss Models for Millimeter-Wave + Wireless Networks in the 28-, 60-, and 73-GHz Bands", Oct. 
2016 [Sulyman2016] + Nr is the number of unique pointing beams combined, Nr = 3,4,5 + """ + #PLE = 2 for LoS, 4 for NLoS, see self.PLE_LoS, self.PLE_NLoS + c = 3e8 # light speed + + # to align the notations with the equations in the refernece [Sulyman2016] + A = self.wf_A + nr = self.nb_UPBC + fc = self.frequence # in Hz + + if ue_position.ndim == 1: + pass # single ue, don't have to do anything + else: + bs_position = np.broadcast_to(bs_position, (ue_position.shape[::-1])).T + + dist = np.linalg.norm(ue_position-bs_position, axis=0) # along the rows + +# fspl = 32.4 + 20 * np.log10(fc / 1e9) # fc in GHz, Eq (1a) of 2016 [Sulyman2016], equivalent equation + fspl = 20 * np.log10((4*np.pi*dist*fc) / c) # Eq (1a) of 2016 [Sulyman2016] + pl = fspl + 10 * PLE * np.log10(dist) * (1 - A*np.log2(nr)) # Eq (8) of 2016 [Sulyman2016] + + return pl # in dB + + def path_loss_dB_cost231(self, ue_position, bs_position): + """ + reference: A.2.2 COST 231 Extension to Hata Model, Channel Models A Tutorial, [Jain2007] + code reference: https://www.mathworks.com/matlabcentral/fileexchange/21795-hata-and-cost231-hata-models + """ + fc = self.frequence + + dist =np.linalg.norm(ue_position-bs_position, axis=1) + + h_BS = 20 # effective base station antenna height + h_UE = 1.5 # mobile station antenna height + + # COST231 + C = 3 + + # equation: ahMS = (1.1 * log10(f) - 0.7) * hMS - (1.56 * log10(f) - 0.8); + ahMS = (1.1 * np.log10(fc/1e6) - 0.7)*h_UE - (1.56*np.log10(fc/1e6) - 0.8) + + # equation: L50dB = 46.3 + 33.9 * log10(f) - 13.82 * log10(hBSef) - ahMS + (44.9 - 6.55 * log10(hBSef)) * log(d) + C; + # f is in MHz, dist is in km + pl = 46.3 + 33.9 * np.log10(fc/1e6) + 13.82 * np.log10(h_BS) - ahMS + (44.9 - 6.55 * np.log10(h_BS)) * np.log10(dist/1000.) + C + + return pl # in dB + + def update_shadow(self, shadow_mean, shadow_var, ue_number): + """ + log-normal shadowing + """ + # ue_number is used in case the shadowing parameters are the same + chi_shadow = np.random.normal(loc=shadow_mean, scale=shadow_var, size=ue_number) # log-normal shadowing in dB + + return chi_shadow # in dB + + + def update_fast_fading(self, ue_number, rb_number, fading_variance, fading_type=0): + """ + Rayleigh fading + """ + if fading_type == 0: + """ + Rayleigh fading, + """ + if rb_number > 1: + scale = np.broadcast_to(fading_variance, (ue_number, rb_number)) + hf = 1/np.sqrt(2*scale) * (np.random.normal(scale = scale, size = (ue_number, rb_number)) + + 1j* np.random.normal(scale = scale, size = (ue_number, rb_number))) + else: + scale = fading_variance + hf = 1/np.sqrt(2*scale) * (np.random.normal(scale=scale, size=ue_number) + + 1j* np.random.normal(scale=scale, size=ue_number)) + + h_fading = 20 * np.log10(np.abs(hf)) # in dB + else: + #implement other fast fading model here + raise NotImplementedError("fast fading types not supported") + + return h_fading # in dB + + + def update_MUE_channels(self, mue_position, mue_shadow_variance, flag_LoS=False): + """ + update_MUE_channels() and update_IoT_channels() are functions called by compute_device_rate() + """ + # update_MUE_channels() is supposed to update a single MUE's (according to the context id) channel information + # multiple MUE is also supported + # + pl = self.update_pathloss_db(mue_position, self.bs_position, flag_LoS) + sh = self.update_shadow(shadow_mean=0, shadow_var=mue_shadow_variance, ue_number=1) + ff = 0# compared with the path loss, we ignore fast fading here + + if mue_position.ndim == 1: + # to check if we compute for a single MUE or multiple ones + nb_mue = 
1 + else: + nb_mue = mue_position.shape[0] + + + channel_gains = np.array((nb_mue, self.nb_UPBC), dtype=complex) + + if nb_mue == 1: + channel_gains = self.update_channel_gain(pl, sh, ff, self.atenna_gain, flag_LoS) + else: + for ii in range(nb_mue): + channel_gains[ii,:] = self.update_channel_gain(pl[ii], sh[ii], ff[ii], self.atenna_gain, flag_LoS) + + return channel_gains + + def update_IoT_channels(self, flag_LoS=False): + # we assume that the iot devices do not move + pl = self.update_pathloss_db(self.ue_position, self.bs_position, flag_LoS) # the same for each player + + channel_gains = np.zeros((self.nbPlayers, self.nbArms, self.nb_UPBC), dtype=complex) + for id_arm in range(self.nbArms): + # not the same for each channel/arm + sh = self.update_shadow(shadow_mean=0, shadow_var=self.ue_shadow_variance[:,id_arm], ue_number=self.nbPlayers) + ff = self.update_fast_fading(self.nbPlayers, 1, self.ue_fading_variance[:,id_arm]) # not the same for each channel/arm + + for ii in range(self.nbPlayers): + channel_gains[ii, id_arm, :] = self.update_channel_gain(pl[ii], sh[ii], ff[ii], self.atenna_gain, flag_LoS) #pl + sh - ff + + return channel_gains + + def update_channel_gain(self, pl, sh, ff, atenna_gain, flag_LoS): + """ + consider a uniform linear array (ULA) with nb_UPBC antennas, + the steering vector of the array towards direction θ is denoted as theta + """ + path_loss = 10 ** (pl / 10.) + + vb = np.zeros(self.nb_UPBC, dtype=complex) + # v is the array vector + if (flag_LoS == True): + Np = 1 + vb[0] = 1. / np.sqrt(path_loss) + else: + # + Np = self.nb_UPBC + vb = (np.random.normal(size=Np) + 1j * np.random.normal(size=Np)) / np.sqrt(path_loss) + + # randomly generated + theta = np.random.uniform(low=0, high=np.pi, size=Np) + rho = 10 ** ((atenna_gain + sh + ff ) / 10.) + + # initialize the channel as a complex variable. + h_ch = np.zeros(self.nb_UPBC, dtype=complex) + + for path in np.arange(Np): + exponent = 1j * np.pi * np.cos(theta[path]) * np.arange(self.nb_UPBC) + + bf_vec = 1. / np.sqrt(self.nb_UPBC) * np.exp(exponent) + h_ch = h_ch + bf_vec[path] / rho * bf_vec.T # scalar multiplication into a vector + + h_ch = h_ch * np.sqrt(self.nb_UPBC) + + return h_ch + + def compute_device_rate(self, id_context): + id_LoS = np.random.choice([0,1], p=[self.prob_LoS[id_context], 1-self.prob_LoS[id_context]]) + + mue_channel_gain = self.update_MUE_channels(self.mue_position[:, id_context], self.mue_shadow_variance[id_context], + flag_LoS=(id_LoS == 0)) # part of the context, interference + + iot_channel_gains = self.update_IoT_channels(flag_LoS=False) + + # get the channel capacity w.r.t. 
each IoT devices over each arm/channel + interference_power = self.mue_power[id_context] * abs(np.dot(mue_channel_gain.conj(), self.F[:, self.mue_cb_idx])) ** 2 + + iot_received_power = np.zeros((self.nbPlayers, self.nbArms))# 2D matrix, columns correspond to each channel + for player_id in range(self.nbPlayers): + for ch_id in range(self.nbArms): + iot_received_power[player_id][ch_id] = self.ue_power * abs(np.dot(iot_channel_gains[player_id, ch_id,:].conj(), + self.F[:, self.iot_cb_idx])) ** 2 + + mue_ipn = interference_power + self.noise #interference plus noise, scalar + + # should be a (nbPlayer, nbArm) matrix + rates = np.log2(1 + np.divide(iot_received_power, mue_ipn)) + + # update the rate value for all players over all arms + return rates + + """utility functions""" + # helper_plot_ue_posiiton() is used only for debugging + def helper_plot_ue_posiiton(self): + """ + For debugging purpose + """ + plt.figure(figsize=(4,3)) + plt.scatter(self.ue_position[0,:], self.ue_position[1,:], edgecolor='b', facecolor='none', alpha=0.5 ) + plt.scatter(self.mue_position[0,:], self.mue_position[1,:], edgecolor='r', facecolor='none', alpha=0.5 ) + + def get_discrete_context_prob(self): + return self.prob_context + + def get_param(self, context): + # it is difficult to get the rate statisitics of the UEs over each channel + raise NotImplementedError("get_param() is not campatible with class HomeBrewedHetNetEnv.") + + def get_current_param(self, t=None): + """ + Get the current sampling parameters of arms in the given context. + """ + raise NotImplementedError("This method get_current_param() is not campatible with class HomeBrewedHetNetEnv.") + + def save_environment(self, file_name=None): + #TODO: not fully tested yet, not used + if self.flag_sample_prepared == False: + print("No data is prepared") + else: + # we cannot select the path yet, put the file to the default directory "\results" of the current path + file_path = prepare_file_name("{}-{}".format(file_name if file_name is not None else "", "env"), + alg_name = None, ext_format = "mat") + + scipy.io.savemat(file_path, self.arm_values) + + def load_environment(self, file_path, horizon=None): + #TODO: not fully tested yet, not used + try: + self.arm_values = scipy.io.loadmat(file_path) + except: + print("No data is loaded") + + self.flag_sample_prepared = True \ No newline at end of file diff --git a/MABAlgorithms.py b/MABAlgorithms.py new file mode 100644 index 0000000..bcea531 --- /dev/null +++ b/MABAlgorithms.py @@ -0,0 +1,715 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines and implements the multi-player, multi-arm bandits algorithms. +# Currently, the realized algorithms include: +# 1. Hungarian: standard Hungarian algorithm for centralized arm allocation, +# 2. StaticHungarian: centralized allocation algorithm if the mean value of each arm is known +# 3. 
MusicalChairs: Musical Chairs algorithm for multi-player, homogeneous multi-arm bandit +# 4. TrialandError: Log-linear learning algorithm for contextual multi-player multi-arm bandits, +# with heterogeneous arms +# 5. GameofThrone: Log-linear learning algorithm for multi-player multi-arm bandits with heterogeneous +# arms. It is sub-optimal for contextual bandits +# +# Typically, one distributed algorithm is accompanied by a corresponding player class +# see also MABAlgorithms2.py + +__author__ = "Wenbo Wang" + +import numpy as np +from scipy.optimize import linear_sum_assignment +from Players import MusicChairPlayer, TnEPlayer, GoTPlayer + +from loggingutils import info_logger + +if __name__ == '__main__': + print("Warning: this script 'MABAlgorithms.py' is NOT executable..") # DEBUG + exit(0) + + +class MABAlgorithm(object): + """ Base class for an algorithm class.""" + def __init__(self, param): + """ Base class for an algorithm class.""" + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + + self.nbAgent = 0 # number of agents in the algorithms, can be centralized, decentralized or partially decentralized + + # an agent is usually corresponding to a player, it has its own + self.agents = [] + + # --- Printing + def __str__(self): + return self.__class__.__name__ + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, self.__dir__) + + # --- functionalities + def resolve_collision(self, pulls): + (nbPlayer, nbArm) = np.shape(pulls) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + collisions = pulls.sum(axis=0) + + assert len(collisions) == nbArm, "dimension of collisions is incorrect" + return collisions + + def learn_policy(self, game_env, context=None, time=None): + """ + Learn policies based on the given game environments. + A game environment can be in the form of (context, sampel_reward_matrix) + """ + raise NotImplementedError("This method learn_policy(t) has to be implemented in the class inheriting from MABAlgorithm.") + + def reset(self, horizon=None): + """ + The rest parameters cannot be reset, except self.horizon. + """ + raise NotImplementedError("This method reset() has to be implemented in the class inheriting from MABAlgorithm.") + + def pulls2choices(self, pulls): + """ + Convert pulls into choices + """ + (nbPlayer, nbArm) = np.shape(pulls) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + + arm_choices = np.zeros(nbPlayer, dtype=int) + + arm_selected = np.nonzero(pulls) # index of non-zero values + + # for some algorithms there may be a case when a player refuse to choose any arm + for index in range(len(arm_selected[0])): + playerID = arm_selected[0][index] + arm_choices[playerID] = arm_selected[1][index] # playerID should be 0, 1, 2,..., nbPlayer-1 + + return arm_choices + + def observe_distributed_payoff(self, game_env, collisions): + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." 
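+        # Collision model used throughout this file: a player observes its sampled reward
+        # only when it is the sole player pulling its selected arm (collisions[arm] == 1);
+        # otherwise (a collision, or no arm selected, i.e., selected_arm < 0) the observed
+        # payoff is 0.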
+ + current_reward = np.zeros(self.nbPlayer) + + for playerID in range(self.nbPlayer): + selected_arm = self.agents[playerID].selected_arm + + # for some algorithms there may be a case when a player refuses to choose any arm + if selected_arm < 0: + current_reward[playerID] = 0 + else: + if collisions[selected_arm] == 1: + current_reward[playerID] = game_env[playerID][selected_arm]# not collidiing + else: + current_reward[playerID] = 0# colliding or void + + # returen an array of dimension nbArm + return current_reward + +""" + Algorithm: centralized Hungarian +""" +class Hungarian(MABAlgorithm): + """ + Centralized assignment algorithm in the form of Hungarian (Munkres) algorithm. + Implemented based on scipy.optimize.linear_sum_assignment. + It does not have the structure of multiple agents as the other algorithms. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + """For simplicity we do not implement the single agent here.""" +# self.nbAgent = 0 + self.agents = [] + + # --- Printing + def __str__(self): + return "Hungarian" + + # --- functionalities + def reset(self, horizon=None): + pass # do nothing + + def learn_policy(self, game_env, context=None, time=None): + # context is not used in Hungarian + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + #the mehtod requires the number of rows (jobs) to be larger than that of columns (workers) + cost_matrix = np.negative(game_env.transpose()) + # note that the cost_matrix is a transpose of the original matrix + col_ind, row_ind = linear_sum_assignment(cost_matrix) + + pulls = np.zeros((nbPlayer, nbArm)) + sampled_rewards = np.zeros(nbPlayer) + for ii in range(len(row_ind)): + playerID = row_ind[ii] + sampled_rewards[playerID] = game_env[playerID][col_ind[ii]] + pulls[playerID, col_ind[ii]] = 1 + + total_rewards = game_env[row_ind, col_ind].sum() + + return pulls, total_rewards, sampled_rewards + +""" + Algorithm: centralized Hungarian over means of arm-values (static values) +""" +class StaticHungarian(Hungarian): + """ + This algorithm is implemented for the purpose of deriving the throetic regret + """ + def __init__(self, param): + super().__init__(param) + self.pulls = {} + + #we keep them for later use + self.total_rewards = {} + self.static_rewards = {} + + self.mean_env_payoff = param["mean_game_env"] + self.flag_allocation_ready = False + + for context in self.context_set: + self.pulls[context] = np.zeros((self.nbPlayer, self.nbArm)) + self.total_rewards[context] = 0 + self.static_rewards[context] = np.zeros(self.nbPlayer) + + self.array_context = param["array_context"] + self.array_prob = param["array_prob"] + + self.mean_total_reward = 0 + self.mean_static_reward = np.zeros(self.nbPlayer) + + # --- Printing + def __str__(self): + return "Static Hungarian" + + def reset(self, horizon=None): + self.mean_total_reward = 0 + self.mean_static_reward = np.zeros(self.nbPlayer) + self.flag_allocation_ready = False + + def learn_policy(self, game_env, context=None, time=None): + #ignore all the inputs + if self.flag_allocation_ready == False: + for context_id in range(len(self.array_context)): + tmp_context = self.array_context[context_id] + self.pulls[tmp_context], self.total_rewards[tmp_context], self.static_rewards[tmp_context] = 
super().learn_policy( + self.mean_env_payoff[tmp_context], tmp_context) + + self.mean_total_reward = self.mean_total_reward + self.total_rewards[tmp_context] * self.array_prob[context_id] + self.mean_static_reward = self.mean_static_reward + self.static_rewards[tmp_context] * self.array_prob[context_id] + +# print("Static Hungarian: {}".format(tmp_context)) + + self.flag_allocation_ready = True + + return self.pulls[context], self.mean_total_reward, self.mean_static_reward + +""" + Algorithm: musical chairs +""" +class MusicalChairs(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of Musical Chair algorithm. + Implemented based on the paper "Multi-Player Bandits – a Musical Chairs Approach", by Jonathan Rosenski and + Ohad Shamir @2015 [Rosenski2015]. Note that this algorithm is designed for multi-player only and for + contextual bandit it adapts to the condition of unobservable context. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] # not really used by the algorithm + self.horizon = param["horizon"] + + #each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + for playerID in range(self.nbPlayer): + player_param = {"horizon": self.horizon, + "nbArm": self.nbArm, + "playerID": playerID + } + + if "T0" in param.keys(): + player_param["T0"] = param["T0"] + + self.agents.append(MusicChairPlayer(player_param)) + + self.time = 0 + self.T0 = self.agents[0].T0 + + # --- Printing + def __str__(self): + return "Musical Chairs" + + # --- functionalitiess + def reset(self, horizon=None): + self.time = 0 + for agent in self.agents: + agent.reset() + + if horizon is not None: + self.horizon = horizon + + + def learn_policy(self, game_env, context=None, time=None): + (nbPlayer, nbArm) = np.shape(game_env) +# print("number of arms: {}, number of recorded arms: {}".format(nbArm, self.nbArm)) + + assert nbArm == self.nbArm, "input arm number does not match the stored environment parameters." + assert nbPlayer == self.nbPlayer, "input player number does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." + + pulls = np.zeros((nbPlayer, nbArm)) + + if time <= self.T0: + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(context, game_env[agentID,:], collisions) + else: + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].update_musical_chair(time, collisions) + + sampled_rewards = self.observe_distributed_payoff(game_env, collisions) + total_rewards = np.sum(sampled_rewards) + return pulls, total_rewards, sampled_rewards + +""" + Algorithm: trial and error [Wang2019] +""" +class TrialandError(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of trial-and-error learning algorithm. + Implemented for the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed Bandwidth as a + Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. 
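+
+    A minimal parameter sketch (hypothetical values; only keys read in __init__ below are
+    shown, where c1/c2/c3 control the lengths of the exploration, trial-and-error and
+    exploitation phases of each epoch):
+        param = {"nbPlayer": 2, "nbArm": 3, "context_set": {"context 1"}, "horizon": 100000,
+                 "c1": 100, "c2": 100, "c3": 500, "xi": 0.001, "epsilon": 0.1, "delta": 2}
+        tne = TrialandError(param)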
+ Note that this algorithm is designed for multi-player when contextual information is observable. + (If context is not observable, the algorithm produces a sub-optimal allocation in the same level as a distributed learning + algorithm for non-contextual MP-MAB) + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] + self.horizon = param["horizon"] # agents don't know the fixed horizon when running the algorithm + + # each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + self.xi = param["xi"] if "xi" in param.keys() else 0.001 + + # a large epsilon will leads to more frequent transtions (explorations) in the intermedate game + self.epsilon = param["epsilon"] if "epsilon" in param.keys() else 0.1 + # see Theorem 1 in [Wang2019], not kept by the agents, determining trial-and-error rounds + self.delta = param["delta"] if "delta" in param.keys() else 2 + + self.rho = param["rho"] if "rho" in param.keys() else 0.5 # no longer used by the improved algorithm + + self.exploration_round = param["c1"] + self.c2 = param["c2"] + self.c3 = param["c3"] + + for playerID in range(self.nbPlayer): + player_param = {"context_set": self.context_set, + "nbArm": self.nbArm, + "playerID": playerID, + "xi": self.xi, + "epsilon": self.epsilon, + "delta": self.delta, + "rho": self.rho, + "alpha11": param['alpha11'] if 'alpha11' in param.keys() else None, + "alpha21": param['alpha21'] if 'alpha21' in param.keys() else None, + "alpha12": param['alpha12'] if 'alpha12' in param.keys() else None, + "alpha22": param['alpha22'] if 'alpha22' in param.keys() else None + } + + self.agents.append(TnEPlayer(player_param)) + + self.time = 0 + # used for determining the epoch + self.epoch = 1 + + #initialize for the first epoch + self.tne_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.tne_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + self.flag_observable = True # set if the context is observable + + # for debug purpose + self.nbExploration = 0 + self.nbTnE = 0 + self.nbExploitation = 0 + + # --- Printing + def __str__(self): + return "Trial and Error" + + def set_context_observability(self, flag_observable = True): + """ + set_context_observability() turns on/off the observability of contexts (side information), + see Section V. of [Wang2019]. + """ + self.flag_observable = flag_observable + + # --- functionalitiess + def reset(self, horizon=None): + for agent in self.agents: + agent.reset() + + self.time = 0 + self.epoch = 1 + + # reset to the initial values + self.tne_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.tne_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + self.nbExploration = 0 + self.nbTnE = 0 + self.nbExploitation = 0 + + if horizon is not None: + self.horizon = horizon + + def learn_policy(self, game_env, context=None, time=None): + """ + learn_policy() implements the 3 phases in Alg. 1 of [Wang2019]. + """ + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." + + if self.flag_observable == False: + # freeze the context s.t. 
the algorithm is reduced to an MP-MAP + context = list(self.context_set)[0] + + self.time = self.time + 1 + + if self.current_round > self.rounds_in_epoch: + #update epcoh + self.epoch = self.epoch + 1 + # rounds in the k-th epoch + self.tne_round = int(self.exploration_round + self.c2*(self.epoch**self.delta)) # insce delta may be non-integer + self.rounds_in_epoch = int(self.tne_round + self.c3*(2**self.epoch)) + #reset + self.current_round = 1 +# print("number of epoch: {}".format(self.epoch))# debug + + pulls = np.zeros((nbPlayer, nbArm)) + + if self.current_round <= self.exploration_round:# exploration rounds + # reset the phase to exploration in an epoch + if self.current_round == 1: + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 0) + +# print("reset iteration at epoch {}".format(self.epoch))# debug + + #exploration by randomly choosing actions + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(context, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(context, game_env[agentID,:], collisions) + + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbExploration = self.nbExploration + 1 + + elif self.current_round <= self.tne_round:# trial-and-error phase + if self.current_round == self.exploration_round + 1: + # reset the phase to learning in an epoch + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 1) + #set the static game + self.agents[agentID].perturb_estimated_payoff(tmp_context, self.epoch) + + # get the latest best policy (from the last epoch) + init_state = None + if self.epoch != 1: + init_state = [0, self.agents[agentID].best_policy[tmp_context]] + else: + #randomize + action = np.random.randint(self.nbArm) + init_state = [0, action] + + # can be moved into perturb_estimated_payoff() in the later versions + self.agents[agentID].init_tne_states(tmp_context, init_state) + + #trial-and-error phase, taking actions randomly according to the intermediate state + for agentID in range(nbPlayer): + armID = self.agents[agentID].learn_policy(context) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + for agentID in range(nbPlayer): + self.agents[agentID].update_game_state(context, collisions) + + #update reward according to actions taken + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbTnE = self.nbTnE + 1 + else: + if self.current_round == self.tne_round + 1: + # reset the phase to exploration in an epoch + for agentID in range(nbPlayer): + for tmp_context in self.context_set: + self.agents[agentID].set_internal_state(tmp_context, 2) + + ############################################################### + # Debugging + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, self.current_round) + pulls[agentID][armID] = 1 + collisions = self.resolve_collision(pulls) + + info_logger().log_info('TnE Context {}: collisions array {}'.format(context, collisions)) #debug + # End of debugging + ############################################################### + + #exploitation + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(context, self.current_round) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + 
current_rewards = self.observe_distributed_payoff(game_env, collisions) + + # for debugging + self.nbExploitation = self.nbExploitation + 1 + + #update round number + self.current_round = self.current_round + 1 + + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + +""" + Algorithm: trial and error [Leshem2018] +""" +class GameofThrone(MABAlgorithm): + """ + Decentralized assignment algorithm in the form of game-of-throne learning algorithm. + Implemented for the paper "Distributed Multi-Player Bandits - a Game of Thrones Approach", by Ilai Bistritz et al. + Note that this algorithm is designed for multi-player without considering contextual information. + """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.horizon = param["horizon"] # agents don't know the fixed horizon when running the algorithm + + # each player will be attached a single agent +# self.nbAgent = self.nbPlayer + self.agents = [] + + # a large epsilon will leads to more frequent transtions (explorations) in the intermedate game + self.epsilon = param["epsilon"] if "epsilon" in param.keys() else 0.1 + # see Theorem 1 in [Wang2019], not kept by the agents, determining trial-and-error rounds + self.delta = param["delta"] if "delta" in param.keys() else 2 + # set the round of iteration where we + self.rho = param["rho"] if "rho" in param.keys() else 0.5 + + self.c1 = param["c1"] + self.c2 = param["c2"] + self.c3 = param["c3"] + + for playerID in range(self.nbPlayer): + player_param = {"nbArm": self.nbArm, + "nbPlayer": self.nbPlayer, + "playerID": playerID, + "epsilon": self.epsilon, + "delta": self.delta + } + + self.agents.append(GoTPlayer(player_param)) + + self.time = 0 + # used for determining the epoch + self.epoch = 1 + + # initialize for the first epoch, + # for simplicity, the parameter names are kept the same as the TnE algorithm. + self.exploration_round = self.c1 + self.got_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.got_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + # --- Printing + def __str__(self): + return "Game of Throne" + + # --- functionalitiess + def reset(self, horizon=None): + for agent in self.agents: + agent.reset() + + self.time = 0 + self.epoch = 1 + + # reset to the initial values + self.got_round = self.exploration_round + self.c2 # *1 + self.rounds_in_epoch = self.got_round + self.c3*2 # * (2** 1) # rounds in the first epoch + self.current_round = 1 + + if horizon is not None: + self.horizon = horizon + + def learn_policy(self, game_env, context=None, time=None): + """ + learn_policy() implements the 3 phases in Alg. 1 of [Leshem2018]. + Implemented in the same structure for tial-and-error + """ + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + assert time is not None, "time is not given." 
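+        # Epoch structure (matching the updates a few lines below): in epoch k the agents
+        # spend c1*k^delta rounds on exploration, c2*k^delta rounds on the Game-of-Thrones
+        # state dynamics, and c3*2^k rounds on exploiting the policy derived from the
+        # state-visit counts.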
+ + self.time = self.time + 1 + + if self.current_round > self.rounds_in_epoch: + #update epcoh + self.epoch = self.epoch + 1 + # rounds in the k-th epoch + self.exploration_round = int(self.c1*(self.epoch**self.delta)) + self.got_round = int(self.exploration_round + self.c2*(self.epoch**self.delta)) + self.rounds_in_epoch = int(self.got_round + self.c3*(2**self.epoch)) + #reset + self.current_round = 1 +# print("number of epoch: {}".format(self.epoch))# debug + + pulls = np.zeros((nbPlayer, nbArm)) + + if self.current_round <= self.exploration_round:# exploration rounds + # reset the phase to exploration in an epoch + if self.current_round == 1: + for agentID in range(nbPlayer): + self.agents[agentID].set_internal_state(context=None, input_state=0) + + # exploration by randomly choosing actions + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(None, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + + # learn the real payoff + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + elif self.current_round <= self.got_round:# game-and-thrones phase + if self.current_round == self.exploration_round + 1: + # reset the phase to learning in an epoch + for agentID in range(nbPlayer): + self.agents[agentID].set_internal_state(context=None, input_state=1) + + # as per Alg.1 in [Leshem2018], initialize the mood to be content + if self.epoch != 1: + init_state = [0, self.agents[agentID].best_policy] #(STATE_CONTENT, BEST ACTION) + else: + #randomize + action = np.random.randint(self.nbArm) + init_state = [0, action] + + # initialize the intermediate game + self.agents[agentID].initalize_static_game(init_state, self.epoch) + # initialize the intermediate states, and (TODO) this can be moved into perturb_estimated_payoff() + self.agents[agentID].init_got_states(context=None, starting_state=init_state) + + #game of throne phase, taking actions randomly according to the intermediate state + for agentID in range(nbPlayer): + armID = self.agents[agentID].learn_policy(context=None) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + + flag_count_frequency = False + # update the count of state-visit only for the last half starting from rho*c2*k^(1+delta) rounds +# if self.current_round >= self.got_round - 1 - self.rho*self.c2*(self.epoch**self.delta): + if self.current_round >= self.exploration_round + self.rho*self.c2*(self.epoch**self.delta): + flag_count_frequency = True +# flag_count_frequency = True + + for agentID in range(nbPlayer): + self.agents[agentID].update_game_state(context=None, collisions=collisions, + flag_record_frequency=flag_count_frequency) + + #update reward according to actions taken + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + else: + if self.current_round == self.got_round + 1: + # reset the phase to exploitation in an epoch + for agentID in range(nbPlayer): + # the best policy is computed in set_internal_state() + self.agents[agentID].set_internal_state(context=None, input_state=2) + + ############################################################### + # Debugging + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(None, self.current_round) + pulls[agentID][armID] = 1 + collisions = self.resolve_collision(pulls) + + info_logger().log_info('GoT Context {}: collisions array {}'.format(context, collisions)) #debug + # End of 
debugging + ############################################################### + + #exploitation + for agentID in range(nbPlayer): + armID = self.agents[agentID].exploit(None, self.current_round) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + current_rewards = self.observe_distributed_payoff(game_env, collisions) + + #update round number + self.current_round = self.current_round + 1 + + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + +__all__ = ["Hungarian", "StaticHungarian", "MusicalChairs", "TrialandError", "GameofThrone"] \ No newline at end of file diff --git a/MABAlgorithms2.py b/MABAlgorithms2.py new file mode 100644 index 0000000..0c8ab2b --- /dev/null +++ b/MABAlgorithms2.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines and implements the following multi-player, multi-arm bandits algorithms (see also Algorithms.py). +# +# 1. [Sumit2019] Sumit J. Darak and Manjesh K. Hanawal, "Multi-player multi-armed bandits for stable allocation in +# heterogeneous ad-hoc networks", IEEE JSAC oct. 2019. + +__author__ = "Wenbo Wang" + +import numpy as np +from MABAlgorithms import MABAlgorithm +from Players2 import SOCPlayer + +if __name__ == '__main__': + print("Warning: this script 'MABAlgorithms2.py' is NOT executable..") # DEBUG + exit(0) + +""" + Algorithm: centralized Hungarian +""" +class SOC(MABAlgorithm): + """ + SOC implements the algorithm "stable orthogonal allocation (SOC)" proposed in + "Multi-player multi-armed bandits for stable allocation in heterogeneous ad-hoc networks", + IEEE JSAC oct. 2019, by Sumit J. Darak and Manjesh K. Hanawal [Sumit2019]. + + The algorithm is featured by a protocol explicitly resolving collisions with channel switching, + and the channel statistics (index) is learned based on upper confidence bound (UCB). + + However, it does not have a explicit function for when to stop exporation, as in the musical chairs. + Channel allocation is obtained through a master-slave allocation process, with explicit coordination, + Exploration time needs to be given. 
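+
+    A minimal usage sketch (hypothetical values; only the parameter keys read in
+    __init__ below are shown):
+        param = {"nbPlayer": 2, "nbArm": 4, "context_set": {"context 1"},
+                 "delta": 0.1, "exploration_time": 3000}
+        soc = SOC(param)
+        # each round: pulls, total_reward, player_rewards = soc.learn_policy(game_env, time=t)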
+ """ + def __init__(self, param): + self.nbPlayer = param["nbPlayer"] + self.nbArm = param["nbArm"] + self.context_set = param["context_set"] # not used + + self.delta = 0.1 if 'delta' not in param.keys() else param['delta'] +# self.nbAgent = self.nbPlayer + + self.time = 0 + self.Trh = np.ceil(np.log(self.delta/self.nbArm) / np.log(1-1/4/self.nbArm)) + self.TExploration = 3000 if "exploration_time" not in param.keys() else param["exploration_time"] + + self.agents = [] + for playerID in range(self.nbPlayer): + player_param = {"context_set": self.context_set, + "nbArm": self.nbArm, + "playerID": playerID + } + + self.agents.append(SOCPlayer(player_param)) + + self.OHS_step = 2*(self.nbArm ** 2) + self.MB_step = 2*self.nbArm + self.SB_step = 2 + + self.current_MB_id = -1 # set to an invalid ID + self.current_master_node = -1 # there may not be a master node for the current MB + + # --- Printing + def __str__(self): + return "Static Orthogonal Allocation" + + # --- functionalities + def reset(self, horizon=None): + self.time = 0 + self.current_MB_id = -1 # set to an invalid ID + self.current_master_node = -1 + for agent in self.agents: + agent.reset() + + def learn_policy(self, game_env, context=None, time=None): + # context is not used in Hungarian + (nbPlayer, nbArm) = np.shape(game_env) + assert nbPlayer == self.nbPlayer and nbArm == self.nbArm, "input does not match the stored environment parameters." + assert nbPlayer <= nbArm, "player number should be larger than or equal to arm number." + + self.time = self.time + 1 + + pulls = np.zeros((nbPlayer, nbArm)) + + # there are three phases in the game + if self.time < self.Trh: + #random hopping / exploration + for agentID in range(nbPlayer): + armID = self.agents[agentID].explore(None, time) + pulls[agentID][armID] = 1 + + collisions = self.resolve_collision(pulls) + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + elif self.time <= self.TExploration: + # master-slave process + # 1 OHS block has K macro blcoks (K=nbArm) + # 1 macro block has T_mb=2K time slots, namely, K sub-blocks of 2 slots each + OHS_id = int(np.floor((self.time - self.Trh) / (self.OHS_step))) + MB_id = int(np.floor((self.time - self.Trh - OHS_id*self.OHS_step) / self.MB_step)) # from 0 to nbArm-1 + SB_id = int(np.floor((self.time - self.Trh - OHS_id*self.OHS_step - MB_id*self.MB_step) / self.SB_step)) # from 0 to nbArm-1 + subslot_id = int ((self.time - self.Trh) % 2) # 0 is the CT slot and 1 is the CS slot + + if self.current_MB_id != MB_id: + # one master block occupies 2*nbArm slots. 
Update master node ID as MB_id + self.current_MB_id = MB_id + + # there may be no master node at the given MB (transmitting on MB_id), + # so we initialize it to an invalid value for later state-check + self.current_master_node = -1 + #prepare the master flags of each player, only when the master ID is updated + master_counter = 0 + for agentID in range(nbPlayer): + # reset the master flag of each node + ret_flag = self.agents[agentID].set_master(self.current_MB_id) + if ret_flag == True: + # if being a master, record its ID + self.current_master_node = agentID + master_counter = master_counter + 1 + + assert master_counter<=1, "error: more than one master" + + if self.current_master_node == -1: + # if there is no master node, the MB block is wasted, see Fig.2 [Sumit2019], + # and for the entire 2*nbArm slots no one will change actions + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + else: + # a master node exists + if SB_id == 0: + # force transmission to align with the current policy at the first SB + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + else: + # sub-slot CT or CS for SB=1,...nbArm-1, starting to switch channels + if subslot_id == 0: + # in the channel transit (CT) sub-slot, the master node chooses channel SB_id to switch (notify), + # channel SB_id is the index in its preference list. 
+ # all non-master nodes stay on the their own channels + master_action, master_policy = self.agents[self.current_master_node].set_master_action(SB_id) + pulls[self.current_master_node][master_action] = 1 + + for agentID in range(nbPlayer): + if agentID != self.current_master_node: + # directly get slave response (instead of getting it by observing collisions) + # prepare the arm choice of the slave node for the next round + slave_action = self.agents[agentID].decide_switching(subslot_id, target_arm=master_policy) + pulls[agentID][slave_action] = 1 + + else: #subslot_id == 1: + assert subslot_id == 1, "sub-slot ID is invalid" + # in channel switch sub-slot, the master node tries to transmit on the channel to switch + # non-master nodes stays on their selected channels + for agentID in range(nbPlayer): + if agentID != self.current_master_node: + # only the slave occupying the target channel needs to answer the request + arm_choice = self.agents[agentID].decide_switching(subslot_id) + + if arm_choice == -1: + # use invalid choice to indicate no trnasmission + pulls[agentID,:] = 0 + else: + pulls[agentID][arm_choice] = 1 + else: + arm_choice = self.agents[agentID].selected_arm + pulls[agentID][arm_choice] = 1 + + # observe collision + collisions = self.resolve_collision(pulls) + + # update the UCB ranking + for agentID in range(nbPlayer): + self.agents[agentID].learn_arm_value(None, game_env[agentID,:], collisions) + # update policy after learning + self.agents[agentID].update_policy(subslot_id, collisions) + else: + # exploitation (no mcuh is mentioned (theoretically) regarding the performance in [Sumit2019]) + for agentID in range(nbPlayer): + arm_choice = self.agents[agentID].exploit() + pulls[agentID][arm_choice] = 1 + + collisions = self.resolve_collision(pulls) + + current_rewards = self.observe_distributed_payoff(game_env, collisions) + total_rewards = np.sum(current_rewards) + return pulls, total_rewards, current_rewards + + # add other algorithms here +__all__ = ["SOC"] \ No newline at end of file diff --git a/MPMAB.py b/MPMAB.py new file mode 100644 index 0000000..f916115 --- /dev/null +++ b/MPMAB.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +""" +Implementing the class 'MAB' and its children classes, which define the environment of the bandit game (stochastic i.i.d.) +""" + +__author__ = "Wenbo Wang" + +import numpy as np +import scipy.io +from plotutils import prepare_file_name + +from Arms import UniformArm, GaussianArm + +class MP_MAB(object): + """ + i.i.d. multi-arm bandit problem. + The arm value is jointly sampled with the context, and for each player the underlying process may be different. 
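+
+    A minimal construction sketch (hypothetical values, using the uniform_mab() factory
+    defined below; dic_lower/dic_upper map a (context, playerID) pair to per-arm bounds):
+        contexts = {"context 1", "context 2"}
+        dic_lower = {(c, p): [0.0, 0.1] for c in contexts for p in range(2)}
+        dic_upper = {(c, p): [1.0, 0.9] for c in contexts for p in range(2)}
+        game = MP_MAB.uniform_mab(contexts, 2, 2, dic_lower, dic_upper)
+        context, arm_values = game.draw_sample(t=0)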
+ """ + def __init__(self, context_set, nbArms, nbPlayers): + """New MP-MAB.""" + print("\nCreating a contextual multi-player MAB game...") # DEBUG + + self.nbArms = nbArms + self.nbPlayers = nbPlayers + + self.context_set = context_set + self.context_probabilites = [] + self.context_array = [] # may the context iterable + self.flag_context_prob = False + + self.current_arm_value = np.zeros((nbPlayers, nbArms)) + self.current_context = None + + self.arms = {} + self.max_arm_value = {} # recording the maximum arm value in case of normalization for each context along the time horizon + + self.horizon = 0 + self.flag_sample_prepared = False + + """ + For different joint distributions of (context, arm-value), we may need different initilization variables. + Call one of the following methods for class instantiation with different types of arms instead of __init__. + """ + + @classmethod + def uniform_mab(cls, context_set, nbArms, nbPlayers, dic_lower, dic_upper): + uniform_inst = cls(context_set, nbArms, nbPlayers) + + # For each context and each player, we create an arm + for context in context_set: + player_arm_array = [[None]*nbArms for playerID in range(nbPlayers)] + for playerID in range(nbPlayers): + for armID in range(nbArms): + # if it is a uniform arm + param = {"lower_val": dic_lower[(context, playerID)][armID], + "upper_val": dic_upper[(context, playerID)][armID], + "context": context, + "playerID": playerID, + "armID": armID } + player_arm_array[playerID][armID] = UniformArm(param) +# print("size of the object array: ", len(arm_array))#debug + + uniform_inst.arms[context] = player_arm_array +# print("size of the object array for context: ", context, ": (", len(player_arm_array), ",", len(player_arm_array[0]), ")")#debug + + return uniform_inst + + @classmethod + def gaussian_mab(cls, context_set, nbArms, nbPlayers, dic_mean, dic_sigma): + gaussian_inst = cls(context_set, nbArms, nbPlayers) + + # For each context and each player, we create an arm + for context in context_set: + player_arm_array = [[None]*nbArms for playerID in range(nbPlayers)] + for playerID in range(nbPlayers): + for armID in range(nbArms): + # if it is a uniform arm + param = {"mu": dic_mean[(context, playerID)][armID], + "sigma": dic_sigma[(context, playerID)][armID], + "context": context, + "playerID": playerID, + "armID": armID } + player_arm_array[playerID][armID] = GaussianArm(param) +# print("size of the object array: ", len(arm_array))#debug + + gaussian_inst.arms[context] = player_arm_array +# print("size of the object array for context: ", context, ": (", len(player_arm_array), ",", len(player_arm_array[0]), ")")#debug + + return gaussian_inst + + + def set_discrete_context_prob(self, context_prob): + """ + assign arbitrary probabilities to contexts + """ + if set(context_prob.keys()) != self.context_set: + raise Exception("probability values do not match the set of context") + + self.context_array = np.array(list(context_prob.keys())) + + self.context_probabilites = np.array(list(context_prob.values())) + self.context_probabilites = self.context_probabilites / np.sum(self.context_probabilites) # normalize + + self.flag_context_prob = True + + def get_discrete_context_prob(self): + if self.flag_context_prob: + return self.context_array, self.context_probabilites + else: + prob = np.ones(len(self.context_set)) + return np.array(list(self.context_set)), prob / np.sum(prob) + + """Draw samples""" + def draw_sample(self, t=None): + """ + Draw samples for all the player-arm pairs in a given sampled context. 
+ We enforce that the arm values are drawn in the same global context. + """ + + # context is finite, so here we can adopt a separate discrete (e.g., uniform) distribution for context evolution + # in the real-world situation context-arm-value can be seen as being sampled from a joint distribution + if self.flag_context_prob == False: + context = np.random.choice(tuple(self.context_set)) # uniform randomly sampled + else: + context = np.random.choice(self.context_array, p=self.context_probabilites) + + player_arm_array = self.arms[context] + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + if player_arm_array[playerID][armID].playerID != playerID or player_arm_array[playerID][armID].armID != armID: + raise Exception("player ID and arm ID do not match!") + + self.current_arm_value[playerID][armID] = player_arm_array[playerID][armID].draw_sample(context, t) + +# print("Sampling arms completes") + self.current_context = context + + return self.current_context,self.current_arm_value + + """get the samples in advance""" + def prepare_samples(self, horizon, flag_progress_bar=False): + if horizon <= 0: + raise Exception("Input horizon is not valid") + + self.horizon = horizon + + for context in self.context_set: + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + # for each player-arm pair, prepare its sample sequences in each context + self.arms[context][playerID][armID].prepare_samples(horizon) + + self.max_arm_value[context] = np.ones(horizon) # + + self.flag_sample_prepared = True + + """utility functions""" + def get_param(self, context): + lower = np.zeros((self.nbPlayers, self.nbArms)) + upper = np.zeros((self.nbPlayers, self.nbArms)) + means = np.zeros((self.nbPlayers, self.nbArms)) + variance = np.zeros((self.nbPlayers, self.nbArms)) + + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + lower[playerID][armID] = self.arms[context][playerID][armID].lower + upper[playerID][armID] = self.arms[context][playerID][armID].upper + means[playerID][armID] = self.arms[context][playerID][armID].mean + variance[playerID][armID] = self.arms[context][playerID][armID].variance + + return lower, upper, means, variance + + def get_current_param(self, t=None): + """ + Get the current sampling parameters of arms in the given context. 
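+        Equivalent to calling get_param(self.current_context): the returned tuple is
+        (lower, upper, means, variance) for the context of the most recent draw_sample().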
+ """ + if self.current_context is None: + raise Exception("The MAB game is not started.") + + return self.get_param(self.current_context) + + """ + + """ + def save_environment(self, file_name=None): + if self.flag_sample_prepared == False: + print("No data is prepared") + else: + # TODO: we cannot select the path yet, put the file to the default directory "\results" of the current path + file_path = prepare_file_name("{}-{}".format(file_name if file_name is not None else "", "env"), + alg_name = None, ext_format = "mat") + + mdict = {} + for context in self.context_set: + for playerID in range(self.nbPlayers): + for armID in range(self.nbArms): + dict_key = "{}-{}-{}".format(context, playerID, armID) + mdict[dict_key] = self.arms[context][playerID][armID].prepared_samples + + scipy.io.savemat(file_path, mdict) + + def load_environment(self, file_path, horizon=None): + mdict = scipy.io.loadmat(file_path) + + for key in mdict: + key_strings = key.split('_') + context = key_strings[0] + playerID = int(key_strings[1]) + armID = int(key_strings[2]) + + self.arms[context][playerID][armID].prepared_samples = mdict[key] + + self.flag_sample_prepared = True + +# ploting methods \ No newline at end of file diff --git a/PlayResult.py b/PlayResult.py new file mode 100644 index 0000000..44eba75 --- /dev/null +++ b/PlayResult.py @@ -0,0 +1,257 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +This file defines the data recorder and parts of the virtualization mechanisms in our simulations. +class ResultMultiPlayers +""" + +# This file implements the data recorder for each single MAB algorithm + +__author__ = "Wenbo Wang" + +import numpy as np +import matplotlib.pyplot as plt +import scipy.io + +from plotutils import make_markers, make_palette, display_legend, prepare_file_name +from datetime import datetime + +if __name__ == '__main__': + print("Warning: this script 'PlayerResult.py' is NOT executable..") # DEBUG + exit(0) + +DELTA_T_PLOT = 50 +FIGURE_SIZE = (5, 3.75) + +class ResultMultiPlayers(object): + """ ResultMultiPlayers accumulators, for the multi-players case. 
""" + + def __init__(self, alg_name, context_set, player_no, arm_no, horizon): + """ Create ResultMultiPlayers.""" + self.alg_name = alg_name + + self.nbPlayer = player_no + self.nbArm = arm_no + self.context_set = context_set + self.horizon = horizon + + self.choices = np.zeros((player_no, horizon), dtype=int) #: Store all the arm choices of all the players + self.sampled_rewards = np.zeros((player_no, horizon)) #: Store all the rewards of all the players, to compute the mean + self.total_rewards = np.zeros(horizon) + + self.context_history = [None]*horizon + + self.pull_history = np.zeros((player_no, arm_no, horizon), dtype=int) #: Is a map of 0-1 for players and arms + self.collisions = np.zeros((arm_no, horizon), dtype=int) #: Store the number of collisions on all the arms + + self.delta_t_plot = 1 if self.horizon <= 10000 else DELTA_T_PLOT + + def store(self, time, context, choices, sampled_rewards, total_rewards, pulls, collision=None): + """ Store results.""" + self.context_history[time] = context + + self.choices[:, time] = choices + self.sampled_rewards[:, time] = sampled_rewards + self.total_rewards[time] = total_rewards + + self.pull_history[:, :, time] = pulls + + if collision is None: + self.collisions[:, time] = 0 + else: + self.collisions[:, time] = collision + + def reset_record(self, horizon=None): + if horizon is not None: + self.horizon = horizon + + self.choices = np.zeros((self.nbPlayer, self.horizon), dtype=int) #: Store all the arm choices of all the players + self.sampled_rewards = np.zeros((self.nbPlayer, self.horizon)) #: Store all the rewards of all the players, to compute the mean + self.total_rewards = np.zeros(self.horizon) + + self.context_history = [None]*self.horizon + + self.pull_history = np.zeros((self.nbPlayer, self.nbArm, self.horizon), dtype=int) #: Is a map of 0-1 for players and arms + self.collisions = np.zeros((self.nbArm, self.horizon), dtype=int) #: Store the number of collisions on all the arms + + + def dump2disk(self, file_name=None): + """Save the result into a Matlab .mat file""" + file_path = prepare_file_name(file_name, self.alg_name, "mat") + + scipy.io.savemat(file_path, mdict={"nbPlayer": self.nbPlayer, "nbArm": self.nbArm, "context_set": list(self.context_set), + "horizon": self.horizon, "context_history": self.context_history, + "sampled_reward": self.sampled_rewards, + "choices": self.choices, "collisions": self.collisions}) + + + """ + The following methods are used for plotting/saving figures. 
+ Other figure plotting methods can be found in plotutils.py + """ + def plot_cumu_rewards(self, horizon=None, other_results=None, semilogx=False, save_fig=False, save_data=False): + #other_results are used for comparison with other algorithms + if other_results is not None: + #the other results should have the same player/arm numbers + for idx in range(len(other_results)): + nbPlayer = other_results[idx].nbPlayer + nbArm = other_results[idx].nbArm + + if nbPlayer != self.nbPlayer or nbArm != self.nbArm: + raise Exception("environment does not match!") + + nbCurves = self.nbPlayer * (1 + len(other_results)) + else: + nbCurves = self.nbPlayer + + """Plot the decentralized rewards, for each player.""" + fig = plt.figure(figsize=FIGURE_SIZE) + ymin = 0 + colors = make_palette(nbCurves) + markers = make_markers(nbCurves) + + if horizon is None: + horizon = self.horizon + + X = np.arange(start=0, stop=horizon, step=1) + + #plot the locally stored values + cumu_rewards = np.cumsum(self.sampled_rewards, axis=1) + + curve_idx = 0 + for playerId in range(self.nbPlayer): + label = '{}: Player {:>2}'.format(self.alg_name, playerId + 1) + Y = cumu_rewards[playerId, :horizon] + Y = Y / (X+1) + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + curve_idx = curve_idx + 1 + + if other_results is not None: + for idx in range(len(other_results)): + cumu_rewards = np.cumsum(other_results[idx].sampled_rewards, axis=1) + for playerId in range(other_results[idx].nbPlayer): + label = '{}: Player {:>2}'.format(other_results[idx].alg_name, playerId + 1) + Y = cumu_rewards[playerId, :horizon] + Y = Y / (X+1) + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + curve_idx = curve_idx + 1 + + display_legend() + plt.xlabel("Number of rounds", fontsize=10) + plt.ylabel("Average reward over time", fontsize=10) + +# plt.title("Individual Average Rewards Over Time", fontsize=10) + if save_data: + print("saving figure...") + self.dump2disk() + + if save_fig: + self.save_figure(file_name = "indv_avg_result", fig=fig) + + return fig + + def plot_avg_reward(self, horizon=None, other_results=None, semilogx=False, save_fig=False, save_data=False): + #other_results are used for comparison with other algorithms + if other_results is not None: + #the other results should have the same player/arm numbers + nbCurves = 1 + len(other_results) + else: + nbCurves = 1 + + """Plot the average rewards, for each player in each algorithm.""" + fig = plt.figure(figsize=FIGURE_SIZE) + ymin = 0 + colors = make_palette(nbCurves) + markers = make_markers(nbCurves) + + if horizon is None: + horizon = self.horizon + + X = np.arange(start=0, stop=horizon, step=1) + + #plot the locally stored values + curve_idx = 0 + cumu_rewards = np.cumsum(self.total_rewards[:horizon]) + + label = '{}'.format(self.alg_name) + Y = cumu_rewards 
/ (X+1) / self.nbPlayer + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + if other_results is not None: + for idx in range(len(other_results)): + curve_idx = curve_idx + 1 + cumu_rewards = np.cumsum(other_results[idx].total_rewards[:horizon]) + + label = '{}'.format(other_results[idx].alg_name) + Y = cumu_rewards / (X+1) / other_results[idx].nbPlayer + + ymin = min(ymin, np.min(Y)) + if semilogx: + plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + else: + plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[curve_idx], + marker=markers[curve_idx], markersize=5, markevery=(curve_idx / 50., 0.1), lw=1) + + display_legend() + plt.xlabel("Number of rounds", fontsize=10) + plt.ylabel("Average reward over time", fontsize=10) +# plt.title("Individual Average Rewards Over Time", fontsize=10) + + if save_data: + print("saving figure data...") + self.dump2disk() + + if save_fig: + print("saving figure...") + self.save_figure(file_name = "avg_result", fig=fig) + + return fig + + def save_figure(self, file_name=None, formats={'pdf', 'png'}, fig=None): + now = datetime.now() + + for form in formats: + path = prepare_file_name(file_name, self.alg_name, form) + try: + current_time = now.strftime("%H:%M:%S") + plt.savefig(path, bbox_inches="tight") + print("Figure saved! {} at {} ...".format(path, current_time)) + + except Exception as exc: + print("Could not save figure to {} due to error {}!".format(path, exc)) # DEBUG + \ No newline at end of file diff --git a/Players.py b/Players.py new file mode 100644 index 0000000..fa00380 --- /dev/null +++ b/Players.py @@ -0,0 +1,852 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +# This file defines the player behavior for a series of MP-MAB algorithms + +__author__ = "Wenbo Wang" + +import numpy as np + +from loggingutils import info_logger + +if __name__ == '__main__': + print("Warning: this script 'Player.py' is NOT executable..") # DEBUG + exit(0) + +class Player(object): + """ Base class for a player class.""" + + def __init__(self, param): + """ + Base class for a player class. + For clarity, we require each child class to re-implement completely the __init__() method. + """ + self.horizon = param["horizon"] #: if the horizon is not known in advance, set it to None. 
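+        # Note: if "T0" is not supplied in param, get_optimalT0() below is used to set the
+        # exploration length, which requires a finite horizon (it raises an exception when
+        # horizon is None).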
+ self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.context = param["context"] + self.playerID = param["playerID"] + + self.arm_estimate = np.zeros(self.nbArm) + + # --- Printing + def __str__(self): + return self.__class__.__name__ + + # --- functionalities + def explore(self, context = None, time = None): + print("decision() should be implemented for agent adopting a particular algorithm.") + + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + print("learn_arm_value() should be implemented for agent adopting a particular algorithm.") + + def exploit(self, context = None, time=None): + print("exploit() should be implemented for agent adopting a particular algorithm.") + + def reset(self): + print("reset() should be implemented for agent adopting a particular algorithm.") + +class MusicChairPlayer(Player): + """ + Class MusicChairPlayer for a player (agent) adopting the Music Chair algorithm. + Implemented based on the paper "Multi-Player Bandits – a Musical Chairs Approach", by Jonathan Rosenski and Ohad Shamir @2015 [Rosenski2015] + (https://arxiv.org/abs/1512.02866). + Note that this algorithm is designed for multi-player only and for contextual bandit it adapts to the condition of unobservable context. + """ + + def __init__(self, param): + self.horizon = param["horizon"] #: if the horizon is not known in advance, set it to None. + self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.context = None # not used by the player + self.playerID = param["playerID"] + + if "epsilon" in param: + self.epsilon = param["epsilon"] + else: + self.epsilon = 0.1 + + if "delta" in param: + self.delta = param["delta"] + else: + self.delta = 0.05 + + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) # \tilde{\mu}_i in [Rosenski2015] + self.nb_collision = 0 # number of observed collision, C_{T_0} in [Rosenski2015] + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff, o_i in [Rosenski2015] + + if "T0" in param.keys() and param["T0"] > 0: + self.T0 = param["T0"] + else: + self.T0 = self.get_optimalT0(self.nbArm, self.horizon, self.epsilon, self.delta) + + self.time = 0 + + self.sorted_chair = None + self.selected_arm = 0 + + self.flag_seated = False + self.selected_chair = 0 + self.estimated_nbPlayer = 0 + + def reset(self): + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) # \tilde{\mu}_i in [Rosenski2015] + self.nb_collision = 0 # number of observed collision, C_{T_0} in [Rosenski2015] + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff, o_i in [Rosenski2015] + + self.time = 0 + + self.sorted_chair = None + self.selected_arm = 0 + + self.flag_seated = False + self.selected_chair = 0 + self.estimated_nbPlayer = 0 + + + def get_optimalT0(self, nbArms, horizon=None, epsilon=0.1, delta=0.05): + """ + Estimate T0 for an error probability delta and a bound of gap between the rewards of N-th best arm and the (N+1)-th best arm. + The method is based on Theorem 1 of [Rosenski2015], which requires knowing the number of arms in the game. + + Equation: + \begin{equation} + T_0 = \ceil{\max (\frac{K}{2})\ln(\frac{2K^2}{\delta}), \frac{16K}{\epsilon^2}\ln(\frac{4K^2}{\delta}, \frac{K^2\log(\frac{2}{\delta})}{0.02}) } + \end{equation} + + Remark: note that the last term \frac{K^2\log(\frac{2}{\delta})}{0.02} was written in [Rosenski2015] as \frac{K^2\log(\frac{2}{\delta_2})}{0.02}, which is a typo. 
+ $\delta_2$ should be $\delta$, since $\frac{K^2\log(\frac{2}{\delta_2})}{0.02}$ is derived from $t\ge \frac{\log(2/delta)}{2\epsilon_1^2}$, where + $\epsilon_1^2\ge \frac{0.01}{K^2}$. + + Examples: + + - For K arms, in order to have a constant regret with error probability delta, with the gap condition epsilon, we have + (1) optimalT0(2, None, 0.1, 0.05) = 18459 + (2) optimalT0(6, None, 0.01, 0.05) = 76469 + (3) optimalT0(17, None, 0.01, 0.05) = 273317 + """ + + T0_1 = (nbArms / 2.) * np.log(2 * nbArms**2 / delta) + T0_2 = ((16 * nbArms) / (epsilon**2)) * np.log(4 * nbArms**2 / delta) + T0_3 = (nbArms**2 * np.log(2 / delta)) / 0.02 # delta**2 or delta_2 ? Typing mistake in their paper + T0 = max(T0_1, T0_2, T0_3) + + if horizon is None: + raise Exception("the total number of rounds is not known.") + elif T0>= horizon: + raise Exception("the total number of rounds is too small for exploration.") + + return int(np.ceil(T0)) + + def explore(self, context = None, time = None): + if time is None or time != self.time: + raise Exception("Playing round does not match.") + + #update time + self.time = time + 1 + + if self.time <= self.T0: + #pahse of exploration + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # context is not used in this algorithm + # must be called after explore + if len(arm_values) != self.nbArm or len(collisions) != self.nbArm: + raise Exception("inputs are invalid.") + + if self.time <= self.T0: + # get the reward of exploration phase + if collisions[self.selected_arm] > 1: + #selects an arm with collision + self.nb_collision = self.nb_collision + 1 + else: + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + def exploit(self, context = None, time=None): + if time is None or time != self.time: + raise Exception("Playing round does not match.") + + #update time + self.time = time + 1 + + if self.time > self.T0 and self.time <=self.horizon: + if self.sorted_chair is None: + # prepare only once + for armID in range(self.nbArm): + if self.nb_observation[armID] != 0: + self.arm_estimate[armID] = self.accumulated_value[armID] / self.nb_observation[armID] + + # if the estimated player nubmer is not obtained, calculate it first + # Equation for N^* is given in Alg. 1 of [Rosenski2015] + self.estimated_nbPlayer = int(round(1 + np.log((self.T0 - self.nb_collision) / self.T0) / np.log(1. - 1. / self.nbArm))) + if self.estimated_nbPlayer > self.nbArm: + self.estimated_nbPlayer = self.nbArm # force the number of players to be less than the number of arms + + # sort their index by empirical arm values (means) in decreasing order + sorted_arms = np.argsort(-self.arm_estimate) # FIXED among the best M arms! 
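+                # keep only the estimated_nbPlayer arms with the largest empirical means;
+                # these form the set of "chairs" that the player keeps trying to occupy at
+                # random until it observes a collision-free round and gets seated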
+ self.sorted_chair = sorted_arms[:self.estimated_nbPlayer] + + if self.estimated_nbPlayer == 0: + raise Exception("estimated arm number is invalid.") + + if self.flag_seated == False: + self.selected_chair = np.random.randint(self.estimated_nbPlayer) + self.selected_arm = self.sorted_chair[self.selected_chair] + else: + pass + + return self.selected_arm + + def update_musical_chair(self, time = None, collisions = None): + if time is None or time <= self.T0: + raise Exception("Playing round does not match.") + + if self.flag_seated == False and collisions[self.selected_arm] == 1: + self.flag_seated = True + + +STATE_EXPLORE = 0 +STATE_LEARN = 1 +STATE_EXPLOIT = 2 + +STATE_CONTENT = 0 +STATE_HOPEFUL = 1 +STATE_WATCHFUL = 2 +STATE_DISCONTENT = 3 + +class TnEPlayer(Player): + """ + Class TnEPlayer for a player (agent) adopting the trial-and-error algorithm. + Implemented for the paper "Distributed Learning for Interference Avoidance as aContextual Multi-player Multi-armed Bandit Game", + by Wenbo Wang et al. [Wang2019] + """ + def __init__(self, param): + if "context_set" not in param.keys(): + raise Exception("context set is not given") + else: + self.context_set = param["context_set"] # has to be larger than or equal to 1 + + self.horizon = param["horizon"] if "horizon" in param.keys() else 0 + + #for arm of a specific context-player + self.playerID = param["playerID"] + self.nbArm = param["nbArm"] + + #used in Eq.(6) in [Wang2019] + self.xi = param["xi"] + #used in Eq. (10) and Eq. (11) in [Wang2019] + self.epsilon = param["epsilon"] + + self.rho = param["rho"] #no longer used in the new algorithm + + #log-linear function parameters, adopted from Young's paper "learning efficient Nash equilibrium in distributed systems" + self.alpha11 = -0.001 if param['alpha11'] is None else param['alpha11']# F(u)<1/2M + self.alpha12 = 0.1 if param['alpha12'] is None else param['alpha12'] + + self.alpha21 = -0.01 if param['alpha21'] is None else param['alpha21']# G(u)<1/2 + self.alpha22 = 0.5 if param['alpha22'] is None else param['alpha22'] + + # Initialization + self.nb_observation = {} + self.accumulated_value = {} + self.arm_estimate = {} + + self.learning_state = {} +# self.visit_frequency = {} + self.ptbd_arm_value = {} + self.selected_arm = 0 + + self.nb_state_visit = {} + self.nb_state_aligned = {} + + self.current_state = {} + self.reference_reward = {} + + self.best_policy = {} + + for context in self.context_set: + # for arm-value estimation + self.nb_observation[context] = np.zeros(self.nbArm) + self.accumulated_value[context] = np.zeros(self.nbArm) + # the static game is formulated on arm_estimate + self.arm_estimate[context] = np.zeros(self.nbArm) + + self.learning_state[context] = STATE_EXPLORE + + self.ptbd_arm_value[context] = np.zeros(self.nbArm) # perturbed arm values + + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) # for debugging purpose + self.nb_state_aligned[context] = np.zeros(self.nbArm) + """ + One example of the intermediate states: + --- for a game of 2 arms, we have that for a given context (payoff is stored in self.reference_reward) + (0, 0, 0): Content, arm 0, payoff = 0, + (1, 0, 0): Hopeful, arm 0, payoff = 0, + (2, 0, 0): Watchful, arm 0, payoff = 0, + (3, 0, 0): Discontent, arm 0, payoff = 0, + + (0, 0, 1): Content, arm 0, payoff = arm-value, + (1, 0, 1): Hopeful, arm 0, payoff = arm-value, + (2, 0, 1): Watchful, arm 0, payoff = arm-value, + (3, 0, 1): Discontent, arm 0, payoff = arm-value, + + (0, 1, 0): Content, arm 1, payoff = 0, + (1, 1, 0): Hopeful, arm 
1, payoff = 0, + (2, 1, 0): Watchful, arm 1, payoff = 0, + (3, 1, 0): Discontent, arm 1, payoff = 0, + + (0, 1, 1): Content, arm 1, payoff = arm-value, + (1, 1, 1): Hopeful, arm 1, payoff = arm-value, + (2, 1, 1): Watchful, arm 1, payoff = arm-value, + (3, 1, 1): Discontent, arm 1, payoff = arm-value, + + """ + + self.current_state[context] = [STATE_DISCONTENT, 0] #set as a default 3-tuple: (mood, reference action, reference payoff = 0) + self.reference_reward[context] = 0# record the real reference reward of the state + + self.best_policy[context] = 0 + + def reset(self): + for context in self.context_set: + # for arm-value estimation + self.nb_observation[context] = np.zeros(self.nbArm) + self.accumulated_value[context] = np.zeros(self.nbArm) + # the static game is formulated on arm_estimate + self.arm_estimate[context] = np.zeros(self.nbArm) + + self.learning_state[context] = STATE_EXPLORE + self.ptbd_arm_value[context] = np.zeros(self.nbArm) # perturbed arm values + + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) + self.nb_state_aligned[context] = np.zeros(self.nbArm) + + #set as a default 3-tuple: (mood, reference action, reference payoff = 0 or none-zero) + self.current_state[context] = [STATE_DISCONTENT, 0] + self.reference_reward[context] = 0 # record the real reference reward of the state + + self.best_policy[context] = 0 + + + # --- functionalities + def explore(self, context=None, time=None): + """ + explore() only update when no collision occurs on the selected arm, see Eq. (5) of [Wang2019] + will update the value in learn_arm_value() + """ + assert self.learning_state[context] == STATE_EXPLORE, "learning state does not match"#debug + + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context=None, arm_values=None, collisions=None): + # must be called after explore + assert self.learning_state[context] == STATE_EXPLORE, "learning state does not match"#debug + assert len(arm_values) == self.nbArm and len(collisions) == self.nbArm, "inputs are invalid." + assert collisions[self.selected_arm] != 0, "arm selection error." + + if collisions[self.selected_arm] == 1: + armID = self.selected_arm + self.nb_observation[context][armID] = self.nb_observation[context][armID] + 1 # obtain a new valid arm-value observation + self.accumulated_value[context][armID] = self.accumulated_value[context][armID] + arm_values[armID] + + self.arm_estimate[context][armID] = self.accumulated_value[context][armID] / self.nb_observation[context][armID] + else: + pass # do not update + + return self.arm_estimate[context] + + def set_internal_state(self, context=None, input_state=STATE_EXPLORE): + # input_state: 0 --explore, 1 -- trial-and-error, 2 -- exploitation + if input_state < STATE_EXPLORE or input_state > STATE_EXPLOIT: + raise Exception("input state is invalid") + + if input_state == STATE_EXPLORE: + pass + elif input_state == STATE_LEARN: + self.ptbd_arm_value[context][:] = 0 + elif input_state == STATE_EXPLOIT: + # do it once for all + self.get_best_policy(context) + else: + raise Exception("input is not valid.") + + self.learning_state[context] = input_state + + + def perturb_estimated_payoff(self, context=None, epoch=None): + """ + The perturbation of estimated arm values guarantees that there is a unique social optimal equialibrium for the static game. 
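+        (Illustrative numbers, not taken from the paper: with xi = 0.001 and epoch = 10,
+        each arm estimate is shifted by an independent perturbation drawn uniformly from
+        [0, xi/epoch) = [0, 1e-4), which breaks ties between arms with identical estimates.)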
+ See Proposition 3 in [Wang2019] + """ + assert epoch is not None and epoch > 0, "the epoch index is invalid" + + #get a perturbation, which is only computed at the beginning of the learning phase in each each + perturbation = np.random.random_sample(self.nbArm) * self.xi/epoch + assert len(perturbation) == self.nbArm, "the dimension of perturbation is invalid" + + self.ptbd_arm_value[context] = self.arm_estimate[context] + perturbation +# self.init_tne_states(context) + + return self.ptbd_arm_value[context] + + def init_tne_states(self, context=None, starting_state=None): + """ + We have 4 states: Content (C), Hopeful (H), Watchful (W) and Discontent (D). + For each agent in a given context, the total # of local intermediate states is 4 * nbArm + + """ + # if we turn (1) on, in each exploration phase the learning algorithm will only use the outcomes of game play in this epoch. + self.nb_state_visit[context] = np.zeros((4, self.nbArm)) # (1): tracks the frequency state visits + self.nb_state_aligned[context] = np.zeros(self.nbArm) + + # set as a default 3-tuple: (mood=discontent, reference action (arm)=0, reference payoff = 0 or zero) + if starting_state is None: + self.current_state[context] = [STATE_DISCONTENT, 0] + + # reference_reward records the real reference reward of the state, + # initialization sets all players to select arm 0 so the reward is 0 due to collision + self.reference_reward[context] = 0 + else: + self.current_state[context] = starting_state + self.reference_reward[context] = 0 + + def learn_policy(self, context=None, time=None): + #note that here time is not used + assert context is not None, "context is not given" #debug + assert self.learning_state[context] == STATE_LEARN, "learning state does not match" #debug + + self.selected_arm = self.update_static_game_action(context, self.current_state[context]) + + return self.selected_arm + + def update_static_game_action(self, context=None, current_state=None): + """ + Update action in the static game according to Eq.(9) + """ + if current_state[0] == STATE_CONTENT: # if content + #content, Eq. (9), experiment with prob. epsilon + seed = np.random.random_sample() + if seed > self.epsilon: + action = current_state[1] + else: + remaining_actions = list(range(self.nbArm)) + remaining_actions.pop(current_state[1]) + action_id = np.random.randint(self.nbArm - 1) + action = remaining_actions[action_id] + assert action != current_state[1], "sampled action is invalid." + +# print("player {} taking action arm {}".format(self.playerID, action)) #debug + + elif current_state[0] == STATE_HOPEFUL or current_state[0] == STATE_WATCHFUL: # if hopeful or watchful + #hopeful or watchful + action = current_state[1] # do not change + elif current_state[0] == STATE_DISCONTENT: # if discontent + #discontent + action = np.random.randint(self.nbArm) + assert action >=0 and action < self.nbArm, "sampled action is invalid." + else: + raise Exception("the mood of the current state is invalid") + + return action + + def update_game_state(self, context, collisions): + """ + Update the state of agent in the static game according to Alg. 2 in [Wang2019]. 
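+        Summary of the mood transitions implemented below:
+          - Content: when experimenting with a new arm and obtaining a higher payoff, the new
+            (arm, payoff) pair is adopted as the reference with probability epsilon^G(delta_u);
+            when playing the reference arm, the mood turns Hopeful/Watchful if the payoff
+            rises/drops with respect to the reference payoff.
+          - Hopeful: payoff >= reference -> Content (the reference payoff is raised if larger);
+            payoff < reference -> Watchful.
+          - Watchful: payoff > reference -> Hopeful; payoff == reference -> Content;
+            payoff < reference -> Discontent.
+          - Discontent: a zero payoff keeps the player Discontent; a positive payoff makes it
+            Content with the new (arm, payoff) pair with probability epsilon^F(u).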
+ Note that self.current_state[context] is in the form of (mood, arm, value) + """ + current_reward = 0 # this is the reward of the static game + if collisions[self.selected_arm] == 1: + current_reward = self.ptbd_arm_value[context][self.selected_arm] + + if self.current_state[context][0] == STATE_CONTENT:# if content + # the current mood is content + if self.selected_arm != self.current_state[context][1]: + if current_reward > self.reference_reward[context]: + G_delta_u = (self.alpha21 * (current_reward - self.reference_reward[context]) + self.alpha22) + threshold = self.epsilon ** G_delta_u + + #update according to Eq. (10) with probability + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[context][1] = self.selected_arm #update reference action + self.reference_reward[context] = current_reward + else: + pass + else: + pass + else: # no experimenting + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_HOPEFUL # hopeful + elif current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_WATCHFUL # watchful + else: # current_reward == self.reference_reward[context]: + pass # do nothing + + elif self.current_state[context][0] == STATE_HOPEFUL: # if hopeful + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT # set to content + self.reference_reward[context] = current_reward + elif current_reward == self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT + else:# current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_WATCHFUL # set to watchful + + elif self.current_state[context][0] == STATE_WATCHFUL: # if watchful + if current_reward > self.reference_reward[context]: + self.current_state[context][0] = STATE_HOPEFUL # set to hopeful + elif current_reward == self.reference_reward[context]: + self.current_state[context][0] = STATE_CONTENT + else:# current_reward < self.reference_reward[context]: + self.current_state[context][0] = STATE_DISCONTENT # set to discontent + + elif self.current_state[context][0] == STATE_DISCONTENT: + if current_reward == 0: + pass# remain discontent, keep exploring + else: + F_u = self.alpha11 * current_reward + self.alpha12 # update with the probability in Eq. 
(11) + threshold = self.epsilon ** F_u + + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + if sampled_result == 0: + self.current_state[context][0] = STATE_CONTENT + self.current_state[context][1] = self.selected_arm #update reference action + + self.reference_reward[context] = current_reward + else: + pass #stay with the same state + + else: + raise Exception("unexpected state.") + + #update the number of visited states + id_mood = self.current_state[context][0] + id_action = self.current_state[context][1] + + self.nb_state_visit[context][id_mood][id_action] = 1 + self.nb_state_visit[context][id_mood][id_action] + + if id_mood == STATE_CONTENT and self.reference_reward[context] == current_reward: + self.nb_state_aligned[context][id_action] = 1 + self.nb_state_aligned[context][id_action] + + def exploit(self, context = None, time=None): + assert context is not None, "context is None" + assert self.learning_state[context] == STATE_EXPLOIT, "learning state does not match" + assert time is not None, "time is None" + +# self.selected_arm = self.get_best_policy(context) # if turning this on, we'll compute the best policy each time + + self.selected_arm = self.best_policy[context] + return self.selected_arm #return the action + + def get_best_policy(self, context = None): + assert context is not None, "context is None" + + mat_frequency = self.nb_state_aligned[context] # only count the Content mood + + id_max = np.argmax(mat_frequency) #over the remaining action/arm axis + + self.best_policy[context] = id_max + +# print("TnE - {}: Player {}: arm {}".format(context, self.playerID, id_max)) # debug + + return id_max + +""" +Implemented based on the method proposed in the paper, [Bistritz2019] +"Game of Thrones: Fully Distributed Learning for Multi-Player Bandits", by Ilai Bistritz and Amir Leshem, +NeurIPS2019 +""" +class GoTPlayer(Player): # with almost the same structure of TnE + def __init__(self, param): + self.horizon = param["horizon"] if "horizon" in param.keys() else 0 + + #for arm of a specific context-player + self.playerID = param["playerID"] + self.nbArm = param["nbArm"] + self.nbPlayer = param["nbPlayer"] # used for determining the probaibliy of intermediate state switching + + #used in Eq. (10) and Eq. 
(11) in [Wang2019] + self.epsilon = param["epsilon"] + + # Initialization + self.nb_observation = np.zeros(self.nbArm) + self.accumulated_value = np.zeros(self.nbArm) + self.arm_estimate = np.zeros(self.nbArm) + + self.learning_state = STATE_EXPLORE + + self.selected_arm = 0 + self.nb_state_visit = np.zeros((2, self.nbArm)) + + self.current_state = [STATE_DISCONTENT, 0] + + self.max_u = 1 + self.best_policy = 0 + + # requirement from [Bistritz2019], the discrepancy of sum of maximum value and the social-optimal value + self.c = 1.2 # this is an estimation + self.pert_factor = self.c * self.nbPlayer +# self.reference_reward = 0 # the current version of GoT doesn't need a reference reward + + def reset(self): + self.nb_observation = np.zeros(self.nbArm) + self.accumulated_value = np.zeros(self.nbArm) + + # the static game is formulated on arm_estimate + self.arm_estimate = np.zeros(self.nbArm) + + self.learning_state = STATE_EXPLORE + + self.selected_arm = 0 + self.nb_state_visit = np.zeros((2, self.nbArm)) + + #set as a default 3-tuple: (mood, reference action, reference payoff = 0 or none-zero) + self.current_state = [STATE_DISCONTENT, 0] +# self.reference_reward = 0 + + self.max_u = 1 + self.best_policy = 0 + + # --- functionalities + def explore(self, context = None, time = None): + """ + we will update the estimated arm values in function learn_arm_value() + context and time are not used for this version + """ + assert self.learning_state == STATE_EXPLORE, "learning state does not match"#debug + + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # must be called after explore + assert self.learning_state == STATE_EXPLORE, "learning state does not match"#debug + assert len(arm_values) == self.nbArm and len(collisions) == self.nbArm, "inputs are invalid" + assert collisions[self.selected_arm] != 0, "arm selection error" + + if collisions[self.selected_arm] == 1: + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 # obtain a new valid arm-value observation + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + self.arm_estimate[armID] = self.accumulated_value[armID] / self.nb_observation[armID] + else: + pass # do nothing + + return self.arm_estimate + + def set_internal_state(self, context=None, input_state=STATE_EXPLORE): + # GoT does not use context information + # input_state: 0 --explore, 1 -- trial-and-error, 2 -- exploitation + if input_state < STATE_EXPLORE or input_state > STATE_EXPLOIT: + raise Exception("input state is invalid") + + if input_state == STATE_EXPLORE: + pass + elif input_state == STATE_LEARN: + pass + elif input_state == STATE_EXPLOIT: + self.get_best_policy() # calculate once far all + else: + raise Exception("input is not valid.") + + self.learning_state = input_state + + def initalize_static_game(self, epoch=None, context=None): + """ + State initialization is done in init_got_states, + this function is to be removed in the future + """ + id_max_u = np.argmax(self.arm_estimate) + + self.max_u = self.arm_estimate[id_max_u] + +# print("id {} - max u {}".format(id_max_u, self.max_u))# debug + + def init_got_states(self, context=None, starting_state=None): + """ + We have 2 states: Content (C) and Discontent (D). 
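+        (Unlike TnEPlayer above, the Hopeful and Watchful moods are not used by GoT.)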
+ For each agent in each context, the total # of local intermediate state is 2 * nbArm + + + starting_state is used for initializing the state at the beginnning of the epoch + """ + # if we turn (1) on, in each exploration phase the learning algorithm will only use the outcomes of game play in this epoch. + self.nb_state_visit = np.zeros((2, self.nbArm)) # (1): tracks the frequency of state visits + + if starting_state is None: + # set as a default 3-tuple: (mood=discontent, reference action (arm)=0, reference payoff = 0 or zero) + self.current_state = [STATE_DISCONTENT, 0] + + # reference_reward records the real reference reward of the state, + # initialization sets all players to select arm 0 so the reward is 0 due to collision +# self.reference_reward = 0 + else: + self.current_state = starting_state +# self.reference_reward = 0 # need to learn and update the reference reward for the new static game + + + def learn_policy(self, context=None, time=None): + #note that here time is not used + assert self.learning_state == STATE_LEARN, "learning state does not match" #debug + + self.selected_arm = self.update_static_game_action(None, self.current_state) + + return self.selected_arm + + + def update_static_game_action(self, context=None, current_state=None): + """ + Update action in the static game + """ + if current_state[0] == STATE_CONTENT: # if content + #content, Eq. (8) Alg.2 of [Bistritz2019], experiment with prob. epsilon + tmp_factor = self.pert_factor # perturbation factor + + # sampling method 1 + prob_no_change = 1 - self.epsilon**(tmp_factor) + prob_rand_action = self.epsilon**(tmp_factor) / (self.nbArm - 1) + + action_array = list(range(self.nbArm)) + prob_array = np.zeros(self.nbArm) + prob_array[:] = prob_rand_action + prob_array[current_state[1]] = prob_no_change + + action = np.random.choice(action_array, size=None, p=prob_array) + + # sampling method 2 +# seed = np.random.random_sample() +# if seed <= 1 - self.epsilon**(tmp_factor): +# # at content state a player does not experiment frequently +# action = current_state[1] +# else: +# remaining_actions = list(range(self.nbArm)) +# remaining_actions.pop(current_state[1]) +# action_id = np.random.randint(self.nbArm - 1) +# action = remaining_actions[action_id] +# assert action != current_state[1], "sampled action is invalid." + + elif current_state[0] == STATE_DISCONTENT: # if discontent + #discontent + action = np.random.randint(self.nbArm) + assert action >=0 and action < self.nbArm, "sampled action is invalid." + else: + raise Exception("the mood of the current state is invalid") + + return action + + def update_game_state(self, context, collisions, flag_record_frequency=False): + """ + Ignore any context. 
The GoT algorithm is designed for the MP-MAB in stochastic environment w/o context + """ + current_reward = 0 # this is the reward of the static game + if collisions[self.selected_arm] == 1: + current_reward = self.arm_estimate[self.selected_arm] + elif collisions[self.selected_arm] == 0: + raise Exception("the collision is not correctly computed.") + else: + current_reward = 0 # if there is a collision + + if self.current_state[0] == STATE_CONTENT:# if content + # the current mood is content + # check the current reward first + if current_reward <= 0: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + # current_reward > 0 + if self.selected_arm == self.current_state[1]: + # If the current action is the same as the reference action, + # and utility > 0, then a content player remains content with probability 1 + pass # stay at the same state, w/ probability 1 + elif self.selected_arm != self.current_state[1]: + # set the probability + threshold = current_reward / self.max_u * (self.epsilon**(self.max_u - current_reward)) + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[0] = STATE_CONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} remains CONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold)) #debug + else: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} transit to DISCONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold))#debug + + elif self.current_state[0] == STATE_DISCONTENT: + if current_reward <= 0: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + threshold = current_reward / self.max_u * (self.epsilon**(self.max_u - current_reward)) + sampled_result = np.random.choice([0, 1], size=None, p=[threshold, 1-threshold]) + + if sampled_result == 0: + self.current_state[0] = STATE_CONTENT + self.current_state[1] = self.selected_arm + +# info_logger().log_info('Player {}: action {} transit to CONTENT with prob. {}'.format(self.playerID, self.selected_arm, threshold)) #debug + else: + self.current_state[0] = STATE_DISCONTENT + self.current_state[1] = self.selected_arm + else: + raise Exception("unexpected state.") + + # only the last few rounds are considered to count toward the optimal policy + if flag_record_frequency == True: + #update the number of visited states + id_mood = 0 if self.current_state[0] == STATE_CONTENT else 1 + id_action = self.current_state[1] + + self.nb_state_visit[id_mood][id_action] = 1 + self.nb_state_visit[id_mood][id_action] + + def exploit(self, context = None, time=None): + assert time is not None, "time is None" + assert self.learning_state == STATE_EXPLOIT, "learning state does not match at iteration {}".format(time) + +# self.selected_arm = self.get_best_policy(context) # if turning this line on, we'll compute the best policy each time + + self.selected_arm = self.best_policy + return self.selected_arm #return the action + + def get_best_policy(self, context = None): + mat_frequency = self.nb_state_visit[0,:] # over the mood axis, over CONTENT + assert np.shape(mat_frequency) == (self.nbArm,), "shape of frequency is wrong." 
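+        # the arm most frequently occupied while the player was CONTENT (counted only in the
+        # rounds where flag_record_frequency was set in update_game_state()) becomes the
+        # fixed policy used throughout the exploitation phase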
+ + id_max = np.argmax(mat_frequency) #over the remaining action/arm axis + + self.best_policy = id_max + +# info_logger().log_info("GoT - Player {}: frequency {} arm {}".format(self.playerID, mat_frequency, id_max)) #debug + + return id_max \ No newline at end of file diff --git a/Players2.py b/Players2.py new file mode 100644 index 0000000..e2fdc67 --- /dev/null +++ b/Players2.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +# This file defines the player behavior for the specific SOC-MP-MAB algorithms (SOC in MABAlgorithms2.py) +# see also Players.py for other algorithms + +__author__ = "Wenbo Wang" + +import numpy as np +from Players import Player + +if __name__ == '__main__': + print("Warning: this script 'PlayerResult2.py' is NOT executable..") # DEBUG + exit(0) + + +class SOCPlayer(Player): + + def __init__(self, param): + """ + SOCPlayer is the player for the algorithm "stable orthogonal allocation (SOC)" proposed in + "Multi-player multi-armed bandits for stable allocation in heterogeneous ad-hoc networks", IEEE JSAC oct. 2019, + Sumit J. Darak and Manjesh K. Hanawal [Sumit2019]. + + The algorithm is featured by a protocol explicitly resolving collisions with channel switching, + and the channel statistics (index) is learned based on upper confidence bound (UCB). + + Channel allocation is obtained through a master-slave allocation process. Social optimality is not guaranteed. 
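+
+        Each slot is split into a channel transmit (CT) sub-slot and a channel switch (CS)
+        sub-slot (see decide_switching()): the master proposes a channel to switch to, the
+        addressed slave signals agreement or refusal through its own channel occupancy, and
+        the master reads the outcome from the collision feedback in update_policy().
+
+        Minimal construction example (illustrative parameter values):
+            param = {"nbArm": 4, "playerID": 0}
+            player = SOCPlayer(param)
+            arm = player.explore()   # random hopping until the player locks on a channel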
+ """ + self.nbArm = param["nbArm"] + + #for arm of a specific context-player + self.playerID = param["playerID"] + + self.flag_master = False + self.master_collision = np.zeros(2) + + self.flag_lock = False + + self.selected_arm = 0 + self.policy = -1 # set to an invalid value + + self.time = 0 + self.accumulated_value = np.zeros(self.nbArm) + self.arm_score = np.zeros(self.nbArm) # for UCB score computation + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff + self.ranked_armIDs = np.array(list(range(0, self.nbArm))) #ranked according to UCB score + + self.flag_agree_switching = 0 # a 3-state flag, -1: not agree, 0: irrelavent, 1: agree + + def reset(self): + self.flag_master = False + self.flag_lock = False + self.flag_agree_switching = 0 + + self.selected_arm = 0 + self.policy = -1 # set to an invalid value + + self.time = 0 + + self.accumulated_value = np.zeros(self.nbArm) + self.arm_score = np.zeros(self.nbArm) # for UCB score computation + self.nb_observation = np.zeros(self.nbArm) # number of observed non-zero payoff + self.ranked_armIDs = np.array(list(range(0, self.nbArm))) #ranked according to UCB score + self.master_collision[:] = 0 + + # --- functionalities + def explore(self, context = None, time = None): + """ + explore() is equivalent to the algorithm "Random Hopping" in [Sumit2019], + it allows users to orthogonalize on channels through uniformly drawing action samples at random + + flag_lock has to be set after observing the collision feedback + """ + if self.flag_lock == True: + # choose the same action, do nothing + if self.policy == -1: + self.policy = self.selected_arm + else: + self.selected_arm = np.random.randint(self.nbArm) + + return self.selected_arm + + def learn_arm_value(self, context = None, arm_values = None, collisions = None): + # UCB score + if self.flag_agree_switching == -1: + # no arm is selected, this case happens when a slave node evacuates a channel + # to notify the master that it won't switch + pass + elif collisions[self.selected_arm] == 1: + # no collisions + self.flag_lock = True + + self.time = self.time + 1 # only increment when a good sample is obtained + + armID = self.selected_arm + self.nb_observation[armID] = self.nb_observation[armID] + 1 + self.accumulated_value[armID] = self.accumulated_value[armID] + arm_values[armID] + + # update UCB Scores + self.arm_score = self.accumulated_value / (self.nb_observation+1e-9) + np.sqrt(2*self.time / (self.nb_observation+1e-9)) + # get the preference + self.ranked_armIDs = np.argsort(-self.arm_score) + + def exploit(self, context=None, time=None): + # SOC doesn't have a clear phase of exploitation, the players uses a collision avoidance-like + # protocol to explicitly allocate the channels among players + assert self.policy != -1, "policy is not obtained" + + self.selected_arm = self.policy + return self.selected_arm + + def set_master(self, MB_id): + assert self.flag_lock == True, "the channel is not locked yet" + + # check if the MB_id is currently self.selected_arm + if self.policy == MB_id: +# print("set_master(): master node ID {} at MB {}".format(self.playerID, MB_id)) # debugging + self.flag_master = True + # reset the recorder + self.master_collision[:] = 0 + else: +# print("set_master: slave node ID {} at MB {}".format( self.playerID, MB_id)) # debugging + self.flag_master = False + + return self.flag_master + + def set_master_action(self, SB_id): + """ + set the action of the master node (as the channel indicated by the current block ID) + """ + assert 
self.flag_lock == True, "the channel is not locked yet" + assert self.flag_master == True, "not a master node" + assert self.policy == self.selected_arm, "action not aligned to policy" + + # get the ranked_arms without self.selected_arm + tmp_arm_rank = np.ndarray.tolist(self.ranked_armIDs) + + # see footnote 1 of [Sumit2019] + current_arm_rank = tmp_arm_rank.index(self.selected_arm) + tmp_arm_rank.pop(current_arm_rank) + + if SB_id - 1 < current_arm_rank: +# print("Master ID-{}: av-{:.2} ---> av-{:.2}".format(self.playerID, self.arm_score[self.selected_arm], +# self.arm_score[tmp_arm_rank[SB_id - 1]])) # debugging + master_arm_choice = tmp_arm_rank[SB_id-1] + else: + master_arm_choice = self.selected_arm + + # set policy to the currently reserved channel, signal over the new channel + self.policy = self.selected_arm + self.selected_arm = master_arm_choice + + return self.selected_arm, self.policy # new, old (MB) + + def decide_switching(self, subslot_id, target_arm=None): + # has to be called by a slave + assert self.flag_lock == True, "the channel is not locked yet" + assert self.flag_master == False, "not a slave node." + assert self.policy != -1, "policy is not set" + + if subslot_id == 0: + # it is in a channel transmit (CT) sub-slot + assert target_arm is not None, "master arm choice not set" + + if target_arm != self.selected_arm: + # not requested and do nothing + self.flag_agree_switching = 0 # not requested + +# print("Slave ID-{}: not requested {} ---> {}".format(self.playerID, self.selected_arm, target_arm)) # debugging + else: + arm_rank_list = np.ndarray.tolist(self.ranked_armIDs) + current_arm_rank = arm_rank_list.index(self.selected_arm) + requested_arm_rank = arm_rank_list.index(target_arm) + +# print("Slave ID-{}: av-{:.2} ---> av-{:.2}".format(self.playerID, self.arm_score[self.selected_arm], +# self.arm_score[target_arm])) # debugging + + if requested_arm_rank < current_arm_rank: + # if master_arm_choice has a higher score, switch + self.flag_agree_switching = 1 # agreed + + self.selected_arm = self.policy # choose the currently preferred arm + self.policy = target_arm # update policy + +# print("UE-{} agrees: CH-{} to CH-{} w/ scores: {} to {}".format(self.playerID, +# self.selected_arm, target_arm, arm_rank_list[current_arm_rank], arm_rank_list[requested_arm_rank])) # debugging + else: + # if master_arm_choice is worse than the current arm, refuse switching + self.flag_agree_switching = -1 # refused + # no change to policy + self.selected_arm = self.policy + else: + # it is in a channel switch (CS) sub-slot + if self.flag_agree_switching == -1: + # refuse swithcing, leave the channel for one slot + self.selected_arm = -1 + else: + # if self.flag_agree_switching == 1: # agree to switch, stay on the channel to collide + # if self.flag_agree_switching == 0: # not requested to switch, stay on the channel + # transmit on the same channel or not affected + pass + + return self.selected_arm + + def update_policy(self, subslot_id, collisions): + # the original paper does not specify when to stop updating the arm-value estimation + # so we aussme that it never stops + assert self.flag_lock == True, "the channel is not locked yet" + + # update actions + if subslot_id == 0: + # only update the master in CS slot + if self.flag_master == True: + self.master_collision[0] = collisions[self.selected_arm] + else: + pass + elif subslot_id == 1: + if self.flag_master == True: + self.master_collision[1] = collisions[self.selected_arm] + + #update policy and action, according to Fig.2 
[Sumit2019] + if self.master_collision[0] > 1 and self.master_collision[1] > 1: # senario 1 (colliding twice): + # switching allowed + self.policy = self.selected_arm + +# print("Master ID-{}: policy updated w/ switching".format(self.playerID)) # debugging + elif self.master_collision[0] == 1 and self.master_collision[1] == 1: # senario 3 (no collision, twice) + self.policy = self.selected_arm + +# print("Master ID-{}: policy updated for vacant channel".format(self.playerID)) # debugging + else: + # roll back +# print("Master ID-{}: policy rolled back".format(self.playerID)) # debugging + pass + + # reset the recorder + self.master_collision[:] = 0 + else: + # reset flag to "not requested" + self.flag_agree_switching = 0 + else: + raise Exception("invalid sub-slot ID.") + + self.selected_arm = self.policy \ No newline at end of file diff --git a/envutils.py b/envutils.py new file mode 100644 index 0000000..1d97069 --- /dev/null +++ b/envutils.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +""" + +# This file defines the class Struct used in simu_config.py, +# and the automation method for arm parameter generation + +__author__ = "Wenbo Wang" + +import numpy as np + +if __name__ == '__main__': + print("Warning: this script 'envutils.py' is NOT executable..") # DEBUG + exit(0) + + +class Struct(object): + """ + Simple class for instantiating objects to add arbitrary attributes as variables. + Used for serializing configurations parameters. + Reference: + https://stackoverflow.com/questions/6198372/most-pythonic-way-to-provide-global-configuration-variables-in-config-py/43941592 + """ + def __init__(self, *args): + self.__header__ = str(args[0]) if args else None + + def __repr__(self): + if self.__header__ is None: + return super(Struct, self).__repr__() + return self.__header__ + + def next(self): + """ Fake iteration functionality. + """ + raise StopIteration + + def __iter__(self): + """ Fake iteration functionality. + We skip magic attribues and Structs, and return the rest. + """ + ks = self.__dict__.keys() + for k in ks: + if not k.startswith('__') and not isinstance(k, Struct): + yield getattr(self, k) + + def __len__(self): + """ Don't count magic attributes or Structs. + """ + ks = self.__dict__.keys() + return len([k for k in ks if not k.startswith('__')\ + and not isinstance(k, Struct)]) + + +def uniform_means(nbContext=2, nbPlayers=2, nbArms=4, delta=0.05, lower=0., upper=1.): + """ + Return a dictionary of lower and upper bounds of arm values, + well spaced (needed for some algorithms that requires arm-values to be distrigushed) for uniform distribution: + + - in [lower, upper], + - starting from lower + (upper-lower) * delta, up to lower + (upper-lower) * (1 - delta), + - and there is nbArms arms. 
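+    - one shuffled arrangement of these nbArms means is appended per player, so the
+      returned value is a list with nbPlayers entries.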
+ + >>> np.array(uniformMeans(2, 0.1)) + array([0.1, 0.9]) + >>> np.array(uniformMeans(3, 0.1)) + array([0.1, 0.5, 0.9]) + >>> np.array(uniformMeans(9, 1 / (1. + 9))) + array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + """ + assert nbPlayers >= 1, "Error: 'nbPlayers' = {} has to be >= 1.".format(nbPlayers) # DEBUG + assert nbArms >= 1, "Error: 'nbArms' = {} has to be >= 1.".format(nbArms) # DEBUG + assert nbArms >= nbPlayers, "Error: 'nbArms' has to be larger than 'nbPlayers'." + assert upper - lower > 0, "Error: 'upper - lower' = {:.3g} has to be > 0.".format(upper - lower) # DEBUG + assert 0. < delta < 1., "Error: 'delta' = {:.3g} has to be in (0, 1).".format(delta) # DEBUG + mus = lower + (upper-lower) * np.linspace(delta, 1 - delta, nbArms) + + means = []; + for idPlayer in range(nbPlayers): + np.random.shuffle(mus) + means.append(mus) + return means + + +def randomMeans(nbPlayers=2, nbArms=4, mingap=None, lower=0., upper=1.): + """Return a list of means of arms, randomly sampled uniformly in [lower, lower + amplitude], with a min gap >= mingap. + + - All means will be different, except if ``mingap=None``, with a min gap > 0. + + """ + assert nbArms >= 1, "Error: 'nbArms' = {} has to be >= 1.".format(nbArms) # DEBUG + assert upper - lower > 0, "Error: 'upper - lower' = {:.3g} has to be > 0.".format(upper - lower) # DEBUG + mus = np.random.rand(nbArms) + if mingap is not None and mingap > 0: + assert (nbArms * mingap) < (upper - lower / 2.), "Error: 'mingap' = {:.3g} is too large, it might be impossible to find a vector of means with such a large gap for {} arms.".format(mingap, nbArms) # DEBUG + + means = [] + for idPlayer in range(nbPlayers): + while np.min(np.abs(np.diff(mus))) <= mingap: # Ensure a min gap > mingap + mus = np.random.rand(nbArms) + + mus = lower + (upper - lower) * mus + means.append(mus) + + return means \ No newline at end of file diff --git a/loggingutils.py b/loggingutils.py new file mode 100644 index 0000000..88403c5 --- /dev/null +++ b/loggingutils.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this +code for research that results in publications, please cite our original +article listed above. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +""" + +""" +This file implement the logging module as the wrapper of the standard logging API provided by python. +Please use the format, e.g., "info_logger().log_info("...")" to record the information of interest in a log file +stored in the path "$PWD/results" +""" + +__author__ = "Wenbo Wang" + +import logging +import os +import functools + +from datetime import datetime + + +def __singleton(class_): + """ + Make a singleton class with only one single instance. 
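+    Every call to the decorated class (e.g. info_logger()) returns the same instance.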
+ Note that it cannot prevent instantiation in multiple processes + """ + @functools.wraps(class_) + def wrapper_singleton(*args, **kwargs): + if wrapper_singleton.instance is None: +# print("wrapper_singleton.instance") + wrapper_singleton.instance = class_(*args, **kwargs) + + return wrapper_singleton.instance + + wrapper_singleton.instance = None + + return wrapper_singleton + +@__singleton +class info_logger(object): + def __init__(self): + log_file_name = 'log' + # the logging module may be used by different process in the parallel mode + # for each process we create a single log file + process_id = os.getpid() + + now = datetime.now() + current_date = now.strftime("(%Y-%m-%d-%H-%M-%S)") + cwd = os.getcwd() # current directory + logFilePath = "{}\{}\{}-{}-{}.log".format(cwd, "results", log_file_name, process_id, current_date) + + # get the instance of logger + self.logger = logging.getLogger(log_file_name) + self.logger.setLevel(logging.DEBUG) + + #define the output format + logging_format = logging.Formatter("[%(threadName)s, %(levelname)s] %(message)s") +# logging_format = logging.Formatter('%(name)s %(asctime)s %(levelname)-8s:%(message)s') + + # file handler + file_handler = logging.FileHandler(logFilePath, mode='w') + file_handler.setFormatter(logging_format) + file_handler.setLevel(logging.DEBUG) + + self.logger.addHandler(file_handler) + + print("logger created @ {}".format(logFilePath)) + self.log_info("logger created") + + # for different levels of messages, we can also call the logger member directly + def log_info(self, msg): + self.logger.info(msg) + + def log_debug(self, msg): + self.logger.debug(msg) + + def log_error(self, msg): + self.logger.error(msg) + +if __name__ == '__main__': + print("Warning: this script 'loggingutils.py' is NOT executable..") # DEBUG + exit(0) +else: + # turn it on then we create one log file for each process before it is really needed +# fileLogger = info_logger() + pass \ No newline at end of file diff --git a/main_MPMAB.py b/main_MPMAB.py new file mode 100644 index 0000000..dd5ef3c --- /dev/null +++ b/main_MPMAB.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# Used for the simulations in the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed +# Bandwidth as a Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. +# This file is the main entrance of all the simulations except that for those w.r.t. network sizes. 
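+#
+# Example invocation (assuming the configuration IDs defined in simu_config.py):
+#     python main_MPMAB.py -id 7
+# selects CONFIG.CONFIGURATION_DICT[7]; when -id is omitted, the default configuration
+# ENV_SCENARIO_7 chosen at the bottom of this file is used.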
+ + +__author__ = "Wenbo Wang" + +import numpy as np +import pandas as pd + +import time +import datetime +import argparse + +from GameEvaluator import AlgEvaluator +from plotutils import plot_data_frame, plot_repeated_simu_results + +import simu_config as CONFIG + +def simulation_execution(game_config): + """ + simulation_execution() is the main body of the MP-MAP algorithm simulations + """ + print("MAB game with configuration '{}' starts to play...".format(game_config.__repr__())) + + game_horizon = game_config.game_horizon + alg_engine = AlgEvaluator(game_config.env_config) + + #add algorithms + for alg_id in range(len(game_config.alg_types)): + alg_engine.add_algorithm(algo_type=game_config.alg_types[alg_id], + custome_params=game_config.alg_configs[alg_id]) + + print("MAB game prepares the environment for arm type '{}' of {} rounds".format(game_config.env_config['env_type'], game_horizon)) + alg_engine.prepare_arm_samples() + + # simulation 1: reward plotting to compare the efficiency of the algorithms + if "enable_efficiency_simulation" in game_config.__dict__ and game_config.enable_efficiency_simulation: + start_time_oneshot = time.time() + + ####################################################################### + # + if game_config.flag_parallel != True: +# print("starting single-process simulation...") + alg_engine.play_game(flag_progress_bar=game_config.flag_progress_bar) + else: +# print("starting parallel simulation...") + alg_engine.play_game_parallel(flag_progress_bar=game_config.flag_progress_bar) + # + ####################################################################### + + alg_engine.plot_rewards(save_fig = game_config.flag_save_figure, save_data = game_config.save_data) + + # printing + running_time = time.time() - start_time_oneshot + print("Single-shot simulation completes in {} for {} iterations.".format( \ + datetime.timedelta(seconds=running_time), game_horizon)) + + # simulation 2/3/4: plotting regret or total rewards over horizon + if ("enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation) or \ + ("enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation) or \ + ("enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation): + start = game_config.T_start + nb_point = game_config.T_step + + horizon_list = np.exp(np.linspace(np.log(start), np.log(game_horizon), nb_point)) + simu_rounds = game_config.T_simu_rounds + + start_time_repeated = time.time() + + ####################################################################### + # + if game_config.flag_parallel != True: +# print("starting single-process simulation...") + simulation_results = alg_engine.play_repeated_game(horizon_list, simulation_rounds=simu_rounds, + flag_progress_bar=game_config.flag_progress_bar) + else: +# print("starting parallel simulation...") + simulation_results = alg_engine.play_repeated_game_parallel(horizon_list, simulation_rounds=simu_rounds, + flag_progress_bar=game_config.flag_progress_bar) + # + ####################################################################### + + # printing + running_time = time.time() - start_time_repeated + print("Repeated simulation completes in {} with maximum horizon {} in {} rounds of plays...".format(\ + datetime.timedelta(seconds=running_time), game_horizon, simu_rounds)) + + # virtualization for simulation 2 + if "enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation: + # locate the reference algorithm + 
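+        # (index 0 is the first algorithm added via add_algorithm(); its reward curve is
+        # used as the benchmark when computing the average regret of the other algorithms)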
optimal_alg_id = 0 + + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + avg_regret_series = np.empty((0, len_horizon)) + for alg_id in range(len(simulation_results['algorithm_name'])): + if alg_id != optimal_alg_id: + # the returned value simulation_results['reward_series'] is organized as an array: + # (len(algorithm_ids), simulation_rounds*len(horizon_list)) + horizon_series = simulation_results['horizon'][alg_id,:] + avg_regret = (simulation_results['reward_series'][optimal_alg_id,:] - + simulation_results['reward_series'][alg_id,:]) / horizon_series + + avg_regret_series = np.append(avg_regret_series, avg_regret) # flatten + time_series = np.append(time_series, horizon_series) + + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Average regret'] = avg_regret_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + simu_data_frame = pd.DataFrame(prepared_results) + + # plot and save the figure + file_name = "monte_carlo_regret" if game_config.flag_save_figure==True else None + sns_figure_unused, repeated_play_data_name = plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Average regret", huelabel='Algorithms', + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + # post processing, add the theoretical bound to the figure + flag_bound = False + if hasattr(game_config, 'flag_regret_bound'): + flag_bound = game_config.flag_regret_bound + else: + flag_bound = False + + plot_repeated_simu_results(start=start, horzion=game_horizon, nbPoints=nb_point, flag_bound=flag_bound, + data_file_name=repeated_play_data_name) + + # virtualization for simulation 3 + if "enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation: + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + reward_series = np.array([]) + for alg_id in range(len(simulation_results['algorithm_name'])): + horizon_series = simulation_results['horizon'][alg_id,:] + avg_rewards = simulation_results['reward_series'][alg_id, :] / horizon_series + + reward_series = np.append(reward_series, avg_rewards) # flatten + time_series = np.append(time_series, horizon_series) + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Average sum of rewards'] = reward_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + simu_data_frame = pd.DataFrame(prepared_results) + + #plot and save the figure + file_name = "monte_carlo_rewards" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Average sum of rewards", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + # virtualization for simulation 4 + if "enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation: + len_horizon = simulation_results['horizon'].shape[1] + time_series = np.empty((0, len_horizon)) + alg_indicator_series = [] + + switching_series = np.array([]) + collision_series = np.array([]) + + for alg_id in 
range(len(simulation_results['algorithm_name'])): + horizon_series = simulation_results['horizon'][alg_id,:] + switching = simulation_results['switching_count_series'][alg_id, :] + collisions = simulation_results['collision_series'][alg_id, :] + + switching_series = np.append(switching_series, switching) # flatten + collision_series = np.append(collision_series, collisions) # flatten + + time_series = np.append(time_series, horizon_series) + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series)) + + prepared_results = {} + prepared_results['Accumulated switching counts'] = switching_series + prepared_results['Accumulated collision counts'] = collision_series + prepared_results['Total number of plays'] = time_series + prepared_results['Algorithms'] = alg_indicator_series + + assert len(switching_series) == len(collision_series), "switching array must be of the same length: {}, {}".format( + len(switching_series), len(collision_series)) + + simu_data_frame = pd.DataFrame(prepared_results) + + #plot and save the figure: 1 + file_name = "monte_carlo_switching" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Accumulated switching counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + #plot and save the figure: 2 + file_name = "monte_carlo_collision" if game_config.flag_save_figure==True else None + plot_data_frame(simu_data_frame, + xlabel="Total number of plays", ylabel="Accumulated collision counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=game_config.repeated_play_data_name) + + +if __name__ == '__main__': + """ + Parallel processing is suggested to be turned on for repeated simulations (see simu_config.py) + It is approximately 2X to 4X faster in terms of the total time than the single-process simulation + """ + arg_parser = argparse.ArgumentParser(description='Select a configuration set in \'simu_config.py\' to run the simulations') + # Add the arguments + arg_parser.add_argument('-id', metavar='ID', type=int, + help='Choose the configuration ID between [1-13], see the summary of simu_config.py') + args = arg_parser.parse_args() + + if args.id is None: + # default choice of configuration for a simulation + game_config = CONFIG.ENV_SCENARIO_7 # + else: + if args.id in CONFIG.CONFIGURATION_DICT.keys(): + game_config = CONFIG.CONFIGURATION_DICT[args.id] + else: + raise Exception('the input configuration ID is not valid') + + # beginning of the game + start_time = time.time()# record the starting time of the simulation, start simulations + + simulation_execution(game_config) + + #end of the game + running_time = time.time() - start_time + print("Simulation completes in {}.".format(datetime.timedelta(seconds=running_time))) diff --git a/main_MPMAB_IoT_Simu.py b/main_MPMAB_IoT_Simu.py new file mode 100644 index 0000000..74513c2 --- /dev/null +++ b/main_MPMAB_IoT_Simu.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# Used for the simulations in the paper "Decentralized Learning for Channel Allocation in IoT Networks over Unlicensed +# Bandwidth as a Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al. +# This file is the main entrance of the simulations regarding the network performance vs. network scale. + +__author__ = "Wenbo Wang" + +# This file is the main entrance of the simulations for algorithm performance w.r.t. network sizes. + +__author__ = "Wenbo Wang" + +import numpy as np +import pandas as pd + +import time +import datetime +import sys +#import argparse + +from GameEvaluator import AlgEvaluator +from plotutils import plot_data_frame + +from envutils import Struct as Section + +def simulation_execution(alg_engine, game_config, player_number, game_horizon, simu_rounds, flag_parallel=False): + """ + simulation_execution() is the main body of the MP-MAP algorithm simulations + """ + +# print("number of arms: {}, number of players: {}".format(alg_engine.nbArms, alg_engine.nbPlayers)) + + #add algorithms + for alg_id in range(len(game_config.alg_types)): + alg_engine.add_algorithm(algo_type=game_config.alg_types[alg_id], + custome_params=game_config.alg_configs[alg_id]) + + if flag_parallel == True: + simulation_results = alg_engine.play_repeated_game_parallel([game_horizon], simulation_rounds=simu_rounds, + flag_progress_bar=True) + else: + # for large network, we use seuqnecial processing in order to avoid overwhelming the memory + simulation_results = alg_engine.play_repeated_game([game_horizon], simulation_rounds=simu_rounds, + flag_progress_bar=True) + + + network_size_indicator_series = [] + alg_indicator_series = [] + reward_series = np.array([]) + switching_series = np.array([]) + collision_series = np.array([]) + +# print("size of simulation results, rewards: {}".format(np.shape(simulation_results['reward_series']))) +# print("length of simulation_results: {}".format(len(simulation_results['algorithm_name']))) + + for alg_id in range(len(simulation_results['algorithm_name'])): + avg_rewards = simulation_results['reward_series'][alg_id, :] / game_horizon + switching = simulation_results['switching_count_series'][alg_id, :] + collisions = simulation_results['collision_series'][alg_id, :] + + network_sizes = np.zeros(avg_rewards.shape) + network_sizes[:] = player_number + + reward_series = np.append(reward_series, avg_rewards) # flatten + switching_series = np.append(switching_series, switching) # flatten + collision_series = np.append(collision_series, collisions) # flatten + alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * simu_rounds) + network_size_indicator_series.extend(network_sizes) + + + prepared_results = {} + prepared_results['Sum of rewards'] = reward_series + prepared_results['Node Number'] = network_size_indicator_series + prepared_results['Accumulated switching counts'] = switching_series + prepared_results['Accumulated collision counts'] = collision_series + prepared_results['Algorithms'] = alg_indicator_series + +# print("length: {}, {}, {}, {}, {}".format(len(reward_series), len(network_size_indicator_series), +# len(switching_series), len(collision_series), len(alg_indicator_series))) + + simu_data_frame = pd.DataFrame(prepared_results) + + + return simu_data_frame + + +def 
simulation_plot_results(input_data_frame): + #plot and save the figure: 1 + file_name = "network_switching" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Accumulated switching counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + + #plot and save the figure: 2 + file_name = "network_collision" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Accumulated collision counts", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + + file_name = "network_rewards" + plot_data_frame(input_data_frame, + xlabel="Node Number", ylabel="Sum of rewards", huelabel='Algorithms', + flag_semilogx = False, + save_file_name=file_name, save_data_name=None) + +if __name__ == '__main__': + """ + Parallel processing is turned off by default. + Unless the machine memory is sufficiently large, we may have a risk of running out of memory + for a large network scale. + """ + yes = {'yes','y', 'ye', 'Y'} + no = {'no','n', 'N'} + + print("This simulation takes more than 10 hrs. \nDo you want to continue? [y/n]") + while True: + input_choice = input().lower() + if input_choice in yes: + break + elif input_choice in no: + print('execution is terminated.') + sys.exit() + else: + print("Please respond with 'yes' or 'no'") + + game_horizon = 400000 + simu_rounds = 40 + + max_player_no = 30 # the more nodes we have, the longer horizon we need for find a social-optimal allocation. + + player_numbers = np.linspace(5, max_player_no, 6) #Example: [max_player_no, 25, 20, 15, 10, 5] + max_arm_number = max_player_no + 1 # to save some memory + + env_config = {'horizon': game_horizon, + 'arm number': max_arm_number, + 'player number': max_player_no, + 'context set': {"context 1", "context 2", "context 3"},# + 'env_type': 'HetNet simulator', # change the underlying distribution here + 'enabel mmWave': True, + 'cell range': 250, + 'context_prob': {'context 1': 2, 'context 2': 1, 'context 3': 1}, + 'los_prob': {'context 1': 1.5, 'context 2': 2, 'context 3': 1} + } + + # generate the arm-value sequence for only once + alg_engine = AlgEvaluator(env_config) + alg_engine.prepare_arm_samples() + + game_config = Section("Simulation of HetNet: reward evolution for 4 algorithms") + game_config.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Thrones'] #, + + # beginning of the game + start_time = time.time()# record the starting time of the simulation, start simulations + data_frame = [] + for player_no in player_numbers: + num_players = int(player_no) + + # be sure that the value of the two constant variables satisfiy the condition in Theorem 2 of [Wang2020] + alpha11 = -0.40/num_players + alpha12 = 0.45/num_players + + game_config.alg_configs = [None, + {"delta": 0.02, "exploration_time": 4000}, + {"c1": 2000, "c2": 10000,"c3":3000, "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": alpha11, "alpha12": alpha12, "alpha21": -0.39, "alpha22": 0.4,}, + {"c1": 2000, "c2": 10000,"c3":3000, "epsilon": 0.01, "delta": 1.5}, + ] + + #set the arm number to be used in the simulation + alg_engine.reset_player_number(num_players) + alg_engine.reset_arm_number(num_players + 1) + alg_engine.clear_algorithms() + + if player_no >= 10: + temp_simu_data_frame = simulation_execution(alg_engine, game_config, num_players, game_horizon, simu_rounds) + else: + # There is always a risk of overwhelming the memory capacity with parallel processing, especially when num_players > 15 + # Set the last parameter to 
True to enable parallel processing + temp_simu_data_frame = simulation_execution(alg_engine, game_config, num_players, game_horizon, simu_rounds, False) + + data_frame.append(temp_simu_data_frame) + + #end of the numerical simulation + input_data = pd.concat(data_frame) + running_time = time.time() - start_time + print("Simulation completes in {}.".format(datetime.timedelta(seconds=running_time))) + + #plotting figures + simulation_plot_results(input_data) \ No newline at end of file diff --git a/obsolete/UniformArm.py b/obsolete/UniformArm.py new file mode 100644 index 0000000..cd370f3 --- /dev/null +++ b/obsolete/UniformArm.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Nov 27 14:15:17 2019 + +Partially inspired by the project SMPyBandits. This file defines the running framework of the bandit simulation. +""" + +""" +Uniformly distributed arm in [0, 1], or [lower, upper]_context, for each context. + +Example of creating an arm: + +>>> import random; import numpy as np +>>> random.seed(0); np.random.seed(0) +>>> Unif01 = UniformArm(0, 1) +>>> Unif01 +U(0, 1) +>>> Unif01.mean +0.5 + +Examples of sampling from an arm: + +>>> Unif01.draw() # doctest: +ELLIPSIS +0.8444... +>>> Unif01.draw_nparray(20) # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE +array([0.54... , 0.71..., 0.60..., 0.54..., 0.42... , + 0.64..., 0.43..., 0.89... , 0.96..., 0.38..., + 0.79..., 0.52..., 0.56..., 0.92..., 0.07..., + 0.08... , 0.02... , 0.83..., 0.77..., 0.87...]) +""" +from __future__ import division, print_function # Python 2 compatibility + +__author__ = "Wenbo Wang" +__version__ = "0.6" + +from random import random +from numpy.random import random as nprandom + +# Local imports +try: + from .Arm import Arm +except ImportError: + from Arm import Arm + + +class UniformArm(Arm): + """ Uniformly distributed arm, default in [0, 1], + + - default to (mini, maxi), + - or [lower, lower + amplitude], if (lower=lower, amplitude=amplitude) is given. + + >>> arm_0_1 = UniformArm() + >>> arm_0_10 = UniformArm(0, 10) # maxi = 10 + >>> arm_2_4 = UniformArm(2, 4) + >>> arm_m10_10 = UniformArm(-10, 10) # also UniformArm(lower=-10, amplitude=20) + """ + + def __init__(self, lower=0., upper=1., context_set): + """New arm.""" + self.lower = lower #: Lower value of rewards, corresponding to array of states + self.upper = upper #: Upper value of rewards + self.amplitude = upper - lower #: Amplitude of value of rewards + self.context_set = context_set + + self.amplitude = upper - lower #: Amplitude of rewards + self.mean = (self.lower + self.upper) / 2.0 #: Mean for this UniformArm arm + + # --- Random samples + + def draw(self, t=None): + """ Draw one random sample. The parameter t is ignored in this Arm.""" + shape = (1, len(self.context_set)) + return self.lower + (nprandom(shape) * self.amplitude) + + # --- Printing + + def __str__(self): + return "UniformArm" + + def __repr__(self): + return "U({:.3g}, {:.3g})".format(self.lower, self.upper) + + # --- Lower bound + + @staticmethod + def kl(x, y): + """ The kl(x, y) to use for this arm.""" + return klBern(x, y) + + @staticmethod + def oneLR(mumax, mu): + """ One term of the Lai & Robbins lower bound for UniformArm arms: (mumax - mu) / KL(mu, mumax). """ + return (mumax - mu) / klBern(mu, mumax) + + +__all__ = ["UniformArm"] + + +# --- Debugging + +if __name__ == "__main__": + # Code for debugging purposes. 
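    # Note on the class above (descriptive only): draw() returns one uniform sample per
    # context, i.e. an array of shape (1, len(context_set)) with values in [lower, upper),
    # obtained by rescaling numpy.random.random (imported as nprandom) by `amplitude` and
    # shifting by `lower`. Since `context_set` has no default value in __init__, it must be
    # supplied at construction, unlike the older scalar UniformArm(lower, upper) interface
    # illustrated in the module docstring above.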
+ from doctest import testmod + print("\nTesting automatically all the docstring written in each functions of this module :") + testmod(verbose=True) \ No newline at end of file diff --git a/obsolete/collision_models.py b/obsolete/collision_models.py new file mode 100644 index 0000000..c0867b5 --- /dev/null +++ b/obsolete/collision_models.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 26 20:31:54 2019 + +@author: wenbo2017 +""" + +""" +Partially inspired by the project SMPyBandits. This file defines the running framework of the bandit simulation. +This file defines the reward generation and collision resolution method "collision_models". +to be extended to other types of collisions, currently only the non-colliding player is rewarded with non-zero value +""" + +__author__ = "Wenbo Wang" + +import numpy as np + +def onlyRewardNoCollision(t, arms, players, choices, pulls, collisions): + """ Simple collision model where only the players alone on one arm samples it and receives the reward. + + - The numpy array 'choices' is the choices of players choosing arms + - Collision should be rewarded 0 + """ + + nb_collisions = np.bincount(choices, minlength=len(arms)) + + for i, player in enumerate(players): # Loop over the player set + # pulls counts the number of selection, not the number of successful selection. + pulls[i, choices[i]] += 1 + if nb_collisions[choices[i]] <= 1: # No collision + player.getReward(choices[i]) # Observing reward + else: + collisions[choices[i]] += 1 # Should be counted here, onlyUniqUserGetsReward + # handleCollision_or_getZeroReward(player, choices[i]) # NOPE + player.getCollisionReward(choices[i]) + +# Default collision model to use +defaultCollisionModel = onlyRewardNoCollision + + +#: List of possible collision models +collision_models = [ + onlyRewardNoCollision, +] \ No newline at end of file diff --git a/obsolete/plotsettings.py b/obsolete/plotsettings.py new file mode 100644 index 0000000..1bfa6fb --- /dev/null +++ b/obsolete/plotsettings.py @@ -0,0 +1,458 @@ +# -*- coding: utf-8 -*- +""" plotsettings: use it like this, in the Environment folder: + +>>> import sys; sys.path.insert(0, '..') +>>> from .plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, wraptext, wraplatex, legend, show_and_save, nrows_ncols +""" +from __future__ import division, print_function # Python 2 compatibility + +__author__ = "Lilian Besson" +__version__ = "0.9" + +from textwrap import wrap +import os.path + +import matplotlib as mpl +# mpl.use('Agg') # XXX is it a good idea? Nope, use "export MPLBACKEND='Agg'" in your bashrc ... Cf. http://stackoverflow.com/a/4935945/ and http://matplotlib.org/faq/usage_faq.html#what-is-a-backend +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick + +import numpy as np +import seaborn as sns + +# Customize here if you want a signature on the titles or xlabel, of each plot +from datetime import datetime +import locale # See this bug, http://numba.pydata.org/numba-doc/dev/user/faq.html#llvm-locale-bug +locale.setlocale(locale.LC_TIME, 'C') +monthyear = "{:%b.%Y}".format(datetime.today()).title() #: Month.Year date + +from os import getenv + +# Backup figure objects +from pickle import dump as pickle_dump + +if getenv('DEBUG', 'False') == 'True': + signature = "\n(By Lilian Besson, {}, cf. 
SMPyBandits.GitHub.io - MIT Licensed)".format(monthyear) #: A small string to use as a signature +else: + signature = "" + +DPI = 120 #: DPI to use for the figures +# FIGSIZE = (19.80, 10.80) #: Figure size, in inches! +FIGSIZE = (16, 9) #: Figure size, in inches! +# FIGSIZE = (12.4, 7) #: Figure size, in inches! +# FIGSIZE = (8, 6) #: Figure size, in inches! +# FIGSIZE = (8, 4.5) #: Figure size, in inches! + +# Customize the colormap +HLS = True #: Use the HLS mapping, or HUSL mapping +VIRIDIS = False #: Use the Viridis colormap + +# Bbox in inches. Only the given portion of the figure is saved. If 'tight', try to figure out the tight bbox of the figure. +BBOX_INCHES = "tight" #: Use this parameter for bbox +BBOX_INCHES = None + +if __name__ != '__main__': + # use a clever color palette, eg http://seaborn.pydata.org/api.html#color-palettes + sns.set(context="talk", style="whitegrid", palette="hls" if HLS else "husl", font="sans-serif", font_scale=0.95) + + # Use tex by default http://matplotlib.org/2.0.0/users/dflt_style_changes.html#math-text + # mpl.rcParams['text.usetex'] = True # XXX force use of LaTeX + mpl.rcParams['font.family'] = "sans-serif" + mpl.rcParams['font.sans-serif'] = "DejaVu Sans" + mpl.rcParams['mathtext.fontset'] = "cm" + mpl.rcParams['mathtext.rm'] = "serif" + + # Configure size for axes and x and y labels + # Cf. https://stackoverflow.com/a/12444777/ + mpl.rcParams['axes.labelsize'] = "small" + mpl.rcParams['xtick.labelsize'] = "x-small" + mpl.rcParams['ytick.labelsize'] = "x-small" + mpl.rcParams['figure.titlesize'] = "small" + + # Configure the DPI of all images, once and for all! + mpl.rcParams['figure.dpi'] = DPI + # print(" - Setting dpi of all figures to", DPI, "...") # DEBUG + + # Configure figure size, even of if saved directly and not displayed, use HD screen + # cf. https://en.wikipedia.org/wiki/Computer_display_standard + mpl.rcParams['figure.figsize'] = FIGSIZE + # print(" - Setting 'figsize' of all figures to", FIGSIZE, "...") # DEBUG + + # XXX Set up a discrete version of the Viridis map for axes.prop_cycle + + +def palette(nb, hls=HLS, viridis=VIRIDIS): + """ Use a smart palette from seaborn, for nb different plots on the same figure. + + - Ref: http://seaborn.pydata.org/generated/seaborn.hls_palette.html#seaborn.hls_palette + + >>> palette(10, hls=True) # doctest: +ELLIPSIS + [(0.86..., 0.37..., 0.33...), (0.86...,.65..., 0.33...), (0.78..., 0.86...,.33...), (0.49..., 0.86...,.33...), (0.33..., 0.86...,.46...), (0.33..., 0.86...,.74...), (0.33..., 0.68..., 0.86...) (0.33..., 0.40..., 0.86...) (0.56..., 0.33..., 0.86...) 
(0.84..., 0.33..., 0.86...)] + >>> palette(10, hls=False) # doctest: +ELLIPSIS + [[0.96..., 0.44..., 0.53...], [0.88..., 0.52..., 0.19...], [0.71..., 0.60..., 0.19...], [0.54..., 0.65..., 0.19...], [0.19..., 0.69..., 0.34...], [0.20..., 0.68..., 0.58...],[0.21..., 0.67..., 0.69...], [0.22..., 0.65..., 0.84...], [0.55..., 0.57..., 0.95...], [0.85..., 0.44..., 0.95...]] + >>> palette(10, viridis=True) # doctest: +ELLIPSIS + [(0.28..., 0.13..., 0.44...), (0.26..., 0.24..., 0.52...), (0.22..., 0.34..., 0.54...), (0.17..., 0.43..., 0.55...), (0.14..., 0.52..., 0.55...), (0.11..., 0.60..., 0.54...), (0.16..., 0.69..., 0.49...), (0.31..., 0.77..., 0.41...), (0.52..., 0.83..., 0.28...), (0.76..., 0.87..., 0.13...)] + + - To visualize: + + >>> sns.palplot(palette(10, hls=True)) # doctest: +SKIP + >>> sns.palplot(palette(10, hls=False)) # use HUSL by default # doctest: +SKIP + >>> sns.palplot(palette(10, viridis=True)) # doctest: +SKIP + """ + if viridis: + return sns.color_palette('viridis', nb) + else: + return sns.hls_palette(nb + 1)[:nb] if hls else sns.husl_palette(nb + 1)[:nb] + + +def makemarkers(nb): + """ Give a list of cycling markers. See http://matplotlib.org/api/markers_api.html + + .. note:: This what I consider the *optimal* sequence of markers, they are clearly differentiable one from another and all are pretty. + + Examples: + + >>> makemarkers(7) + ['o', 'D', 'v', 'p', '<', 's', '^'] + >>> makemarkers(12) + ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>', 'o', 'D'] + """ + allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] + longlist = allmarkers * (1 + int(nb / float(len(allmarkers)))) # Cycle the good number of time + return longlist[:nb] # Truncate + + +#: Default parameter for legend(): if True, the legend is placed at the right side of the figure, not on it. +#: This is almost mandatory for plots with more than 10 algorithms (good for experimenting, bad for publications). +PUTATRIGHT = True +PUTATRIGHT = False + +#: Shrink factor if the legend is displayed on the right of the plot. +#: +#: .. warning:: I still don't really understand how this works. Just manually decrease if the legend takes more space (i.e., more algorithms with longer names) +SHRINKFACTOR = 0.60 +SHRINKFACTOR = 0.65 +SHRINKFACTOR = 0.70 +SHRINKFACTOR = 0.75 + +#: Default parameter for maximum number of label to display in the legend INSIDE the figure +MAXNBOFLABELINFIGURE = 8 + + +def legend(putatright=PUTATRIGHT, fontsize="xx-small", + shrinkfactor=SHRINKFACTOR, maxnboflabelinfigure=MAXNBOFLABELINFIGURE, + fig=None, title=None + ): + """plt.legend() with good options, cf. http://matplotlib.org/users/recipes.html#transparent-fancy-legends. + + - It can place the legend to the right also, see https://stackoverflow.com/a/4701285/. + """ + try: + len_leg = len(plt.gca().get_legend_handles_labels()[1]) + putatright = len_leg > maxnboflabelinfigure + if len_leg > maxnboflabelinfigure: print("Warning: forcing to use putatright = {} because there is {} items in the legend.".format(putatright, len_leg)) # DEBUG + except (ValueError, AttributeError, IndexError) as e: + # print(" e =", e) # DEBUG + pass + if fig is None: + # fig = plt.gcf() + fig = plt # HACK + if putatright: + try: + # Shrink current axis by 20% on xaxis and 10% on yaxis + delta_rect = (1. - shrinkfactor)/6.25 + # XXX rect = [left, bottom, right, top] in normalized (0, 1) figure coordinates. 
+ fig.tight_layout(rect=[delta_rect, delta_rect, shrinkfactor, 1 - 2*delta_rect]) + # Put a legend to the right of the current axis + fig.legend(loc='center left', numpoints=1, fancybox=True, framealpha=0.8, bbox_to_anchor=(1, 0.5), title=title, fontsize=fontsize) + except: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + else: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + + +def maximizeWindow(): + """ Experimental function to try to maximize a plot. + + - Tries as well as possible to maximize the figure. + - Cf. https://stackoverflow.com/q/12439588/ + + .. warning:: This function is still experimental, but "it works on my machine" so I keep it. + """ + # plt.show(block=True) + # plt.tight_layout() + figManager = plt.get_current_fig_manager() + try: + figManager.window.showMaximized() + except Exception: + try: + figManager.frame.Maximize(True) + except Exception: + try: + figManager.window.state('zoomed') # works fine on Windows! + except Exception: + try: + figManager.full_screen_toggle() + except Exception: + print(" Note: Unable to maximize window...") + # plt.show() + + +#: List of formats to use for saving the figures, by default. +#: It is a smart idea to save in both a raster and vectorial formats +FORMATS = ('png', 'pdf') +# FORMATS = ('png', 'pdf', 'eps') +# FORMATS = ('png', 'pdf', 'eps', 'svg') + + +def show_and_save(showplot=True, savefig=None, formats=FORMATS, pickleit=False, fig=None): + """ Maximize the window if need to show it, save it if needed, and then show it or close it. + + - Inspired by https://tomspur.blogspot.fr/2015/08/publication-ready-figures-with.html#Save-the-figure + """ + if showplot: + maximizeWindow() + if savefig is not None: + if pickleit and fig is not None: + form = "pickle" + path = "{}.{}".format(savefig, form) + print("Saving raw figure with format {}, to file '{}'...".format(form, path)) # DEBUG + with open(path, "bw") as f: + pickle_dump(fig, f) + print(" Saved! '{}' created of size '{}b', at '{:%c}' ...".format(path, os.path.getsize(path), datetime.fromtimestamp(os.path.getatime(path)))) + for form in formats: + path = "{}.{}".format(savefig, form) + print("Saving figure with format {}, to file '{}'...".format(form, path)) # DEBUG + try: + plt.savefig(path, bbox_inches=BBOX_INCHES) + print(" Saved! '{}' created of size '{}b', at '{:%c}' ...".format(path, os.path.getsize(path), datetime.fromtimestamp(os.path.getatime(path)))) + except Exception as exc: + print("Error: could not save current figure to {} because of error {}... Skipping!".format(path, exc)) # DEBUG + try: + plt.show(block=True) if showplot else plt.close() + except (TypeError, AttributeError): + print("Failed to show the figure for some unknown reason...") # DEBUG + + +def add_percent_formatter(which="xaxis", amplitude=1.0, oldformatter="%.2g%%", formatter="{x:.1%}"): + """ Small function to use a Percentage formatter for xaxis or yaxis, of a certain amplitude. + + - which can be "xaxis" or "yaxis", + - amplitude is a float, default to 1. + + - More detail at http://stackoverflow.com/a/36320013/ + - Not that the use of matplotlib.ticker.PercentFormatter require matplotlib >= 2.0.1 + - But if not available, use matplotlib.ticker.StrMethodFormatter("{:.0%}") instead + """ + # Which axis to use ? 
+ if which == "xaxis": + ax = plt.axes().xaxis + elif which == "yaxis": + ax = plt.axes().yaxis + else: + raise ValueError("Unknown value '{}' for 'which' in function add_percent_formatter() : only xaxis,yaxis are accepted...".format(which)) + # Which formatter to use ? + try: + my_frmt = mtick.StrMethodFormatter(formatter) # Use new format string + except Exception: + my_frmt = mtick.FormatStrFormatter(oldformatter) # Use old format string, better looking but not correctly scaled + if hasattr(mtick, 'PercentFormatter'): + my_frmt = mtick.PercentFormatter(amplitude) + # Use it! + ax.set_major_formatter(my_frmt) + + +#: Default value for the ``width`` parameter for :func:`wraptext` and :func:`wraplatex`. +WIDTH = 95 + + +def wraptext(text, width=WIDTH): + """ Wrap the text, using ``textwrap`` module, and ``width``.""" + return "\n".join(wrap(text, width=width)) + + +def wraplatex(text, width=WIDTH): + """ Wrap the text, for LaTeX, using ``textwrap`` module, and ``width``.""" + return "$\n$".join(wrap(text, width=width)) + + +def nrows_ncols(N): + """Return (nrows, ncols) to create a subplots for N plots of the good size. + + >>> for N in range(1, 22): + ... nrows, ncols = nrows_ncols(N) + ... print("For N = {:>2}, {} rows and {} cols are enough.".format(N, nrows, ncols)) + For N = 1, 1 rows and 1 cols are enough. + For N = 2, 2 rows and 1 cols are enough. + For N = 3, 2 rows and 2 cols are enough. + For N = 4, 2 rows and 2 cols are enough. + For N = 5, 3 rows and 2 cols are enough. + For N = 6, 3 rows and 2 cols are enough. + For N = 7, 3 rows and 3 cols are enough. + For N = 8, 3 rows and 3 cols are enough. + For N = 9, 3 rows and 3 cols are enough. + For N = 10, 4 rows and 3 cols are enough. + For N = 11, 4 rows and 3 cols are enough. + For N = 12, 4 rows and 3 cols are enough. + For N = 13, 4 rows and 4 cols are enough. + For N = 14, 4 rows and 4 cols are enough. + For N = 15, 4 rows and 4 cols are enough. + For N = 16, 4 rows and 4 cols are enough. + For N = 17, 5 rows and 4 cols are enough. + For N = 18, 5 rows and 4 cols are enough. + For N = 19, 5 rows and 4 cols are enough. + For N = 20, 5 rows and 4 cols are enough. + For N = 21, 5 rows and 5 cols are enough. + """ + nrows = int(np.ceil(np.sqrt(N))) + ncols = N // nrows + while N > nrows * ncols: + ncols += 1 + nrows, ncols = max(nrows, ncols), min(nrows, ncols) + return nrows, ncols + + +def addTextForWorstCases(ax, n, bins, patches, rate=0.85, normed=False, fontsize=8): + """Add some text labels to the patches of an histogram, for the last 'rate'%. + + Use it like this, to add labels for the bins in the 65% largest values n:: + + >>> n, bins, patches = plt.hist(...) 
+ >>> addTextForWorstCases(ax, n, bins, patches, rate=0.65) + """ + # DONE add an automatic detection of the cases where a regret was found to not be O(log(T)) to display on the histogram the count of bad cases + assert 0 <= rate <= 1, "Error: 'rate' = {:.3g} should be in [0, 1].".format(rate) # DEBUG + if not isinstance(n, list) and not isinstance(n, np.ndarray): + n = [n] + if hasattr(patches, 'patches'): + # assert isinstance(patches, mpl.container.BarContainer) # DEBUG + patches = patches.patches + if not isinstance(patches, list): + patches = [patches] + max_x = max(p.xy[0] for p in patches) + for nx, p in zip(n, patches): + text = "{:.3%}".format(nx) if normed else "{:.3g}".format(nx) + x, y = p.xy[0], 1.015 * nx # 1.5% higher than the top of the patch rectangle + # Simple detection can be if a box is for a regret larger than some fraction of T + if nx > 0 and x > (rate * max_x): + # print("Writing text =", text, "at x =", x, "and y =", y) # DEBUG + ax.text(x, y, text, fontsize=fontsize) + + +def myviolinplot(*args, nonsymmetrical=False, **kwargs): + try: + return sns.violinplot(*args, nonsymmetrical=nonsymmetrical, cut=0, inner="stick", **kwargs) + except (TypeError, NameError): + return sns.violinplot(*args, cut=0, inner="stick", **kwargs) + + +def violin_or_box_plot(data=None, labels=None, boxplot=False, **kwargs): + """ Automatically add labels to a box or violin plot. + + .. warning:: Requires pandas (https://pandas.pydata.org/) to add the xlabel for violin plots. + """ + if boxplot: + return plt.boxplot(data, labels=labels, showmeans=True, meanline=True, **kwargs) + if labels is not None: + try: + import pandas as pd + dict_of_data = { + label: column + for label, column in zip(labels, data) + } + df = pd.DataFrame(dict_of_data) + return myviolinplot(nonsymmetrical="left", data=df, orient="v", **kwargs) + except ImportError: + return violin_or_box_plot(data, boxplot=boxplot, **kwargs) + return myviolinplot(nonsymmetrical="left", data=data, orient="v", **kwargs) + + +MAX_NB_OF_LABELS = 50 #: If more than MAX_NB_OF_LABELS labels have to be displayed on a boxplot, don't put a legend. + + +def adjust_xticks_subplots(ylabel=None, labels=(), maxNbOfLabels=MAX_NB_OF_LABELS): + """Adjust the size of the xticks, and maybe change size of ylabel. + + - See https://stackoverflow.com/a/37708190/ + """ + if len(labels) >= maxNbOfLabels: + return + max_length_of_labels = max([len(label) for label in labels]) + locs, xticks_labels = plt.xticks() # XXX don't name xticks_labels, labels or it erases the argument of the function and labels are not correctly displayed. + plt.xticks(locs, labels, rotation=80, verticalalignment="top", fontsize="xx-small") + if max_length_of_labels >= 50: + plt.subplots_adjust(bottom=max_length_of_labels/135.0) + if ylabel is not None: plt.ylabel(ylabel, fontsize="x-small") + else: + plt.subplots_adjust(bottom=max_length_of_labels/90.0) + + +def table_to_latex(mean_data, std_data=None, + labels=None, fmt_function=None, name_of_table=None, + filename=None, erase_output=False, + *args, **kwargs + ): + """ Tries to print the data from the input array or collection of array or :class:`pandas.DataFrame` to the stdout and to the file ``filename`` (if it does not exist). 
+ + - Give ``std_data`` to print ``mean +- std`` instead of just ``mean`` from ``mean_data``, + - Give a list to ``labels`` to use a header of the table, + - Give a formatting function to ``fmt_function``, like :func:`IPython.core.magics.execution._format_time` to print running times, or :func:`memory_consumption.sizeof_fmt` to print memory usages, or ``lambda s: "{:.3g}".format(s)`` to print ``float`` values (default), + - Uses :func:`tabulate.tabulate` (https://bitbucket.org/astanin/python-tabulate/) or :func:`pandas.DataFrame.to_latex` (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_latex.html#pandas.DataFrame.to_latex). + + .. warning:: FIXME this is still experimental! And useless, most of the time we simply do a copy/paste from the terminal to the LaTeX in the article... + """ + if fmt_function is None: fmt_function = lambda s: "{:.3g}".format(s) + output_string = None + input_data = mean_data + if std_data is not None: + format_data = np.vectorize(lambda xi, yi: r"{} \pm {}".format(fmt_function(xi), fmt_function(yi))) + input_data = format_data(mean_data, std_data) + else: + format_data = np.vectorize(fmt_function) + input_data = format_data(mean_data) + print("Using input_data of shape = {} and size = {}\n{}".format(np.shape(input_data), np.size(input_data), input_data)) # DEBUG + # 1. try with pandas module + try: + import pandas as pd + if labels is not None: + df = pd.DataFrame(input_data, columns=labels) + else: + df = pd.DataFrame(input_data) + output_string = df.to_latex(*args, **kwargs) + except ImportError: + print("Error: the pandas module is not available, install it with 'pip install pandas' or 'conda install pandas'.") # DEBUG + # 2. if pandas failed, try with tabulate + if output_string is None: + try: + import tabulate + if labels is not None: + output_string = tabulate.tabulate(input_data, tablefmt="latex_raw", headers=labels, *args, **kwargs) + else: + output_string = tabulate.tabulate(input_data, tablefmt="latex_raw", *args, **kwargs) + except ImportError: + print("Error: the tabulate module is not available, install it with 'pip install tabulate' or 'conda install tabulate'.") # DEBUG + if filename is not None and not erase_output and os.path.exists(filename): + print("Error: the file named '{}' already exists, and option 'erase_output' is False.".format(filename)) + return -1 + if name_of_table is not None: + output_string = r"""%% LaTeX code for a table, produced by SMPyBandits.Environment.plotsetting.table_to_latex() +\begin{table} +%s +\caption{%s} +\end{table}""" % (output_string, name_of_table) + print("\nThe data from object (shape = {} and size = {}) can be pretty printed in a LaTeX table looking like this one:".format(np.shape(input_data), np.size(input_data))) # DEBUG + print(output_string) + if filename is not None: + print("\nThe data from object (shape = {} and size = {}) will be saved to the file {}...".format(np.shape(input_data), np.size(input_data), filename)) # DEBUG + with open(filename, 'w') as open_file: + print(output_string, file=open_file) + return 0 + + +# --- Debugging + +if __name__ == "__main__": + # Code for debugging purposes. 
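    # A small, self-contained check of the pure helpers defined above (palette, makemarkers,
    # nrows_ncols); it only prints their outputs and creates no figure, so it can run before
    # the doctests below.
    print("palette(5)     :", palette(5))
    print("makemarkers(5) :", makemarkers(5))
    print("nrows_ncols(7) :", nrows_ncols(7))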
+ from doctest import testmod + print("\nTesting automatically all the docstring written in each functions of this module :") + testmod(verbose=True) diff --git a/obsolete/simu_config_network_only.py b/obsolete/simu_config_network_only.py new file mode 100644 index 0000000..f6be446 --- /dev/null +++ b/obsolete/simu_config_network_only.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. +""" + +# This file provides the configurations for each simulation. + +__author__ = "Wenbo Wang" + +from envutils import Struct as Section + +if __name__ == '__main__': + print("Warning: this script 'simu_config.py' is NOT executable..") # DEBUG + exit(0) + +############################################################################### +# Section 1: +# Define the algorithms that are used in the simulation +############################################################################### +ENV_ALG_SETTING_1 = Section("Simulation of HetNet: reward evolution for 5 algorithms") +ENV_ALG_SETTING_1.game_horizon = 200000 + + +# Disable simulation for reward evolution in a single shot +ENV_ALG_SETTING_1.enable_reward_simulation = True +ENV_ALG_SETTING_1.enable_switching_simulation = True + +ENV_ALG_SETTING_1.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Throne', 'TnE Nonobservable'] #, +ENV_ALG_SETTING_1.alg_configs = [None, + {"delta": 0.02, "exploration_time": 4000}, + {"c1": 100, "c2": 200,"c3":100, "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.39, "alpha22": 0.4,}, + {"c1": 100, "c2": 300,"c3":200, "epsilon": 0.025, "delta": 1.5}, + {"c1": 100, "c2": 200,"c3":100, "epsilon": 0.025, "delta": 1.5, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4, "observable": 0} + ] + +# Experiment parameters +ENV_ALG_SETTING_1.flag_save_figure = True +ENV_ALG_SETTING_1.save_data = False + +# Experiment parameters +ENV_ALG_SETTING_1.T_repr_rounds = 40 + +ENV_ALG_SETTING_1.repeated_play_data_name = 'reward_data_4_alg_HetNet' + +# Enable parallel processing +ENV_ALG_SETTING_1.flag_parallel = True +ENV_ALG_SETTING_1.flag_progress_bar = True \ No newline at end of file diff --git a/obsolete/test_PPP.py b/obsolete/test_PPP.py new file mode 100644 index 0000000..737605f --- /dev/null +++ b/obsolete/test_PPP.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 18 11:16:15 2019 + +@author: wenbo2017 +""" + +import scipy +import numpy as np +import matplotlib.pyplot as plt + +import simu_config as CONFIG +#import matlab.engine + +#from HetNetSimulator import HomeBrewedHetNetEnv +import argparse +import os +import sys + +from loggingutils import info_logger + +def PoissonPP( rt, Dx, Dy=None ): + ''' + Determines the number of events `N` for a rectangular region, + given the rate `rt` and the dimensions, `Dx`, `Dy`. + Returns a <2xN> NumPy array. 
+ ''' + if Dy == None: + Dy = Dx + N = scipy.stats.poisson( rt*Dx*Dy ).rvs() + x = scipy.stats.uniform.rvs(0,Dx,((N,1))) + y = scipy.stats.uniform.rvs(0,Dy,((N,1))) + P = np.hstack((x,y)) + return P + +if __name__ == '__main__': +# rate, Dx = 10, 1 +# P = PoissonPP( rate, Dx ).T +# fig, ax = plt.subplots() +# ax = fig.add_subplot(111) +# ax.scatter( P[0], P[1], edgecolor='b', facecolor='none', alpha=0.5 ) +# # lengths of the axes are functions of `Dx` +# plt.xlim(0,Dx) ; plt.ylim(0,Dx) +# # label the axes and force a 1:1 aspect ratio +## plt.xlabel('X') ; plt.ylabel('Y') ; ax.set_aspect(1) +# plt.title('Poisson Process {}'.format(rate)) +## savefig( 'poisson_lambda_0p2.png', fmt='png', dpi=100 ) + + epsilon = 0.02 + nbArm = 10 + tmp_factor = 0.1 + + current_action = 3 + + for ii in range(10): + prob_no_change = 1 - epsilon**(tmp_factor) + prob_rand_action = epsilon**(tmp_factor) / (nbArm - 1) + + action_array = list(range(nbArm)) + prob_array = np.zeros(nbArm) + prob_array[:] = prob_rand_action + prob_array[current_action] = prob_no_change + + action = np.random.choice(action_array, size=None, p=prob_array) + + print("new action: {}; prob_stay: {:.2}, prob_rnd_change: {:.2}".format(action, prob_no_change, prob_rand_action)) + +# test_simulator = HomeBrewHetNetEnv({'context 1'}, 10, 10) +# test_simulator.initialize_UE(10, distance = 200, dist_mode = 0) +# +# test_simulator.helper_plot_ue_posiiton() +# bs_position = [1,2] +# bs_position = np.broadcast_to(bs_position, (10,2)) +# +# print(bs_position) +# eng = matlab.engine.connect_matlab() +# eng.sqrt(4.0) + + C_set = {"context 1", "context 2", "context 3"} + + my_logger = info_logger() + my_logger.logger.debug("test message.") + + record_series = np.empty((0,4)) + + record1 = np.array([1, 2, 3, 4]) + record2 = np.array([0, 9, 8, 7]) + + record_series = np.append(record_series, [record1], axis=0) + record_series = np.append(record_series, [record2], axis=0) + print(record_series) + print(record_series.shape) + + ret_rand = np.random.uniform(low=0.5, high=1.0, size=3) + print(ret_rand) + +# game_config = CONFIG.ENV_SCENARIO_3 +# print("MAB game with configuration '{}' starts to play...".format(game_config.__repr__())) + my_parser = argparse.ArgumentParser(description='Select the configuration type to run the simulations') + + # Add the arguments + my_parser.add_argument('-id', + metavar='ID', + type=int, + help='Choose the configuration ID between [1-6]') + + # Execute the parse_args() method + args = my_parser.parse_args() + + if args.id is not None: + print ("id has been set to {}".format(args.id)) + else: + args.id = 1 + print ("id has been set to {}".format(args.id)) \ No newline at end of file diff --git a/obsolete/test_main_MPMAB.py b/obsolete/test_main_MPMAB.py new file mode 100644 index 0000000..8e1702a --- /dev/null +++ b/obsolete/test_main_MPMAB.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. 
+ + +This file tests the running framework of the bandit simulation +""" + +__author__ = "Wenbo Wang" + +import numpy as np + +from MPMAB import MP_MAB +from HetNetSimulator import HomeBrewedHetNetEnv +from PlayResult import ResultMultiPlayers +from MABAlgorithms import Hungarian, MusicalChairs, TrialandError, GameofThrone +from Arms import * + +import time +from tqdm import tqdm + +if __name__ == '__main__': + # test code + horizon = 10000# should not be less than 100000 for MC + + context_set = {"context 1", "context 2", "context 3"} + +# nb_player = 2 +# nb_arms = 3 +# dic_lower = {("context 1", 0): np.array([0., 0.5, 0.3]), ("context 2", 0): np.array([0.1, 0.2, 0.2]), ("context 3", 0): np.array([0., 0.2, 0.25]), +# ("context 1", 1): np.array([0.1, 0.6, 0.2]), ("context 2", 1): np.array([0., 0., 0.]), ("context 3", 1): np.array([0.2, 0.1, 0.45])} +# dic_upper = {("context 1", 0): np.array([0.5, 0.8, 0.6]), ("context 2", 0): np.array([1., 1., 0.4]), ("context 3", 0): np.array([1, 0.3, 0.65]), +# ("context 1", 1): np.array([0.81, 0.96, 0.52]), ("context 2", 1): np.array([0.5, 0.4, 0.9]), ("context 3", 1): np.array([0.62, 0.21, 0.95])} + + nb_player = 5 + nb_arms = 6 + + """ + (1) Create an environment instance (e.g., with uniform arms) of the MPMAB + """ + hetnet_params = {'enabel mmWave': True, + 'horizon': horizon, + 'cell range': 200, + 'context_prob': {'context 1':2, 'context 2':1, 'context 3':1}, + 'los_prob': {'context 1':2, 'context 2':1, 'context 3':1} + } + multi_player_MAB = HomeBrewedHetNetEnv.HetNet_mab(context_set, nb_arms, nb_player, hetnet_params) +# multi_player_MAB = MP_MAB.gaussian_mab(context_set, nb_arms, nb_player, dic_lower, dic_upper) + + multi_player_MAB.prepare_samples(horizon) + multi_player_MAB.save_environment() + + start_time = time.time() + + """ + (2) Create Musical Chairs algorithm + """ + alg_param_mc = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "T0": 3000 + } + alg_MC = MusicalChairs(alg_param_mc) + + # to record the learning results of alg_MC + result_MC = ResultMultiPlayers("Musical Chair", context_set, nb_player, nb_arms, horizon) + + """ + (3) Create Hungarian algorithm + """ + alg_param_hungarian = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set + } + + alg_hungarian = Hungarian(alg_param_hungarian) + +# dic_pulls_on_means = dict() +# dic_total_rewards_on_means = dict() +# dic_sampled_rewards_on_means = dict() +# #get static allocation w.r.t. 
the means in each context +# for context in context_set: +# lower, upper, means, variance = multi_player_MAB.get_param(context) +# static_pulls, static_total_reward, static_sampled_rewards = alg_hungarian.learn_policy(means) +# +# dic_pulls_on_means[context] = static_pulls +# dic_total_rewards_on_means[context] = static_total_reward +# dic_sampled_rewards_on_means[context] = static_sampled_rewards +# +# #recorder of learning results +# # to store the centralized algorithm result of alg_hungarian + result_hungarian = ResultMultiPlayers("Instant Hungarian", context_set, nb_player, nb_arms, horizon) +# result_hungarian_mean = ResultMultiPlayers("Hungarian", context_set, nb_player, nb_arms, horizon) + + """ + (4) Create trial-and-error algorithm + """ + alg_param_tne = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "c1": 100, "c2": 200, "c3": 100, + "epsilon": 0.01, "delta": 2, "xi": 0.001, + "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4 + } + alg_TnE = TrialandError(alg_param_tne) + # to store the centralized algorithm result of alg_hungarian + result_TnE = ResultMultiPlayers("Trial-n-Error", context_set, nb_player, nb_arms, horizon) + + """ + (5) Create game-of-throne algorithm + """ + alg_param_got = {"nbPlayer": nb_player, + "nbArm": nb_arms, + "context_set": context_set, + "horizon": horizon, + "c1": 100, "c2": 200, "c3": 100, + "epsilon": 0.01, "delta": 2, "xi": 0.001, + } + + alg_GoT = GameofThrone(alg_param_got) + + result_GoT = ResultMultiPlayers("Game of Throne", context_set, nb_player, nb_arms, horizon) + + # Main loop of learning + for t in tqdm(range(horizon)): + context, arm_values = multi_player_MAB.draw_sample(t) + + # Hungarian algoirthm over the instantaneous samples and results + pulls, total_reward, sampled_rewards = alg_hungarian.learn_policy(arm_values) + choices = alg_hungarian.pulls2choices(pulls) + result_hungarian.store(t, context, choices, sampled_rewards, total_reward, pulls) + + # Hungarian algoirthm over the mean samples and results +# static_pulls = dic_pulls_on_means[context] +# static_choices = alg_hungarian.pulls2choices(static_pulls) +# static_reward = dic_sampled_rewards_on_means[context] +# static_total_reward = dic_total_rewards_on_means[context] +# result_hungarian_mean.store(t, context, static_choices, static_reward, static_total_reward, static_pulls) + + # Musical-chair algorithm over the instantaneous samples and the learning results + pulls, total_reward, sampled_rewards = alg_MC.learn_policy(arm_values, context, t) + choices = alg_MC.pulls2choices(pulls) + collisions = alg_MC.resolve_collision(pulls) + result_MC.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + # Trial-and-error algorithm over the instantaneous samples and the learning results + pulls, total_reward, sampled_rewards = alg_TnE.learn_policy(arm_values, context, t) + choices = alg_TnE.pulls2choices(pulls) + collisions = alg_TnE.resolve_collision(pulls) + result_TnE.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + # Game of Throne + pulls, total_reward, sampled_rewards = alg_GoT.learn_policy(arm_values, context, t) + choices = alg_GoT.pulls2choices(pulls) + collisions = alg_GoT.resolve_collision(pulls) + result_GoT.store(t, context, choices, sampled_rewards, total_reward, pulls, collisions) + + #end of play + running_time = time.time() - start_time + print("Simulation completes in {}s for {} rounds".format(running_time, horizon)) + + # for debugging + 
print("Trial-and-error Algorithm: {} exploration rounds, {} learning rounds, {} exploitation rounds".format(alg_TnE.nbExploration, + alg_TnE.nbTnE, alg_TnE.nbExploitation)) + + print("Context 1: {}, Context 2: {}, Context 3: {}".format(result_MC.context_history.count("context 1"), + result_MC.context_history.count("context 2"), + result_MC.context_history.count("context 3")) ) + +# result_hungarian.plot_cumu_rewards(other_results=[result_MC, result_TnE], save_fig=True, save_data=False) + result_hungarian.plot_avg_reward(other_results=[result_MC, result_GoT, result_TnE], save_fig=True, save_data=False) + + + + + + + + + \ No newline at end of file diff --git a/obsolete/test_parallel.py b/obsolete/test_parallel.py new file mode 100644 index 0000000..ad3b65f --- /dev/null +++ b/obsolete/test_parallel.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Dec 20 12:36:37 2019 + +@author: wenbo2017 +""" + +#import multiprocessing as mp +#import time +# +#class someClass(object): +# +# def __init__(self): +# self.var = 1 +# +# def test(self): +# print(self) +# print ("Variable value: {}".format(self.var)) +# self.var += 1 +# +# def apply_async_with_callback(self): +# pool = mp.Pool(processes = 3) +# for i in range(10): +# pool.apply_async(self.test) #, callback = self.log_result +# +# pool.close() +# pool.join() +# +# +#if __name__ == '__main__': +# sc = someClass() +# +# sc.apply_async_with_callback() + + +#from multiprocessing import Pool +#import time +#from tqdm import * +# +#def _foo(my_number): +# square = my_number * my_number +# time.sleep(1) +# return square +# +#if __name__ == '__main__': +# with Pool(processes=2) as p: +# max_ = 30 +# with tqdm(total=max_) as pbar: +# for i, _ in tqdm(enumerate(p.imap_unordered(_foo, range(0, max_)))): +# pbar.update() + + +import numpy as np + +import multiprocessing as mp +from tqdm import tqdm +from time import sleep + +SENTINEL = 1 + +def test(q=None): + for i in range(1000): + sleep(0.01) + + if q is not None: + q.put(SENTINEL) + +def listener(q, nbProcess): + pbar = tqdm(total = 1000*nbProcess) + for item in iter(q.get, None): + pbar.update() + +if __name__ == '__main__': +# pool = mp.Pool(processes=5) +# manager = mp.Manager() +# queue = manager.Queue() +# +# proc = mp.Process(target=listener, args=(queue, 5)) +# +# for ii in range(5): +# pool.apply_async(test, args=(queue, )) +# +# proc.start() +# pool.close() +# pool.join() +# queue.put(None) +# proc.join() +# +# print("process is done") +# c = np.array([]) + c = None + + d = np.array([1, 2, 0, 4, 0]) + + idx = np.where(d != 0) + + d[idx] = -1 + + print(d) + + +# a = [0, 0, 0, 0, 0] +# +# arm_selected = np.nonzero(a) +# +# print(arm_selected[0]) +## print(arm_selected[1]) +# +# indx = np.where(a == 6) +# +# print(indx) +# print(indx[0].ndim) +# print(indx[0].shape) +# +# aa = np.array(a) +# +# aa[:] = 0 +# +# b = np.array(list(range(0, 10))) +# print(b) +# +# print(aa) +# q = mp.Queue() +# proc = mp.Process(target=listener, args=(q,)) +# proc.start() +# workers = [mp.Process(target=test, args=(q,)) for i in range(5)] +# for worker in workers: +# worker.start() +# for worker in workers: +# worker.join() +# q.put(None) +# proc.join() \ No newline at end of file diff --git a/obsolete/test_plot.py b/obsolete/test_plot.py new file mode 100644 index 0000000..8d721f8 --- /dev/null +++ b/obsolete/test_plot.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 11 10:06:46 2019 + +@author: wenbo2017 +""" + +# testing plotting methods in +import seaborn as sns +import numpy 
as np +import pandas as pd + +#import matplotlib.pyplot as plt +from plotutils import plot_data_frame, prepare_file_name, read_data_frame, plot_repeated_simu_results +#from matplotlib.lines import Line2D +import simu_config as CONFIG + +flag_test1 = False + # start the simulation +if flag_test1 == True: + horizon_list = np.linspace(5000, 50000, 20) + + timepoints = [] + alg_len = 3 + regret_series = [] + #Monte Carlo Simulation + simu_rounds = 300 + + alg_types = [ii for ii in range(alg_len)] + + type_series = [] + + for simu_index in range(simu_rounds): + print("Simulation round {} of total rounds {}...".format(simu_index, simu_rounds)) + # 2d array of payoff for a single simulation round + learned_total_payoff = np.zeros((alg_len, len(horizon_list))) + + horizon_index = 0 + for horizon_index in range(len(horizon_list)): + # example: for 3 algorithms, len(tmp_total_payoff) == 3 + tmp_total_payoff = np.random.rand(alg_len) + + for alg_index in range(alg_len): + learned_total_payoff[alg_index][horizon_index] = tmp_total_payoff[alg_index] #/ horizon_list[horizon_index] + + type_series.extend(alg_types) + + tmp_time = [horizon_list[horizon_index]]*alg_len + + timepoints.extend(tmp_time) + + regret_series.extend(tmp_total_payoff) + + recorded_data = {} + + recorded_data['signal'] = regret_series + + recorded_data['time'] = timepoints + + recorded_data['algorithms'] = type_series + + my_data = pd.DataFrame(recorded_data) + +# sns.relplot(x="time", y="signal", hue = 'algorithms', +# kind="line", data=my_data, height=5, aspect=1.25 ); + plot_data_frame(my_data, xlabel="time", ylabel="signal", huelabel='algorithms', save_file_name='test') + +flag_test2=False +if flag_test2 == True: + T = np.linspace(start=25000, stop = 10000, num=20) + X = (5*2*np.log(T+2)**2 + 100*2*np.log(T+2))/T + 0.1 + Label = ['$0.1+200\log(T+2)+10\log^2(T+2)$']*len(T) + + recorded_data = {} + + recorded_data['Total number of plays'] = T + + recorded_data['Average regret over time'] = X + + recorded_data['Algorithm'] = Label + + my_data = pd.DataFrame(recorded_data) + + colors = ["#4374B3"] + # Set your custom color palette + sns.set_palette(sns.color_palette(colors)) + + file_name = "bound_regret" + plot_data_frame(my_data, xlabel="Total number of plays", + ylabel="Average regret over time", huelabel='Algorithm', + save_file_name=file_name, save_data_Name='test_data') + +flag_test3=False +if flag_test3==True: + plot_average_regret(start=25000, horzion=200000, nb_points=20) + + + regret_data = read_data_frame('regret_data_3_alg') + + T = np.exp(np.linspace(start=np.log(45000), stop = np.log(300000), num=20)) + X = (25*2*(np.log2(T+2)**2) + 100*2*np.log2(T+2)+10000)/T + Label = ['$O(M\log_2^{\delta}(T))$']*len(T) + + Dash = [1]*len(T) + + T = np.append(regret_data['Total number of plays'], T) + X = np.append((regret_data['Average regret over time']), X) + Label = np.append((regret_data['Algorithm']), Label) + Dash = np.append([0]*len(regret_data['Algorithm']), Dash) + + recorded_data = {} + recorded_data['Total number of plays'] = T + recorded_data['Average regret over time'] = X + recorded_data['Algorithms'] = Label + recorded_data['Dash'] = Label + + bound_data = pd.DataFrame(recorded_data) + + g = plot_data_frame(bound_data, xlabel="Total number of plays", + ylabel="Average regret over time", huelabel='Algorithms') + + g.ax.lines[3].set_linestyle("--") + g.ax.lines[3].set_color("grey") + + g.ax.set(xscale="log") + + le = g.ax.legend() + le.get_lines()[4].set_color('grey') + le.get_lines()[4].set_linestyle("--") + 
le.get_frame().set_facecolor('none') + le.get_frame().set_edgecolor('none') + + file_path = prepare_file_name(file_name="monte_carlo_regret", ext_format='pdf', add_timestamp=False) + g.savefig(file_path) + +flag_test4=True +if flag_test4==True: + game_config = CONFIG.ENV_SCENARIO_3 + start = game_config.T_start + nb_point = game_config.T_step + game_horizon = game_config.game_horizon + + plot_repeated_simu_results(start=start, horzion=game_horizon, nbPoints=nb_point, flag_bound=True, + data_file_name=game_config.repeated_play_data_name) \ No newline at end of file diff --git a/plotutils.py b/plotutils.py new file mode 100644 index 0000000..b34e54c --- /dev/null +++ b/plotutils.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +""" +@author: Wenbo Wang + +[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel +Allocation inIoT Networks over Unlicensed Bandwidth as aContextual Multi-player Multi-armed Bandit Game" + +License: +This program is licensed under the GPLv2 license. If you in any way use this code for research +that results in publications, please cite our original article listed above. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +""" + +# This file defines the plotting methods for the simulation. +# The configuration for pallette creation are partially inspired by the ones in the SMPyBandits project, +# see plotsettings.py in SMPyBandits (https://github.com/SMPyBandits/SMPyBandits) + + +__author__ = "Wenbo Wang" + +from datetime import datetime + +import matplotlib as mpl +#from matplotlib.ticker import FuncFormatter + +import os, errno + +import matplotlib.pyplot as plt +import matplotlib.ticker as mticker + +#import numpy as np +import seaborn as sns +import pandas as pd + +import numpy as np + +#from pickle import dump as pickle_dump # alternative choice of dumping files + +DPI = 120 #: DPI to use for the figures +FIGSIZE = (4,3) #: Figure size, in inches +#FIGSIZE = (5,4) #: Figure size, in inches + +# Customize the colormap +HLS = True #: Use the HLS mapping, or HUSL mapping +VIRIDIS = False #: Use the Viridis colormap + +# Bbox in inches. Only the given portion of the figure is saved. If 'tight', try to figure out the tight bbox of the figure. +BBOX_INCHES = "tight" #: Use this parameter for bbox +BBOX_INCHES = None + +if __name__ != '__main__': + # use a clever color palette, eg http://seaborn.pydata.org/api.html#color-palettes + sns.set(context="talk", style="whitegrid", palette="hls", font="sans-serif", font_scale=0.95) + + # Use tex by default http://matplotlib.org/2.0.0/users/dflt_style_changes.html#math-text + # mpl.rcParams['text.usetex'] = True # XXX force use of LaTeX + mpl.rcParams['font.family'] = "sans-serif" + mpl.rcParams['font.sans-serif'] = "DejaVu Sans" + mpl.rcParams['mathtext.fontset'] = "cm" + mpl.rcParams['mathtext.rm'] = "serif" + + # Configure size for axes and x and y labels + # Cf. https://stackoverflow.com/a/12444777/ + mpl.rcParams['axes.labelsize'] = "x-small" + mpl.rcParams['xtick.labelsize'] = "x-small" + mpl.rcParams['ytick.labelsize'] = "x-small" + mpl.rcParams['figure.titlesize'] = "x-small" + + # Configure the DPI of all images, once for all! 
+ mpl.rcParams['figure.dpi'] = DPI + # print(" - Setting dpi of all figures to", DPI, "...") # DEBUG + + # Configure figure size, even of if saved directly and not displayed, use HD screen + # cf. https://en.wikipedia.org/wiki/Computer_display_standard + mpl.rcParams['figure.figsize'] = FIGSIZE + # print(" - Setting 'figsize' of all figures to", FIGSIZE, "...") # DEBUG + +def prepare_file_name(file_name = None, alg_name = None, ext_format = None, add_timestamp=True): + now = datetime.now() + current_date = now.strftime("%Y-%m-%d-%H-%M-%S") + + cwd = os.getcwd() # current directory + target_directory = "{}\{}".format(cwd, "results") + + if not os.path.exists(target_directory): + try: + os.makedirs(target_directory) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + file_name_no_ext = "" + file_path = "" + if alg_name is None and add_timestamp == False: + file_name_no_ext = "{}".format(file_name if file_name is not None else "-") + + file_path = "{}\{}.{}".format(target_directory, file_name_no_ext, + ext_format if ext_format is not None else "") + else: + file_name_no_ext = "{}-{}-{}".format(file_name if file_name is not None else "", + alg_name if alg_name is not None else "", + current_date if add_timestamp else "") + + file_path = "{}\{}.{}".format(target_directory, file_name_no_ext, + ext_format if ext_format is not None else "") + + + return file_path, file_name_no_ext + +def read_data_frame(file_name, ext_format='pkl'): + """ + Read a DataFrame from the default path with file name identified as 'file_name' + """ + file_path, file_name = prepare_file_name(file_name=file_name, ext_format=ext_format, add_timestamp=False) + df = pd.read_pickle(file_path) + + return df + +def make_palette(nbColors, hls=HLS, viridis=False): + """ + Use the seaborn palette to create nbColors different curves on the same figure. + See also http://seaborn.pydata.org/generated/seaborn.hls_palette.html#seaborn.hls_palette + """ + if viridis: + return sns.color_palette('viridis', nbColors) + else: + return sns.hls_palette(nbColors + 1)[:nbColors] if hls else sns.husl_palette(nbColors + 1)[:nbColors] + + +def make_markers(nbMarkers): + """ + Give a list of cycling markers. See also https://matplotlib.org/3.1.1/api/markers_api.html + List of markers in SMPyBandits (as an example): + allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] + + """ + allmarkers = ['o', 'D', 'v', 'X', 'P', '^', 'p', '<', 's', '^', '*', 'h', '>'] + marker_list = allmarkers * (1 + int(nbMarkers / float(len(allmarkers)))) # Cycle the good number of time + return marker_list[:nbMarkers] # Truncate + + +#: Shrink factor if the legend is displayed on the right of the plot. +SHRINKFACTOR = 0.60 + +#: Default parameter for maximum number of label to display in the legend INSIDE the figure +MAXNBOFLABELINFIGURE = 8 + +def display_legend(putatright=False, fontsize="xx-small", shrinkfactor=SHRINKFACTOR, + maxnboflabelinfigure=MAXNBOFLABELINFIGURE, fig=None, title=None): + """plt.legend() with good options, cf. http://matplotlib.org/users/recipes.html#transparent-fancy-legends. + - For the purpose of generating figures for papers, it is not recommended to place it at the right-side. 
+ """ + try: + len_leg = len(plt.gca().get_legend_handles_labels()[1]) + putatright = len_leg > maxnboflabelinfigure + if len_leg > maxnboflabelinfigure: + print("Warning: forcing to use putatright = {} because there is {} items in the legend.".format(putatright, len_leg)) # DEBUG + except (ValueError, AttributeError, IndexError) as e: + print("error =", e) # DEBUG + + if fig is None: + fig = plt + if putatright: + try: + # Shrink current axis by 20% on xaxis and 10% on yaxis + delta_rect = (1. - shrinkfactor)/6.25 + fig.tight_layout(rect=[delta_rect, delta_rect, shrinkfactor, 1 - 2*delta_rect]) + # Put a legend to the right of the current axis + fig.legend(loc='center left', numpoints=1, fancybox=True, framealpha=0.8, bbox_to_anchor=(1, 0.5), title=title, fontsize=fontsize) + except: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + else: + fig.legend(loc='best', numpoints=1, fancybox=True, framealpha=0.8, title=title, fontsize=fontsize) + + +def plot_data_frame(input_dframe, xlabel, ylabel, huelabel, stylelabel=None, height=5, aspect=1.25, flag_semilogx=False, + save_file_name=None, sav_file_ext=None, save_data_name=None): + """ + plot_data_frame() takes 'input_dframe' as the payload data. \ + It also tries to plot the repeated simulation results with the labels of x, y axis and + the huelabel identified by the keys of 'input_dframe' as 'xlabel', 'ylabel' and 'huelabel'. + """ +# sns.set(font_scale=1.0) + sns_figure = sns.relplot(x=xlabel, y=ylabel, hue = huelabel, style=stylelabel, + kind="line", data=input_dframe, height=height, aspect=aspect); + + if flag_semilogx == True: + sns_figure.ax.set(xscale="log") + + # force scientific notations on x-axis + formatter = mticker.ScalarFormatter(useOffset=False, useMathText=True) + formatter_func = lambda x,pos : "${}$".format(formatter._formatSciNotation('%1.10e' % x)) + + sns_figure.ax.get_xaxis().set_major_formatter(mticker.FuncFormatter(formatter_func)) + sns_figure.ax.get_yaxis().set_major_formatter(mticker.FuncFormatter(formatter_func)) + + if save_file_name is not None: + sav_file_ext = sav_file_ext if sav_file_ext is not None else 'pdf' + figure_file_path, figure_file_name = prepare_file_name(file_name=save_file_name, ext_format=sav_file_ext) + sns_figure.savefig(figure_file_path) + + data_file_name = None + if save_data_name is not None: + data_file_path, data_file_name = prepare_file_name(file_name=save_data_name, ext_format='pkl', add_timestamp=True) + input_dframe.to_pickle(data_file_path) + + return sns_figure, data_file_name + + +""" +Specifically used for plotting regret data, with theoretical bound +""" +def plot_repeated_simu_results(start, horzion, nbPoints, + nbArm=2, c1=100, c2=20, flag_bound = False, + key_x='Total number of plays', key_y='Average regret', key_alg='Algorithms', + data_file_name='regret_data', save_fig_name="monte_carlo_regret"): + #plot key_x, key_y with huelable as key_alg + repeated_play_data = read_data_frame(data_file_name) + + if flag_bound: + T = np.linspace(start=4*start, stop = horzion, num=nbPoints) + + # This formula is heuristic, and for different parameter sets (context-arm numbers) + # we need to obtain the proper parameters of a tight bound with manually testing. 
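        # The reference curve computed below is the time-averaged heuristic bound
        # (c2 * M * log2(T+2)^2 + c1 * M * log2(T+2)) / T with M = nbArm; it is the
        # dashed grey '$O(M\log_2^{\delta}(T))$' line drawn alongside the measured
        # average regret further down in this function.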
+        X = (c2 * nbArm * (np.log2(T + 2)**2) + c1 * nbArm * np.log2(T + 2)) / T
+        Label = [r'$O(M\log_2^{\delta}(T))$'] * len(T)
+
+        Dash = [1] * len(T)
+
+        T = np.append(repeated_play_data[key_x], T)
+        X = np.append((repeated_play_data[key_y]), X)
+        Label = np.append((repeated_play_data[key_alg]), Label)
+        Dash = np.append([0] * len(repeated_play_data[key_alg]), Dash)
+
+        recorded_data = {}
+        recorded_data[key_x] = T
+        recorded_data[key_y] = X
+        recorded_data[key_alg] = Label
+        recorded_data['Dash'] = Dash
+
+        final_data = pd.DataFrame(recorded_data)
+
+        g, data_file_name = plot_data_frame(final_data, xlabel=key_x, ylabel=key_y, huelabel=key_alg)
+
+        nbLines = len(set(final_data[key_alg]))
+        # print(nbLines)  # DEBUG
+
+#        # force scientific notation on the x-axis
+#        g.ax.get_xaxis().get_major_formatter().set_scientific(True)
+        # draw the bound curve (the last hue group) as a grey dashed line
+        g.ax.lines[nbLines - 1].set_linestyle("--")
+        g.ax.lines[nbLines - 1].set_color("grey")
+
+        le = g.ax.legend()
+        le.get_lines()[nbLines].set_color('grey')
+        le.get_lines()[nbLines].set_linestyle("--")
+        le.get_frame().set_facecolor('none')
+        le.get_frame().set_edgecolor('none')
+    else:
+        final_data = repeated_play_data
+        g, data_file_name = plot_data_frame(final_data, xlabel=key_x, ylabel=key_y, huelabel=key_alg)
+#        # force scientific notation on the x-axis
+#        g.ax.get_xaxis().get_major_formatter().set_scientific(True)
+
+    # force scientific notation on both axes
+    formatter = mticker.ScalarFormatter(useOffset=False, useMathText=True)
+    formatter_func = lambda x, pos: "${}$".format(formatter._formatSciNotation('%1.10e' % x))
+
+    g.ax.get_xaxis().set_major_formatter(mticker.FuncFormatter(formatter_func))
+    g.ax.get_yaxis().set_major_formatter(mticker.FuncFormatter(formatter_func))
+
+    file_path, file_name = prepare_file_name(file_name=save_fig_name, ext_format='pdf', add_timestamp=False)
+    g.savefig(file_path)
\ No newline at end of file
diff --git a/results/tmp_plot_postprocess.py b/results/tmp_plot_postprocess.py
new file mode 100644
index 0000000..c1a916d
--- /dev/null
+++ b/results/tmp_plot_postprocess.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Mar 3 13:28:25 2020
+
+@author: wenbo2017
+"""
+
+# Remember to change the names of the data source files below to match the generated result files.
+
+import sys
+import os.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
+
+import pandas as pd
+from plotutils import plot_data_frame, plot_repeated_simu_results
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-00.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Average sum of rewards", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
+
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-03.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Accumulated switching counts", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
+
+data_reward = pd.read_pickle('reward_data_4_alg_HetNet--2020-03-27-11-22-03.pkl')
+
+plot_data_frame(data_reward,
+                xlabel="Total number of plays", ylabel="Accumulated collision counts", huelabel='Algorithms',
+                flag_semilogx=False,
+                save_file_name=None, save_data_name=None)
\ No newline at end of file
diff --git a/simu_config.py b/simu_config.py
new file mode 100644
index 0000000..e10ca44
--- /dev/null
+++ b/simu_config.py
@@ -0,0 +1,380 @@
+# -*- coding: utf-8 -*-
+"""
+@author: Wenbo Wang
+
+[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel
+Allocation in IoT Networks over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game"
+
+License:
+This program is licensed under the GPLv2 license. If you in any way use this code for research
+that results in publications, please cite our original article listed above.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+"""
+
+# This file provides the configuration of each simulation scenario.
+
+__author__ = "Wenbo Wang"
+
+import copy  # used to derive independent parallel variants of the base scenarios
+import numpy as np
+from envutils import Struct as Section
+
+if __name__ == '__main__':
+    print("Warning: 'simu_config.py' is a configuration module and is not meant to be run directly.")
+    exit(0)
+
+# Keys are (context, player ID) pairs; values are per-arm vectors.
+# The first dictionary holds the lower bounds of the rewards, the second one the upper bounds.
+initial_data = [{("context 1", 0): np.array([0., 0.5, 0.3]), ("context 2", 0): np.array([0.1, 0.2, 0.2]), ("context 3", 0): np.array([0., 0.2, 0.25]),
+                 ("context 1", 1): np.array([0.1, 0.6, 0.2]), ("context 2", 1): np.array([0., 0., 0.]), ("context 3", 1): np.array([0.2, 0.1, 0.45])},
+                {("context 1", 0): np.array([0.5, 0.8, 0.6]), ("context 2", 0): np.array([1., 1., 0.4]), ("context 3", 0): np.array([1, 0.3, 0.65]),
+                 ("context 1", 1): np.array([0.81, 0.96, 0.52]), ("context 2", 1): np.array([0.5, 0.4, 0.9]), ("context 3", 1): np.array([0.62, 0.21, 0.95])}
+                ]
+
+initial_data_2 = [{("context 1", 0): np.array([0.0, 0.5, 0.3, 0.1]), ("context 2", 0): np.array([0.1, 0.2, 0.2, 0.5]), ("context 3", 0): np.array([0.0, 0.2, 0.25, 0.4]),
+                   ("context 1", 1): np.array([0.1, 0.6, 0.2, 0.44]), ("context 2", 1): np.array([0.0, 0.0, 0.0, 0.2]), ("context 3", 1): np.array([0.2, 0.1, 0.45, 0.36]),
+                   ("context 1", 2): np.array([0.24, 0.11, 0.3, 0.14]), ("context 2", 2): np.array([0.2, 0.0, 0.1, 0.2]), ("context 3", 2): np.array([0.32, 0.21, 0.25, 0.59])},
+
+                  {("context 1", 0): np.array([0.5, 0.8, 0.6, 0.7]), ("context 2", 0): np.array([1.0, 1.0, 0.4, 1.0]), ("context 3", 0): np.array([1.0, 0.3, 0.65, 0.9]),
+                   ("context 1", 1): np.array([0.81, 0.96, 0.52, 1.0]), ("context 2", 1): np.array([0.5, 0.4, 0.9, 0.6]), ("context 3", 1): np.array([0.62, 0.31, 0.95, 0.79]),
+                   ("context 1", 2): np.array([0.81, 0.78, 0.67, 1.0]), ("context 2", 2): np.array([0.3, 0.95, 0.9, 0.6]), ("context 3", 2): np.array([0.75, 0.63, 1.0, 0.99])}
+                  ]
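+
+# Layout illustration (values taken from the tables above): initial_data[0] holds the lower
+# bounds and initial_data[1] the upper bounds of the per-arm reward supports, indexed by
+# (context, player ID). For example, for player 0 in "context 1":
+#
+#     lower = initial_data[0][("context 1", 0)]   # array([0. , 0.5, 0.3])
+#     upper = initial_data[1][("context 1", 0)]   # array([0.5, 0.8, 0.6])
+#
+# i.e. arm 1 of player 0 yields rewards in [0.5, 0.8] under "context 1".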
+
+###############################################################################
+# Section 1:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_1 = Section("2-player-3-context-3-uniform-arm MAB: regret evolution")
+ENV_SCENARIO_1.game_horizon = 200000
+ENV_SCENARIO_1.env_config = {'horizon': ENV_SCENARIO_1.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+# Enable the repeated simulation of the regret and reward evolution
+ENV_SCENARIO_1.enable_regret_simulation = True
+ENV_SCENARIO_1.enable_reward_simulation = True
+
+ENV_SCENARIO_1.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error']
+ENV_SCENARIO_1.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4}]
+
+# Experiment parameters
+ENV_SCENARIO_1.T_start = 5000
+ENV_SCENARIO_1.T_step = 20
+ENV_SCENARIO_1.T_simu_rounds = 20
+
+ENV_SCENARIO_1.flag_save_figure = True
+ENV_SCENARIO_1.repeated_play_data_name = 'regret_data'
+
+# Enable parallel processing
+ENV_SCENARIO_1.flag_parallel = False
+ENV_SCENARIO_1.flag_progress_bar = True
+
+###############################################################################
+# Section 1:
+# Parallel version
+###############################################################################
+# A deep copy keeps the base scenario unchanged (flag_parallel = False) when the
+# parallel variant is enabled.
+ENV_SCENARIO_1_PARALLEL = copy.deepcopy(ENV_SCENARIO_1)
+ENV_SCENARIO_1_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 2:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_2 = Section("2-player-3-context-3-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_2.game_horizon = 80000
+ENV_SCENARIO_2.env_config = {'horizon': ENV_SCENARIO_2.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_2.enable_efficiency_simulation = True
+
+ENV_SCENARIO_2.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error']
+ENV_SCENARIO_2.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4}]
+
+# Experiment parameters
+ENV_SCENARIO_2.flag_save_figure = True
+ENV_SCENARIO_2.save_data = True
+
+# Enable parallel processing
+ENV_SCENARIO_2.flag_parallel = False
+ENV_SCENARIO_2.flag_progress_bar = True
+
+###############################################################################
+# Section 2:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_2_PARALLEL = copy.deepcopy(ENV_SCENARIO_2)
+ENV_SCENARIO_2_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 3:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms
+###############################################################################
+ENV_SCENARIO_3 = Section("2-player-3-context-3-uniform-arm MAB: regret evolution")
+ENV_SCENARIO_3.game_horizon = 200000
+ENV_SCENARIO_3.env_config = {'horizon': ENV_SCENARIO_3.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the repeated simulation of the regret evolution
+ENV_SCENARIO_3.enable_regret_simulation = True
+
+ENV_SCENARIO_3.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_3.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_3.flag_save_figure = True
+ENV_SCENARIO_3.save_data = True
+
+# Experiment parameters
+ENV_SCENARIO_3.T_start = 5000
+ENV_SCENARIO_3.T_step = 20
+ENV_SCENARIO_3.T_simu_rounds = 200
+
+ENV_SCENARIO_3.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_3.flag_parallel = False
+ENV_SCENARIO_3.flag_progress_bar = True
+
+###############################################################################
+# Section 3:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_3_PARALLEL = copy.deepcopy(ENV_SCENARIO_3)
+ENV_SCENARIO_3_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 4:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 2 players, 3 arms; used to test parallel simulation.
+# For a single round of this 4-algorithm example, multiprocessing gives a
+# speed-up of about 1/3.
+###############################################################################
+ENV_SCENARIO_4 = Section("2-player-3-context-3-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_4.game_horizon = 200000
+ENV_SCENARIO_4.env_config = {'horizon': ENV_SCENARIO_4.game_horizon,
+                             'arm number': 3,
+                             'player number': 2,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_4.enable_efficiency_simulation = True
+
+ENV_SCENARIO_4.alg_types = ['Static Hungarian', 'Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_4.alg_configs = [None, None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                           "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_4.flag_save_figure = True
+ENV_SCENARIO_4.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_4.T_start = 5000
+ENV_SCENARIO_4.T_step = 20
+ENV_SCENARIO_4.T_simu_rounds = 200
+
+ENV_SCENARIO_4.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_4.flag_parallel = False
+ENV_SCENARIO_4.flag_progress_bar = True
+
+###############################################################################
+# Section 4:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_4_PARALLEL = copy.deepcopy(ENV_SCENARIO_4)
+ENV_SCENARIO_4_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 5:
+# MAB environment in a HetNet, with 12 random arms/channels and 10 randomly placed
+# users, 3 contexts (MUE transmission in the underlying macro cells).
+# Multiprocessing support for this example is still to be implemented.
+###############################################################################
+ENV_SCENARIO_5 = Section("10-UE-12-Channel HetNet: regret evolution")
+ENV_SCENARIO_5.game_horizon = 80000
+ENV_SCENARIO_5.env_config = {'horizon': ENV_SCENARIO_5.game_horizon,
+                             'arm number': 12,
+                             'player number': 10,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'HetNet simulator',  # change the underlying distribution here
+                             'enabel mmWave': True,
+                             'cell range': 200,
+                             'context_prob': {'context 1': 1, 'context 2': 1, 'context 3': 1},
+                             'los_prob': {'context 1': 1, 'context 2': 1, 'context 3': 1}
+                             }
+
+# Enable the single-shot efficiency (reward) simulation
+ENV_SCENARIO_5.enable_efficiency_simulation = True
+
+ENV_SCENARIO_5.alg_types = ['Musical Chairs', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_5.alg_configs = [None, {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2, "xi": 0.001,
+                                     "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.39, "alpha22": 0.4},
+                              {"c1": 100, "c2": 200, "c3": 100, "epsilon": 0.01, "delta": 2}]
+
+# Experiment parameters
+ENV_SCENARIO_5.flag_save_figure = True
+ENV_SCENARIO_5.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_5.T_start = 5000
+ENV_SCENARIO_5.T_step = 20
+ENV_SCENARIO_5.T_simu_rounds = 200
+
+ENV_SCENARIO_5.repeated_play_data_name = 'regret_data_3_alg'
+
+# Enable parallel processing
+ENV_SCENARIO_5.flag_parallel = False
+ENV_SCENARIO_5.flag_progress_bar = True
+
+###############################################################################
+# Section 5:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_5_PARALLEL = copy.deepcopy(ENV_SCENARIO_5)
+ENV_SCENARIO_5_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 6:
+# MAB environment in a HetNet, with 12 random arms/channels and 10 randomly placed
+# users, 3 contexts (MUE transmission in the underlying macro cells)
+###############################################################################
+ENV_SCENARIO_6 = Section("10-UE-12-Channel HetNet: reward evolution")
+ENV_SCENARIO_6.game_horizon = 200000
+ENV_SCENARIO_6.env_config = {'horizon': ENV_SCENARIO_6.game_horizon,
+                             'arm number': 12,
+                             'player number': 10,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'HetNet simulator',  # change the underlying distribution here
+                             'enabel mmWave': True,
+                             'cell range': 250,
+                             'context_prob': {'context 1': 2, 'context 2': 1, 'context 3': 1},
+                             'los_prob': {'context 1': 1.5, 'context 2': 2, 'context 3': 1}
+                             }
+
+# Disable the single-shot efficiency and the repeated regret simulations;
+# enable the repeated reward and switching-count simulations
+ENV_SCENARIO_6.enable_efficiency_simulation = False
+ENV_SCENARIO_6.enable_regret_simulation = False
+ENV_SCENARIO_6.enable_reward_simulation = True
+ENV_SCENARIO_6.enable_switching_simulation = True
+
+ENV_SCENARIO_6.alg_types = ['Musical Chairs', 'SOC', 'Trial and Error', 'Game of Thrones']
+ENV_SCENARIO_6.alg_configs = [None,
+                              {"delta": 0.02, "exploration_time": 10000},
+                              {"c1": 1000, "c2": 3000, "c3": 3000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+                               "alpha11": -0.04, "alpha12": 0.05, "alpha21": -0.035, "alpha22": 0.04, "observable": 1},
+                              {"c1": 1000, "c2": 3000, "c3": 3000, "epsilon": 0.01, "delta": 1.5},
+                              ]
+
+# Experiment parameters
+ENV_SCENARIO_6.flag_save_figure = True
+ENV_SCENARIO_6.save_data = False
+
+# Experiment parameters
+ENV_SCENARIO_6.T_start = 40000
+ENV_SCENARIO_6.T_step = 12
+ENV_SCENARIO_6.T_simu_rounds = 200
+
+ENV_SCENARIO_6.repeated_play_data_name = 'reward_data_4_alg_HetNet'
+
+# Enable parallel processing
+ENV_SCENARIO_6.flag_parallel = False
+ENV_SCENARIO_6.flag_progress_bar = True
+
+###############################################################################
+# Section 6:
+# Parallel version
+###############################################################################
+ENV_SCENARIO_6_PARALLEL = copy.deepcopy(ENV_SCENARIO_6)
+ENV_SCENARIO_6_PARALLEL.flag_parallel = True
+
+###############################################################################
+# Section 7:
+# Hard-coded MAB environment with uniform/Gaussian arms and a uniform context,
+# 3 contexts, 3 players, 4 arms
+###############################################################################
+ENV_SCENARIO_7 = Section("3-context-3-player-4-uniform-arm MAB: reward evolution")
+ENV_SCENARIO_7.game_horizon = 100000
+ENV_SCENARIO_7.env_config = {'horizon': ENV_SCENARIO_7.game_horizon,
+                             'arm number': 4,
+                             'player number': 3,
+                             'context set': {"context 1", "context 2", "context 3"},
+                             'env_type': 'uniform',  # change the underlying distribution here
+                             'initial data': initial_data_2
+                             }
+
+# Algorithms to compare
+ENV_SCENARIO_7.alg_types = ['Musical Chairs', 'SOC', 'Game of Thrones', 'Trial and Error']  # optionally add 'TnE Nonobservable'
+ENV_SCENARIO_7.alg_configs = [None,
+                              {"delta": 0.02, "exploration_time": 10000},
+                              {"c1": 500, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5},
+                              {"c1": 500, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+                               "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4},
+#                              {"c1": 300, "c2": 1000, "c3": 1000, "epsilon": 0.01, "delta": 1.5, "xi": 0.001,
+#                               "alpha11": -0.12, "alpha12": 0.15, "alpha21": -0.35, "alpha22": 0.4, "observable": 0}
+                              ]
+
+# Disable the single-shot efficiency and the repeated regret simulations;
+# enable the repeated reward and switching-count simulations
+ENV_SCENARIO_7.enable_efficiency_simulation = False
+ENV_SCENARIO_7.enable_regret_simulation = False
+ENV_SCENARIO_7.enable_reward_simulation = True
+ENV_SCENARIO_7.enable_switching_simulation = True
+
+# Experiment parameters
+ENV_SCENARIO_7.T_start = 20000
+ENV_SCENARIO_7.T_step = 10
+ENV_SCENARIO_7.T_simu_rounds = 20
+
+ENV_SCENARIO_7.repeated_play_data_name = 'congfig_7_5_algs_uniform'
+
+# Experiment parameters
+ENV_SCENARIO_7.flag_save_figure = True
+ENV_SCENARIO_7.save_data = False
+
+# Enable parallel processing
+ENV_SCENARIO_7.flag_parallel = True
+ENV_SCENARIO_7.flag_progress_bar = True
+
+###############################################################################
+# All configurations are stored in the following dictionary:
+###############################################################################
+CONFIGURATION_DICT = {1: ENV_SCENARIO_1,
+                      2: ENV_SCENARIO_2,
+                      3: ENV_SCENARIO_3,
+                      4: ENV_SCENARIO_4,
+                      5: ENV_SCENARIO_5,
+                      6: ENV_SCENARIO_6,
+                      7: ENV_SCENARIO_1_PARALLEL,
+                      8: ENV_SCENARIO_2_PARALLEL,
+                      9: ENV_SCENARIO_3_PARALLEL,
+                      10: ENV_SCENARIO_4_PARALLEL,
+                      11: ENV_SCENARIO_5_PARALLEL,
+                      12: ENV_SCENARIO_6_PARALLEL,
+                      13: ENV_SCENARIO_7
+                      }
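+
+# Example (illustrative only; the actual driver script is outside this file): a simulation
+# entry point is expected to select a scenario by its key, e.g.
+#
+#     from simu_config import CONFIGURATION_DICT
+#     config = CONFIGURATION_DICT[6]     # 10-UE-12-Channel HetNet, reward evolution
+#     print(config.game_horizon, config.alg_types)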