From 1711a2d3379c849c56b95d197f7551c3debe3bc9 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Mon, 12 Sep 2022 17:16:57 -0400
Subject: [PATCH 01/10] fixing gamemode weighting to be more robust and stable

---
 .../redis/redis_rollout_worker.py | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index a29bfd3..e081042 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -80,8 +80,11 @@ def __init__(self, redis: Redis, name: str, match: Match,
         self.send_obs = send_obs
         self.dynamic_gm = dynamic_gm
         self.gamemode_weights = gamemode_weights
-        if self.gamemode_weights is not None:
-            assert sum(self.gamemode_weights.values()) == 1, "gamemode_weights must sum to 1"
+        self.updated_weights = True
+        if self.gamemode_weights is None:
+            self.gamemode_weights = {'1v1': 0.333334, '2v2': 0.333333, '3v3': 0.333333}
+        assert sum(self.gamemode_weights.values()) == 1, "gamemode_weights must sum to 1"
+        self.previous_weights = self.gamemode_weights
         self.local_cache_name = local_cache_name
 
         self.uuid = str(uuid4())
@@ -210,15 +213,20 @@ def _get_past_model(self, version):
         return model
 
     def select_gamemode(self):
-        mode_exp = {m.decode("utf-8"): int(v) for m, v in self.redis.hgetall(EXPERIENCE_PER_MODE).items()}
-        if self.gamemode_weights is None:
-            mode = min(mode_exp, key=mode_exp.get)
-        else:
+
+        if not self.updated_weights:
+            # update weights once per worker per new model
+            mode_exp = {m.decode("utf-8"): int(v) for m, v in self.redis.hgetall(EXPERIENCE_PER_MODE).items()}
             total = sum(mode_exp.values()) + 1e-8
             mode_exp = {k: mode_exp[k] / total for k in mode_exp.keys()}
             # find exp which is farthest below desired exp
             diff = {k: self.gamemode_weights[k] - mode_exp[k] for k in mode_exp.keys()}
-            mode = max(diff, key=diff.get)
+            self.gamemode_weights = {k: self.gamemode_weights[k] + diff[k] / 3 for k in self.gamemode_weights.keys()}
+            new_sum = sum(self.gamemode_weights.values())
+            self.gamemode_weights = {k: self.gamemode_weights[k] / new_sum for k in self.gamemode_weights.keys()}
+            self.updated_weights = True
+            print(f"New gamemode weights are {self.gamemode_weights}")
+        mode = np.random.choice(list(self.gamemode_weights.keys()), p=list(self.gamemode_weights.values()))
         b, o = mode.split("v")
         return int(b), int(o)
 
@@ -241,6 +249,7 @@ def run(self):  # Mimics Thread
 
             # Only try to download latest version when new
             if latest_version != available_version:
+                self.updated_weights = False
                 model_bytes = self.redis.get(MODEL_LATEST)
                 if model_bytes is None:
                     time.sleep(1)
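PATCH 01 replaces the old hard argmin/argmax mode picker with a soft one: when a new model arrives, the worker reads the per-mode experience counters from Redis, measures how far each mode sits below its desired share, nudges its local weights a third of the way toward that deficit, renormalizes, and samples the next mode from the result. Sampling instead of always taking the single most-starved mode keeps a fleet of workers from all piling onto the same gamemode at once. Below is a minimal standalone sketch of that update; the numbers are hypothetical stand-ins for self.gamemode_weights (which on the first update still equals the configured targets) and the EXPERIENCE_PER_MODE hash.

    import numpy as np

    # Hypothetical stand-ins for the configured weights and the Redis counters.
    desired = {'1v1': 0.333334, '2v2': 0.333333, '3v3': 0.333333}
    observed_counts = {'1v1': 50_000, '2v2': 30_000, '3v3': 20_000}

    total = sum(observed_counts.values()) + 1e-8
    observed = {k: v / total for k, v in observed_counts.items()}

    # Nudge each weight a third of the way toward its deficit, then
    # renormalize so the values remain a valid probability distribution.
    weights = {k: desired[k] + (desired[k] - observed[k]) / 3 for k in desired}
    norm = sum(weights.values())
    weights = {k: v / norm for k, v in weights.items()}

    mode = np.random.choice(list(weights), p=list(weights.values()))
    print(weights, mode)  # under-served modes ('3v3' here) get boosted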
From ea6690ae845ee06c29ac3e05226c28ccc46ea4e8 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Tue, 13 Sep 2022 09:06:07 -0400
Subject: [PATCH 02/10] fixing gamemode weighting to be more robust and stable

---
 .../rollout_generator/redis/redis_rollout_worker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index e081042..91c1a18 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -84,7 +84,7 @@ def __init__(self, redis: Redis, name: str, match: Match,
         if self.gamemode_weights is None:
             self.gamemode_weights = {'1v1': 0.333334, '2v2': 0.333333, '3v3': 0.333333}
         assert sum(self.gamemode_weights.values()) == 1, "gamemode_weights must sum to 1"
-        self.previous_weights = self.gamemode_weights
+        self.target_weights = self.gamemode_weights
         self.local_cache_name = local_cache_name
 
         self.uuid = str(uuid4())
@@ -220,8 +220,8 @@ def select_gamemode(self):
             total = sum(mode_exp.values()) + 1e-8
             mode_exp = {k: mode_exp[k] / total for k in mode_exp.keys()}
             # find exp which is farthest below desired exp
-            diff = {k: self.gamemode_weights[k] - mode_exp[k] for k in mode_exp.keys()}
-            self.gamemode_weights = {k: self.gamemode_weights[k] + diff[k] / 3 for k in self.gamemode_weights.keys()}
+            diff = {k: self.target_weights[k] - mode_exp[k] for k in mode_exp.keys()}
+            self.gamemode_weights = {k: max(self.gamemode_weights[k] + diff[k] / 3, 0) for k in self.gamemode_weights.keys()}
             new_sum = sum(self.gamemode_weights.values())
             self.gamemode_weights = {k: self.gamemode_weights[k] / new_sum for k in self.gamemode_weights.keys()}
             self.updated_weights = True
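PATCH 02 corrects two things in that update: the deficit is now measured against a fixed target_weights snapshot rather than the already-nudged weights, and each adjusted weight is clamped at zero. The clamp matters because a mode that is far over-served can be pushed negative, and np.random.choice rejects negative probabilities. A small sketch with hypothetical values:

    import numpy as np

    # Why the max(..., 0) clamp matters: a large enough deficit can drive a
    # weight below zero, and np.random.choice rejects negative probabilities.
    weights = {'1v1': 0.05, '2v2': 0.45, '3v3': 0.50}
    diff = {'1v1': -0.20, '2v2': 0.10, '3v3': 0.10}  # 1v1 is far over-served

    weights = {k: max(weights[k] + diff[k] / 3, 0) for k in weights}  # clamp
    norm = sum(weights.values())
    weights = {k: v / norm for k, v in weights.items()}

    # Without the clamp, '1v1' would be about -0.0167 here and this call
    # would raise "ValueError: probabilities are not non-negative".
    print(np.random.choice(list(weights), p=list(weights.values())))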
From 095f94ce1a34833a4f36b9beb8a4ffb7debdd517 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Tue, 13 Sep 2022 14:10:57 -0400
Subject: [PATCH 03/10] removing redis from gamemode weights, just per worker

---
 .../redis/redis_rollout_worker.py | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index 91c1a18..bd04e01 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -2,6 +2,7 @@
 import itertools
 import os
 import time
+import copy
 from threading import Thread
 from uuid import uuid4
 
@@ -80,11 +81,17 @@ def __init__(self, redis: Redis, name: str, match: Match,
         self.send_obs = send_obs
         self.dynamic_gm = dynamic_gm
         self.gamemode_weights = gamemode_weights
-        self.updated_weights = True
         if self.gamemode_weights is None:
-            self.gamemode_weights = {'1v1': 0.333334, '2v2': 0.333333, '3v3': 0.333333}
+            self.gamemode_weights = {'1v1': 1/3, '2v2': 1/3, '3v3': 1/3}
         assert sum(self.gamemode_weights.values()) == 1, "gamemode_weights must sum to 1"
-        self.target_weights = self.gamemode_weights
+        self.target_weights = copy.copy(self.gamemode_weights)
+        # change weights from percentage of experience desired to percentage of gamemodes necessary (approx)
+        for k in self.gamemode_weights.keys():
+            b, o = k.split("v")
+            self.gamemode_weights[k] /= int(b)
+        weights_sum = sum(self.gamemode_weights.values())
+        self.gamemode_weights = {k: self.gamemode_weights[k] / weights_sum for k in self.gamemode_weights.keys()}
+        self.experience = {'1v1': 0, '2v2': 0, '3v3': 0}
         self.local_cache_name = local_cache_name
 
         self.uuid = str(uuid4())
@@ -214,18 +221,16 @@ def _get_past_model(self, version):
         return model
 
     def select_gamemode(self):
-
-        if not self.updated_weights:
-            # update weights once per worker per new model
-            mode_exp = {m.decode("utf-8"): int(v) for m, v in self.redis.hgetall(EXPERIENCE_PER_MODE).items()}
-            total = sum(mode_exp.values()) + 1e-8
-            mode_exp = {k: mode_exp[k] / total for k in mode_exp.keys()}
-            # find exp which is farthest below desired exp
-            diff = {k: self.target_weights[k] - mode_exp[k] for k in mode_exp.keys()}
-            self.gamemode_weights = {k: max(self.gamemode_weights[k] + diff[k] / 3, 0) for k in self.gamemode_weights.keys()}
-            new_sum = sum(self.gamemode_weights.values())
-            self.gamemode_weights = {k: self.gamemode_weights[k] / new_sum for k in self.gamemode_weights.keys()}
-            self.updated_weights = True
-            print(f"New gamemode weights are {self.gamemode_weights}")
+        total = sum(self.experience.values()) + 1e-8
+        mode_exp = {k: self.experience[k] / total for k in self.experience.keys()}
+        diff = {k: self.target_weights[k] - mode_exp[k] for k in mode_exp.keys()}
+        # change diff from experience weights to gamemode weights
+        for k in diff.keys():
+            b, o = k.split("v")
+            diff[k] *= int(b)
+        self.gamemode_weights = {k: max(self.gamemode_weights[k] + diff[k], 0) for k in self.gamemode_weights.keys()}
+        new_sum = sum(self.gamemode_weights.values())
+        self.gamemode_weights = {k: self.gamemode_weights[k] / new_sum for k in self.gamemode_weights.keys()}
         mode = np.random.choice(list(self.gamemode_weights.keys()), p=list(self.gamemode_weights.values()))
         b, o = mode.split("v")
         return int(b), int(o)
@@ -249,7 +254,6 @@ def run(self):  # Mimics Thread
 
             # Only try to download latest version when new
             if latest_version != available_version:
-                self.updated_weights = False
                 model_bytes = self.redis.get(MODEL_LATEST)
                 if model_bytes is None:
                     time.sleep(1)
@@ -319,7 +323,9 @@ def run(self):  # Mimics Thread
                     state = rollouts[0].infos[-2]["state"]
                     goal_speed = np.linalg.norm(state.ball.linear_velocity) * 0.036  # kph
                     str_result = ('+' if result > 0 else "") + str(result)
-                    self.total_steps_generated += len(rollouts[0].observations) * len(rollouts)
+                    episode_exp = len(rollouts[0].observations) * len(rollouts)
+                    self.total_steps_generated += episode_exp
+                    self.experience[f"{blue}v{orange}"] += episode_exp
                     post_stats = f"Rollout finished after {len(rollouts[0].observations)} steps ({self.total_steps_generated} total steps), result was {str_result}"
                     if result != 0:
                         post_stats += f", goal speed: {goal_speed:.2f} kph"
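PATCH 03 drops the shared Redis counters in favor of a per-worker experience tally, and converts the configured weights from "share of experience desired" to "share of matches to launch". The conversion divides by team size: a 3v3 episode of a given length grants roughly three times the experience of a 1v1 episode, so hitting equal experience targets means launching 1v1 matches more often. A sketch of that conversion under the default equal targets:

    # Sketch of the experience-share -> selection-share conversion.
    # Dividing each target by its team size approximates how often a mode
    # must be picked so that all modes accumulate experience at equal rates.
    target = {'1v1': 1 / 3, '2v2': 1 / 3, '3v3': 1 / 3}  # desired experience split

    selection = {}
    for mode, share in target.items():
        team_size = int(mode.split("v")[0])
        selection[mode] = share / team_size

    norm = sum(selection.values())
    selection = {k: v / norm for k, v in selection.items()}
    print(selection)  # {'1v1': 0.545..., '2v2': 0.272..., '3v3': 0.181...}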
From fefc16a566a42983260493f961cafebb7c15eb18 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Mon, 14 Nov 2022 11:18:01 -0500
Subject: [PATCH 04/10] changing name of attribute.

---
 .../redis/redis_rollout_worker.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index bd04e01..db9d2ad 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -86,11 +86,12 @@ def __init__(self, redis: Redis, name: str, match: Match,
         assert sum(self.gamemode_weights.values()) == 1, "gamemode_weights must sum to 1"
         self.target_weights = copy.copy(self.gamemode_weights)
         # change weights from percentage of experience desired to percentage of gamemodes necessary (approx)
-        for k in self.gamemode_weights.keys():
+        self.current_weights = copy.copy(self.gamemode_weights)
+        for k in self.current_weights.keys():
             b, o = k.split("v")
-            self.gamemode_weights[k] /= int(b)
-        weights_sum = sum(self.gamemode_weights.values())
-        self.gamemode_weights = {k: self.gamemode_weights[k] / weights_sum for k in self.gamemode_weights.keys()}
+            self.current_weights[k] /= int(b)
+        weights_sum = sum(self.current_weights.values())
+        self.current_weights = {k: self.current_weights[k] / weights_sum for k in self.current_weights.keys()}
         self.experience = {'1v1': 0, '2v2': 0, '3v3': 0}
         self.local_cache_name = local_cache_name
 
@@ -228,10 +229,10 @@ def select_gamemode(self):
         for k in diff.keys():
             b, o = k.split("v")
             diff[k] *= int(b)
-        self.gamemode_weights = {k: max(self.gamemode_weights[k] + diff[k], 0) for k in self.gamemode_weights.keys()}
-        new_sum = sum(self.gamemode_weights.values())
-        self.gamemode_weights = {k: self.gamemode_weights[k] / new_sum for k in self.gamemode_weights.keys()}
-        mode = np.random.choice(list(self.gamemode_weights.keys()), p=list(self.gamemode_weights.values()))
+        self.current_weights = {k: max(self.current_weights[k] + diff[k], 0) for k in self.current_weights.keys()}
+        new_sum = sum(self.current_weights.values())
+        self.current_weights = {k: self.current_weights[k] / new_sum for k in self.current_weights.keys()}
+        mode = np.random.choice(list(self.current_weights.keys()), p=list(self.current_weights.values()))
         b, o = mode.split("v")
         return int(b), int(o)
 
From 6bdd886f1bd5b8ea28a13745004a57a627afaea4 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Wed, 7 Dec 2022 22:19:40 -0500
Subject: [PATCH 05/10] WIP

---
 rocket_learn/rollout_generator/redis/redis_rollout_worker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index db9d2ad..eb10f85 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -93,6 +93,7 @@ def __init__(self, redis: Redis, name: str, match: Match,
         weights_sum = sum(self.current_weights.values())
         self.current_weights = {k: self.current_weights[k] / weights_sum for k in self.current_weights.keys()}
         self.experience = {'1v1': 0, '2v2': 0, '3v3': 0}
+        self.mean_exp_grant = {'1v1': 1000, '2v2': 2000, '3v3': 3000}
         self.local_cache_name = local_cache_name
 
         self.uuid = str(uuid4())
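PATCH 04 renames the mutated dict to current_weights so that gamemode_weights keeps meaning the user-requested experience split, and PATCH 05 seeds mean_exp_grant with rough per-episode experience guesses for each mode, which the next patch puts to use. The copy.copy calls introduced in PATCHes 03 and 04 are what keep these dicts independent; a plain assignment would alias them, as this small sketch shows:

    import copy

    # Assigning a dict only copies the reference, so in-place edits to one
    # name show through the other. For a flat dict of floats, a shallow copy
    # is enough to decouple them.
    gamemode_weights = {'1v1': 1 / 3, '2v2': 1 / 3, '3v3': 1 / 3}

    alias = gamemode_weights                 # same object
    snapshot = copy.copy(gamemode_weights)   # independent flat copy

    gamemode_weights['1v1'] /= 2             # in-place edit, like the /= int(b) loop
    print(alias['1v1'])     # 0.1666... - the alias changed too
    print(snapshot['1v1'])  # 0.3333... - the copy kept the original target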
From f90a0fe23033f46e783ef3d412f432a85726a6de Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Mon, 23 Jan 2023 08:24:43 -0500
Subject: [PATCH 06/10] testing

---
 .../redis/redis_rollout_worker.py | 27 ++++++++-----------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index eb10f85..e13a0f6 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -52,7 +52,9 @@ def __init__(self, redis: Redis, name: str, match: Match,
                  send_obs=True, scoreboard=None, pretrained_agents=None,
                  human_agent=None, force_paging=False, auto_minimize=True,
                  local_cache_name=None,
-                 gamemode_weights=None,):
+                 gamemode_weights=None,
+                 gamemode_weight_ema_alpha=0.02,
+                 ):
         # TODO model or config+params so workers can recreate just from redis connection?
         self.redis = redis
         self.name = name
@@ -90,10 +92,9 @@ def __init__(self, redis: Redis, name: str, match: Match,
         for k in self.current_weights.keys():
             b, o = k.split("v")
             self.current_weights[k] /= int(b)
-        weights_sum = sum(self.current_weights.values())
-        self.current_weights = {k: self.current_weights[k] / weights_sum for k in self.current_weights.keys()}
-        self.experience = {'1v1': 0, '2v2': 0, '3v3': 0}
+        self.current_weights = {k: self.current_weights[k] / sum(self.current_weights.values()) + 1e-8 for k in self.current_weights.keys()}
         self.mean_exp_grant = {'1v1': 1000, '2v2': 2000, '3v3': 3000}
+        self.ema_alpha = gamemode_weight_ema_alpha
         self.local_cache_name = local_cache_name
 
         self.uuid = str(uuid4())
@@ -223,21 +224,14 @@ def _get_past_model(self, version):
         return model
 
     def select_gamemode(self):
-        total = sum(self.experience.values()) + 1e-8
-        mode_exp = {k: self.experience[k] / total for k in self.experience.keys()}
-        diff = {k: self.target_weights[k] - mode_exp[k] for k in mode_exp.keys()}
-        # change diff from experience weights to gamemode weights
-        for k in diff.keys():
-            b, o = k.split("v")
-            diff[k] *= int(b)
-        self.current_weights = {k: max(self.current_weights[k] + diff[k], 0) for k in self.current_weights.keys()}
-        new_sum = sum(self.current_weights.values())
-        self.current_weights = {k: self.current_weights[k] / new_sum for k in self.current_weights.keys()}
+        emp_weight = {k: self.mean_exp_grant[k] / sum(self.mean_exp_grant.values()) + 1e-8
+                      for k in self.mean_exp_grant.keys()}
+        cor_weight = {k: self.gamemode_weights[k] / emp_weight[k] for k in self.gamemode_weights.keys()}
+        self.current_weights = {k: cor_weight[k] / sum(cor_weight.values()) + 1e-8 for k in cor_weight}
         mode = np.random.choice(list(self.current_weights.keys()), p=list(self.current_weights.values()))
         b, o = mode.split("v")
         return int(b), int(o)
-
     def run(self):  # Mimics Thread
         """
         begin processing in already launched match and push to redis
         """
@@ -327,7 +321,8 @@ def run(self):  # Mimics Thread
                     str_result = ('+' if result > 0 else "") + str(result)
                     episode_exp = len(rollouts[0].observations) * len(rollouts)
                     self.total_steps_generated += episode_exp
-                    self.experience[f"{blue}v{orange}"] += episode_exp
+                    old_exp = self.mean_exp_grant[f"{blue}v{orange}"]
+                    self.mean_exp_grant = ((episode_exp - old_exp) * self.ema_alpha) + old_exp
                     post_stats = f"Rollout finished after {len(rollouts[0].observations)} steps ({self.total_steps_generated} total steps), result was {str_result}"
                     if result != 0:
                         post_stats += f", goal speed: {goal_speed:.2f} kph"
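PATCH 06 reworks selection around an exponential moving average of how much experience each mode actually grants per episode: a mode's selection probability is its desired experience share divided by its empirical share of per-episode yield, renormalized. With the seed values this reproduces the team-size heuristic exactly, and as the EMA tracks real episode lengths the probabilities adjust automatically. A sketch of the rule using the seed numbers:

    import numpy as np

    # Weight each mode by (desired experience share) / (empirical share of
    # per-episode yield). Values mirror the seeds from PATCH 05.
    desired = {'1v1': 1 / 3, '2v2': 1 / 3, '3v3': 1 / 3}
    mean_exp_grant = {'1v1': 1000, '2v2': 2000, '3v3': 3000}  # EMA seeds

    total_grant = sum(mean_exp_grant.values())
    emp_share = {k: v / total_grant for k, v in mean_exp_grant.items()}

    corrected = {k: desired[k] / emp_share[k] for k in desired}
    norm = sum(corrected.values())
    probs = {k: v / norm for k, v in corrected.items()}
    print(probs)  # {'1v1': 0.545..., '2v2': 0.272..., '3v3': 0.181...}

    mode = np.random.choice(list(probs), p=list(probs.values()))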
From 90b0c8ac08ffe74202c5a18b0aec0056cfc77e99 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Mon, 23 Jan 2023 08:30:23 -0500
Subject: [PATCH 07/10] adding param

---
 rocket_learn/rollout_generator/redis/redis_rollout_worker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index e13a0f6..a2cb5f0 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -44,6 +44,7 @@ class RedisRolloutWorker:
     :param auto_minimize: automatically minimize the launched rocket league instance
     :param local_cache_name: name of local database used for model caching. If None, caching is not used
     :param gamemode_weights: dict of dynamic gamemode choice weights. If None, default equal experience
+    :param gamemode_weight_ema_alpha: alpha for the exponential moving average of gamemode weighting
     """
 
     def __init__(self, redis: Redis, name: str, match: Match,

From 4e97717276a24ec740e03e3c24e0defb181da764 Mon Sep 17 00:00:00 2001
From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com>
Date: Mon, 23 Jan 2023 08:49:57 -0500
Subject: [PATCH 08/10] fix weight division 1e-8

---
 .../rollout_generator/redis/redis_rollout_worker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
index a2cb5f0..9556c0c 100644
--- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
+++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py
@@ -93,7 +93,7 @@ def __init__(self, redis: Redis, name: str, match: Match,
         for k in self.current_weights.keys():
             b, o = k.split("v")
             self.current_weights[k] /= int(b)
-        self.current_weights = {k: self.current_weights[k] / sum(self.current_weights.values()) + 1e-8 for k in self.current_weights.keys()}
+        self.current_weights = {k: self.current_weights[k] / (sum(self.current_weights.values()) + 1e-8) for k in self.current_weights.keys()}
         self.mean_exp_grant = {'1v1': 1000, '2v2': 2000, '3v3': 3000}
         self.ema_alpha = gamemode_weight_ema_alpha
         self.local_cache_name = local_cache_name
@@ -225,10 +225,10 @@ def _get_past_model(self, version):
         return model
 
     def select_gamemode(self):
-        emp_weight = {k: self.mean_exp_grant[k] / sum(self.mean_exp_grant.values()) + 1e-8
+        emp_weight = {k: self.mean_exp_grant[k] / (sum(self.mean_exp_grant.values()) + 1e-8)
                       for k in self.mean_exp_grant.keys()}
         cor_weight = {k: self.gamemode_weights[k] / emp_weight[k] for k in self.gamemode_weights.keys()}
-        self.current_weights = {k: cor_weight[k] / sum(cor_weight.values()) + 1e-8 for k in cor_weight}
+        self.current_weights = {k: cor_weight[k] / (sum(cor_weight.values()) + 1e-8) for k in cor_weight}
         mode = np.random.choice(list(self.current_weights.keys()), p=list(self.current_weights.values()))
         b, o = mode.split("v")
         return int(b), int(o)
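PATCH 08 fixes an operator-precedence slip from PATCH 06: x / s + 1e-8 adds the epsilon to every weight after the division instead of padding the denominator, so the resulting "probabilities" no longer sum to 1. The drift is tiny (3e-8 across three modes) but can already exceed the tolerance np.random.choice applies to p. A sketch of the difference, with hypothetical values:

    # Without parentheses the epsilon lands on each quotient rather than on
    # the denominator, so the weights drift away from a valid distribution.
    vals = {'1v1': 2.0, '2v2': 1.0, '3v3': 2 / 3}

    wrong = {k: v / sum(vals.values()) + 1e-8 for k, v in vals.items()}
    right = {k: v / (sum(vals.values()) + 1e-8) for k, v in vals.items()}

    print(sum(wrong.values()))  # about 1.00000003 - off by 3e-8
    print(sum(right.values()))  # about 0.999999997 - stays a valid distribution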
self.mean_exp_grant[f"{blue}v{orange}"] - self.mean_exp_grant = ((episode_exp - old_exp) * self.ema_alpha) + old_exp + self.mean_exp_grant[f"{blue}v{orange}"] = ((episode_exp - old_exp) * self.ema_alpha) + old_exp post_stats = f"Rollout finished after {len(rollouts[0].observations)} steps ({self.total_steps_generated} total steps), result was {str_result}" if result != 0: post_stats += f", goal speed: {goal_speed:.2f} kph" From 6d7d453a6f1313311cee5f6f4333f574cd040e40 Mon Sep 17 00:00:00 2001 From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com> Date: Tue, 7 Feb 2023 10:05:26 -0500 Subject: [PATCH 10/10] Fix for 1v0 --- rocket_learn/rollout_generator/redis/redis_rollout_worker.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py index dced88b..fd89f55 100644 --- a/rocket_learn/rollout_generator/redis/redis_rollout_worker.py +++ b/rocket_learn/rollout_generator/redis/redis_rollout_worker.py @@ -322,8 +322,9 @@ def run(self): # Mimics Thread str_result = ('+' if result > 0 else "") + str(result) episode_exp = len(rollouts[0].observations) * len(rollouts) self.total_steps_generated += episode_exp - old_exp = self.mean_exp_grant[f"{blue}v{orange}"] - self.mean_exp_grant[f"{blue}v{orange}"] = ((episode_exp - old_exp) * self.ema_alpha) + old_exp + if self.dynamic_gm: + old_exp = self.mean_exp_grant[f"{blue}v{orange}"] + self.mean_exp_grant[f"{blue}v{orange}"] = ((episode_exp - old_exp) * self.ema_alpha) + old_exp post_stats = f"Rollout finished after {len(rollouts[0].observations)} steps ({self.total_steps_generated} total steps), result was {str_result}" if result != 0: post_stats += f", goal speed: {goal_speed:.2f} kph"