From b2820307ad708b9ec930ca81fdfe7120dc307cca Mon Sep 17 00:00:00 2001
From: Adrian Skrobacz
Date: Sat, 23 Dec 2017 20:36:37 +0100
Subject: [PATCH] Sarsa Lambda and True Online Sarsa Lambda done

---
 ...tial_semi_gradient_sarsa_access_control.py |   4 +-
 envs/__init__.py                              |   9 ++
 sarsa-lambda.py                               | 142 ++++++++++++++++++
 semi_gradient_sarsa_mountain_car.py           |  57 +++----
 utils/__init__.py                             | 113 +-------------
 utils/algo_utils.py                           | 129 ++++++++++++++++
 utils/averager.py                             |  85 +++++++++++
 7 files changed, 389 insertions(+), 150 deletions(-)
 create mode 100644 sarsa-lambda.py
 create mode 100644 utils/algo_utils.py
 create mode 100644 utils/averager.py

diff --git a/differential_semi_gradient_sarsa_access_control.py b/differential_semi_gradient_sarsa_access_control.py
index b4184c5..123f600 100644
--- a/differential_semi_gradient_sarsa_access_control.py
+++ b/differential_semi_gradient_sarsa_access_control.py
@@ -5,15 +5,15 @@
 from plotly import tools
 
 from envs.AcessControlQueueEnv import AccessControlQueueTimeLimit, AccessControlQueue
+from features.TileCoding import IHT
 from utils import Algorithm, randargmax, generate_episode, epsilon_probs, TilingValueFunction
 
 np.random.seed(7)
 
 
 class ValueFunction(TilingValueFunction):
-
     def __init__(self, n_tilings, max_size, n_priorities, n_servers):
-        super().__init__(n_tilings, max_size)
+        super().__init__(n_tilings, IHT(max_size))
         self.n_priorities = n_priorities - 1
         self.n_servers = n_servers
 
diff --git a/envs/__init__.py b/envs/__init__.py
index e69de29..506624e 100644
--- a/envs/__init__.py
+++ b/envs/__init__.py
@@ -0,0 +1,9 @@
+from envs.AcessControlQueueEnv import *
+from envs.BlackjackEnv import *
+from envs.CliffWalkingEnv import *
+from envs.DoubleQLearningEnv import *
+from envs.GridWorldEnv import *
+from envs.MazeEnv import *
+from envs.RaceCarEnv import *
+from envs.RandomWalkEnv import *
+from envs.WindyGridWorldEnv import *
\ No newline at end of file
diff --git a/sarsa-lambda.py b/sarsa-lambda.py
new file mode 100644
index 0000000..926c895
--- /dev/null
+++ b/sarsa-lambda.py
@@ -0,0 +1,142 @@
+from collections import defaultdict
+
+import numpy as np
+import gym
+from gym import Env
+
+from features.TileCoding import IHT
+from semi_gradient_sarsa_mountain_car import ValueFunction
+from utils import Algorithm, generate_episode, epsilon_probs, randargmax, TilingFunctionCreator, Averager, \
+    GymEpisodeTaskFactory, AlgorithmFactory, plot_scatters_from_dict
+
+N_TILINGS = 8
+
+
+class ValueFunctionCreator(TilingFunctionCreator):
+    def __init__(self, n_tilings: int, iht: IHT):
+        self.n_tilings = n_tilings
+        self.iht = iht
+
+    def create(self):
+        return ValueFunction(self.n_tilings, self.iht)
+
+
+class SarsaLambda(Algorithm):
+    def __init__(self, env: Env, creator: TilingFunctionCreator, alpha=0.5 / N_TILINGS, lam=0.92, epsilon=0.0,
+                 gamma=1.0):
+        self.env = env
+        self.value_func_creator = creator
+        self.value_function = creator.create()
+        self.alpha = alpha
+        self.lam = lam
+        self.epsilon = epsilon
+        self.gamma = gamma
+        self.actions = np.arange(env.action_space.n)
+        self._reset()
+
+    def action(self, state):
+        if self.next_action is None:
+            return self._action(state)
+        else:
+            return self.next_action
+
+    def _reset(self):
+        self.e_trace = self.value_func_creator.create()
+        self.next_action = None
+
+    def _action(self, state):
+        greedy = self.greedy_action(state)
+        probs = epsilon_probs(greedy, self.actions, self.epsilon)
+        return np.random.choice(self.actions, p=probs)
+
+    def greedy_action(self, state):
+        array = np.array([self.value_function.estimated(state, action) for action in self.actions])
+        return randargmax(array)
+
+    def on_new_state(self, state, action, reward, next_state, done):
+        if not done:
+            self.next_action = self._action(next_state)
+        q = self.value_function.estimated(state, action)
+        q_next = 0 if done else self.value_function.estimated(next_state, self.next_action)
+        delta = reward + self.gamma * q_next - q
+        self.e_trace[state, action] = 1
+        self.value_function[:, :] += self.alpha * delta * self.e_trace[:, :]
+        self.e_trace[:, :] *= self.gamma * self.lam
+        if done:
+            self._reset()
+
+
+class TrueOnlineSarsaLambda(SarsaLambda):
+    def _reset(self):
+        super()._reset()
+        self.q_old = 0
+
+    def on_new_state(self, state, action, reward, next_state, done):
+        # Note: value_function.x(...) and e_trace.x(...) return the same vector since they share the same IHT
+        if not done:
+            self.next_action = self._action(next_state)
+        q = self.value_function.estimated(state, action)
+        q_next = 0 if done else self.value_function.estimated(next_state, self.next_action)
+        x = self.value_function.x(state, action)
+        delta = reward + self.gamma * q_next - q
+        self.e_trace[:, :] *= self.gamma * self.lam
+        self.e_trace[state, action] += 1 - self.alpha * self.e_trace.estimated(state, action)  # estimated() already carries the gamma * lam decay applied above
+        q_delta = q - self.q_old
+        self.value_function[:, :] += self.alpha * (delta + q_delta) * self.e_trace[:, :] - self.alpha * q_delta * x
+        self.q_old = q_next
+        if done:
+            self._reset()
+
+
+class SarsaLambdaFactory(AlgorithmFactory):
+    def __init__(self, env: Env):
+        self.env = env
+
+    def create(self, lam, alpha) -> Algorithm:
+        return SarsaLambda(self.env, ValueFunctionCreator(N_TILINGS, IHT(4096)), lam=lam, alpha=alpha / N_TILINGS)
+
+
+class TrueOnlineSarsaLambdaFactory(AlgorithmFactory):
+    def __init__(self, env: Env):
+        self.env = env
+
+    def create(self, lam, alpha) -> Algorithm:
+        return TrueOnlineSarsaLambda(self.env, ValueFunctionCreator(N_TILINGS, IHT(4096)), lam=lam, alpha=alpha / N_TILINGS)
+
+
+def average_steps_per_episode(results, n_avg):
+    tmp = np.mean(results, axis=1)
+    return np.sum(tmp, axis=0) / n_avg
+
+
+def perform_lambda_test(n_episodes, n_avg):
+    averager = Averager(GymEpisodeTaskFactory(env, n_episodes, SarsaLambdaFactory(env)))
+    alphas = np.arange(1, 15) / N_TILINGS  # These are divided by N_TILINGS again in the factory to give the final alpha value
+    results = defaultdict(lambda: np.zeros(len(alphas)))
+    for lam in [0, .68, .84, .92, .96, .98, .99]:
+        for i, alpha in np.ndenumerate(alphas):
+            results[lam][i] = averager.average((lam, alpha), n_avg, merge=average_steps_per_episode)
+    plot_scatters_from_dict(results, 'lambda={}', alphas)
+
+
+def perform_sarsa_lambda_comparison(n_episodes, n_avg):
+    alphas = np.arange(0.2, 2.2, 0.2)  # These are divided by N_TILINGS in the factory to give the final alpha value
+    lam = 0.84
+    results = defaultdict(lambda: np.zeros(len(alphas)))
+    averager = Averager(GymEpisodeTaskFactory(env, n_episodes, SarsaLambdaFactory(env)))
+    for i, alpha in np.ndenumerate(alphas):
+        results['Sarsa(Lam) with replacing'][i] = -averager.average((lam, alpha), n_avg,
+                                                                    merge=average_steps_per_episode)
+
+    averager = Averager(GymEpisodeTaskFactory(env, n_episodes, TrueOnlineSarsaLambdaFactory(env)))
+    for i, alpha in np.ndenumerate(alphas):
+        results['True Online Sarsa(Lam)'][i] = -averager.average((lam, alpha), n_avg, merge=average_steps_per_episode)
+
+    plot_scatters_from_dict(results, '{}', alphas)
+
+
+if __name__ == '__main__':
+    env = gym.make('MountainCar-v0')
+    env._max_episode_steps = int(3e3)
+    # perform_lambda_test(n_episodes=50, n_avg=40)
+    perform_sarsa_lambda_comparison(n_episodes=20, n_avg=100)
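The SarsaLambda class above leans on the slice-indexing tricks that TilingValueFunction gains later in this patch (utils/algo_utils.py). As a plain-numpy reference, the sketch below mirrors the replacing-trace backup performed in SarsaLambda.on_new_state; the function name and the explicit index arrays are illustrative only and are not part of the patch.

# Standalone sketch, not part of the patch: the replacing-trace Sarsa(lambda)
# backup from SarsaLambda.on_new_state, written against raw numpy arrays.
import numpy as np


def sarsa_lambda_step(w, z, active_idx, next_active_idx, reward,
                      alpha=0.5 / 8, gamma=1.0, lam=0.92, done=False):
    """One backup over binary tile features; active_idx are the tiles of (s, a)."""
    q = w[active_idx].sum()
    q_next = 0.0 if done else w[next_active_idx].sum()
    delta = reward + gamma * q_next - q
    z[active_idx] = 1.0      # replacing traces: active tiles are reset to 1
    w += alpha * delta * z   # every tile is credited in proportion to its trace
    z *= gamma * lam         # all traces then decay towards zero
    return w, z

With done=True the q_next term drops out, matching the 0-if-done branch in the class; the class additionally rebuilds the trace between episodes, which is what _reset does by creating a fresh e_trace.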
diff --git a/semi_gradient_sarsa_mountain_car.py b/semi_gradient_sarsa_mountain_car.py
index d88c657..1bd3fb4 100644
--- a/semi_gradient_sarsa_mountain_car.py
+++ b/semi_gradient_sarsa_mountain_car.py
@@ -12,7 +12,7 @@
 from joblib import Parallel, delayed
 from multiprocessing import cpu_count
 
-from utils import epsilon_prob, randargmax, Algorithm, calc_batch_size
+from utils import epsilon_prob, randargmax, Algorithm, calc_batch_size, TilingValueFunction
 
 POSITION_MIN = -1.2
 POSITION_MAX = 0.6
@@ -25,32 +25,15 @@
 EPSILON = 0
 
 
-class TilingValueFunction:
-    def __init__(self, n_tilings=N_TILINGS, max_size=MAX_SIZE):
-        self.iht = IHT(MAX_SIZE)
-        self.n_tilings = n_tilings
-        self.weights = np.zeros((max_size,))
-        self.position_scale = self.n_tilings / (POSITION_MAX - POSITION_MIN)
-        self.velocity_scale = self.n_tilings / (VELOCITY_MAX - VELOCITY_MIN)
-
-    def _idx(self, item):
-        position, velocity, action = item
-        return tiles(self.iht, self.n_tilings,
-                     [self.position_scale * position, self.velocity_scale * velocity],
-                     [action])
-
-    def __getitem__(self, item):
-        position, _, _ = item
-        if position >= POSITION_GOAL:
-            return np.zeros(1)
-        else:
-            return self.weights[self._idx(item)]
-
-    def estimated(self, item):
-        return self[item].sum()
+class ValueFunction(TilingValueFunction):
+    def __init__(self, n_tilings: int, iht: IHT):
+        super().__init__(n_tilings, iht)
 
-    def __setitem__(self, key, value):
-        self.weights[self._idx(key)] = value
+    def scaled_values(self, state):
+        position, velocity = state
+        position_scale = self.n_tilings / (POSITION_MAX - POSITION_MIN)
+        velocity_scale = self.n_tilings / (VELOCITY_MAX - VELOCITY_MIN)
+        return [position * position_scale, velocity * velocity_scale]
 
 
 class SemiGradientSarsa(Algorithm):
@@ -81,16 +64,16 @@ def _action(self, state):
         return np.random.choice(self.actions, p=self._probs(state))
 
     def greedy_action(self, state):
-        array = np.array([self.value_function.estimated((*state, action)) for action in self.actions])
+        array = np.array([self.value_function.estimated(state, action) for action in self.actions])
         return np.argmax(array)
 
     def on_new_state(self, state, action, reward, next_state, done):
         self.next_action = self._action(next_state)
-        q_next = self.value_function.estimated((*next_state, self.next_action))
-        q = self.value_function.estimated((*state, action))
+        q_next = self.value_function.estimated(next_state, self.next_action)
+        q = self.value_function.estimated(state, action)
         delta = reward + self.gamma * q_next - q
         update = self.alpha * delta
-        self.value_function[(*state, action)] += update
+        self.value_function[state, action] += update
         if done:
             self.next_action = None
 
@@ -138,7 +121,7 @@ def get_entry(self, t):
 
     def _get_key(self, t):
         entry = self.get_entry(t)
-        return (*entry.state, entry.action)
+        return entry.state, entry.action
 
     def action(self, state):
         if self.t > 0:
@@ -158,7 +141,7 @@ def _prob(self, action, greedy):
         return epsilon_prob(greedy, action, len(self.actions), self.epsilon)
 
     def greedy_action(self, state):
-        array = np.array([self.value_function.estimated((*state, action)) for action in self.actions])
+        array = np.array([self.value_function.estimated(state, action) for action in self.actions])
         return randargmax(array)
 
     def calc_returns(self, update_time):
@@ -182,8 +165,8 @@ def on_new_state(self, state, action, reward, next_state, done):
             returns = self.calc_returns(update_time)
             not_last_state = update_time + self.n < self.T
             if not_last_state:
-                returns += pow(self.gamma, self.n) * self.value_function.estimated(key_t_plus_n)
-            self.value_function[key_t] += self.alpha * (returns - self.value_function.estimated(key_t))
+                returns += pow(self.gamma, self.n) * self.value_function.estimated(*key_t_plus_n)
+            self.value_function[key_t] += self.alpha * (returns - self.value_function.estimated(*key_t))
         self.t += 1
         if done and update_time != self.T - 1:
             self.on_new_state(state, action, reward, next_state, done)
@@ -259,7 +242,7 @@ def __init__(self, env):
         self.env = env
 
     def __call__(self, alpha):
-        return SemiGradientSarsa(self.env, TilingValueFunction(), alpha)
+        return SemiGradientSarsa(self.env, ValueFunction(N_TILINGS, IHT(MAX_SIZE)), alpha)
 
 
 class GimmeNStepSarsa:
@@ -267,7 +250,7 @@ def __init__(self, env):
         self.env = env
 
     def __call__(self, alpha, n):
-        return NStepSemiGradientSarsa(self.env, TilingValueFunction(), n, alpha)
+        return NStepSemiGradientSarsa(self.env, ValueFunction(N_TILINGS, IHT(MAX_SIZE)), n, alpha)
 
 
 def plot_value_function_using_plotly(value_function):
@@ -326,7 +309,7 @@ def plot_n_step_sarsa_by_alpha_and_n(env):
 
     plot_n_step_sarsa_by_alpha_and_n(env)
 
-    # value_function = TilingValueFunction(N_TILINGS)
+    # value_function = ValueFunction(N_TILINGS, IHT(MAX_SIZE))
     # for i in range(100):
     #     # steps = generate_episode(env, NStepSemiGradientSarsa(env, value_function, 8, 0.5 / N_TILINGS))
     #     steps = generate_episode(env, SemiGradientSarsa(env, value_function, 0.5 / N_TILINGS))
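With the Mountain Car value function reduced to a thin subclass of the shared base, a quick sanity check looks like the sketch below. It is illustrative only and assumes semi_gradient_sarsa_mountain_car keeps its experiment code behind the __main__ guard, as the import in sarsa-lambda.py already presumes.

# Illustrative check, not part of the patch.
from features.TileCoding import IHT
from semi_gradient_sarsa_mountain_car import ValueFunction, N_TILINGS, MAX_SIZE

vf = ValueFunction(N_TILINGS, IHT(MAX_SIZE))
state = (-0.5, 0.0)                  # (position, velocity) near the valley floor
print(vf.estimated(state, 2))        # 0.0 before any update
vf[state, 2] = vf[state, 2] + 0.1    # nudge the weights of the active tiles
print(vf.estimated(state, 2))        # N_TILINGS * 0.1 afterwards

Note that the goal-position special case from the old class (returning zeros at POSITION_GOAL) is gone; the new Sarsa(lambda) classes handle termination with their own 0-if-done branch instead.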
diff --git a/utils/__init__.py b/utils/__init__.py
index 48988cc..b8a8dc1 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -1,111 +1,2 @@
-import numpy as np
-from math import ceil
-import itertools
-
-from gym import Env
-
-from features.TileCoding import IHT, tiles
-
-
-def randomargmax(d, key=None):
-    k_max = max(d, key=key)
-    return np.random.choice([k for k, v in d.items() if d[k_max] == v])
-
-
-def randargmax(b, **kw):
-    """ a random tie-breaking argmax"""
-    return np.argmax(np.random.random(b.shape) * (b == b.max()), **kw)
-
-
-def epsilon_probs(greedy, actions, epsilon):
-    return [epsilon_prob(greedy, action, len(actions), epsilon) for action in actions]
-
-
-def epsilon_prob(greedy, action, n_actions, epsilon):
-    if greedy == action:
-        return epsilon_greedy_prob(n_actions, epsilon)
-    else:
-        return epsilon_explore_prob(n_actions, epsilon)
-
-
-def epsilon_greedy_prob(n_actions, epsilon):
-    return 1 - epsilon + epsilon / n_actions
-
-
-def epsilon_explore_prob(n_actions, epsilon):
-    return epsilon / n_actions
-
-
-def calc_batch_size(size, n_batches, batch_idx):
-    return max(0, min(size - batch_idx * ceil(size / n_batches), ceil(size / n_batches)))
-
-
-class Algorithm:
-    def action(self, state):
-        raise NotImplementedError()
-
-    def on_new_state(self, state, action, reward, next_state, done):
-        raise NotImplementedError()
-
-
-class EpisodeAlgorithm:
-    def action(self, state):
-        raise NotImplementedError()
-
-    def on_new_episode(self, history):
-        raise NotImplementedError()
-
-
-def generate_episode(env: Env, algorithm: Algorithm, render=False, print_step=False):
-    done = False
-    obs = env.reset()
-    counter = 0
-    while not done:
-        if print_step:
-            print('Step:', counter)
-        if render:
-            env.render()
-        prev_obs = obs
-        action = algorithm.action(obs)
-        obs, reward, done, _ = env.step(action)
-        algorithm.on_new_state(prev_obs, action, reward, obs, done)
-        counter += 1
-    return counter
-
-
-class TilingValueFunction:
-    def __init__(self, n_tilings, max_size):
-        self.iht = IHT(max_size)
-        self.n_tilings = n_tilings
-        self.weights = np.zeros(max_size)
-
-    def scaled_values(self, state):
-        raise NotImplementedError('Implement me and return scaled values from state')
-
-    def _idx(self, state, action):
-        return tiles(self.iht, self.n_tilings,
-                     self.scaled_values(state),
-                     [action])
-
-    def __getitem__(self, item):
-        state, action = item
-        return self.weights[self._idx(state, action)]
-
-    def estimated(self, state, action):
-        return self[state, action].sum()
-
-    def __setitem__(self, key, value):
-        state, action = key
-        self.weights[self._idx(state, action)] = value
-
-    def to_policy(self, actions, *args):
-        policy = np.zeros([len(arg) for arg in args])
-        for state in itertools.product(*[list(arg) for arg in args]):
-            policy[state] = np.argmax([self.estimated(state, action) for action in actions])
-        return policy
-
-    def to_value(self, actions, *args):
-        value = np.zeros([len(arg) for arg in args])
-        for state in itertools.product(*[list(arg) for arg in args]):
-            value[state] = np.max([self.estimated(state, action) for action in actions])
-        return value
+from utils.algo_utils import *
+from utils.averager import *
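utils/__init__.py now only re-exports the two new submodules, so the flat imports used across this patch (from utils import ...) keep resolving. A quick illustration, assuming both submodules import cleanly:

# Illustrative, not part of the patch: the old flat names still resolve via the package.
from utils import Algorithm, randargmax, TilingValueFunction   # re-exported from utils.algo_utils
from utils import Averager, GymEpisodeTaskFactory              # re-exported from utils.averager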
diff --git a/utils/algo_utils.py b/utils/algo_utils.py
new file mode 100644
index 0000000..a335458
--- /dev/null
+++ b/utils/algo_utils.py
@@ -0,0 +1,129 @@
+import itertools
+from math import ceil
+
+from gym import Env
+
+from features.TileCoding import tiles
+import numpy as np
+
+
+def randomargmax(d, key=None):
+    k_max = max(d, key=key)
+    return np.random.choice([k for k, v in d.items() if d[k_max] == v])
+
+
+def randargmax(b, **kw):
+    """ a random tie-breaking argmax"""
+    return np.argmax(np.random.random(b.shape) * (b == b.max()), **kw)
+
+
+def epsilon_probs(greedy, actions, epsilon):
+    return [epsilon_prob(greedy, action, len(actions), epsilon) for action in actions]
+
+
+def epsilon_prob(greedy, action, n_actions, epsilon):
+    if greedy == action:
+        return epsilon_greedy_prob(n_actions, epsilon)
+    else:
+        return epsilon_explore_prob(n_actions, epsilon)
+
+
+def epsilon_greedy_prob(n_actions, epsilon):
+    return 1 - epsilon + epsilon / n_actions
+
+
+def epsilon_explore_prob(n_actions, epsilon):
+    return epsilon / n_actions
+
+
+def calc_batch_size(size, n_batches, batch_idx):
+    return max(0, min(size - batch_idx * ceil(size / n_batches), ceil(size / n_batches)))
+
+
+class Algorithm:
+    def action(self, state):
+        raise NotImplementedError()
+
+    def on_new_state(self, state, action, reward, next_state, done):
+        raise NotImplementedError()
+
+
+class EpisodeAlgorithm:
+    def action(self, state):
+        raise NotImplementedError()
+
+    def on_new_episode(self, history):
+        raise NotImplementedError()
+
+
+def generate_episode(env: Env, algorithm: Algorithm, render=False, print_step=False):
+    done = False
+    obs = env.reset()
+    counter = 0
+    while not done:
+        if print_step:
+            print('Step:', counter)
+        if render:
+            env.render()
+        prev_obs = obs
+        action = algorithm.action(obs)
+        obs, reward, done, _ = env.step(action)
+        algorithm.on_new_state(prev_obs, action, reward, obs, done)
+        counter += 1
+    return counter
+
+
+class TilingValueFunction:
+    ALL = slice(None, None, None)
+
+    def __init__(self, n_tilings, iht):
+        self.iht = iht
+        self.n_tilings = n_tilings
+        self.weights = np.zeros(iht.size)
+
+    def scaled_values(self, state):
+        raise NotImplementedError('Implement me and return scaled values from state')
+
+    def _idx(self, state, action):
+        if self.is_all_slice(state) and self.is_all_slice(action):
+            return TilingValueFunction.ALL
+        else:
+            return tiles(self.iht, self.n_tilings,
+                         self.scaled_values(state),
+                         [action])
+
+    def is_all_slice(self, item):
+        return isinstance(item, slice) and item == TilingValueFunction.ALL
+
+    def x(self, state, action):
+        x = np.zeros(self.weights.shape)
+        x[self._idx(state, action)] = 1
+        return x
+
+    def __getitem__(self, item):
+        state, action = item
+        return self.weights[self._idx(state, action)]
+
+    def estimated(self, state, action):
+        return self[state, action].sum()
+
+    def __setitem__(self, key, value):
+        state, action = key
+        self.weights[self._idx(state, action)] = value
+
+    def to_policy(self, actions, *args):
+        policy = np.zeros([len(arg) for arg in args])
+        for state in itertools.product(*[list(arg) for arg in args]):
+            policy[state] = np.argmax([self.estimated(state, action) for action in actions])
+        return policy
+
+    def to_value(self, actions, *args):
+        value = np.zeros([len(arg) for arg in args])
+        for state in itertools.product(*[list(arg) for arg in args]):
+            value[state] = np.max([self.estimated(state, action) for action in actions])
+        return value
+
+
+class TilingFunctionCreator:
+    def create(self):
+        raise NotImplementedError('Implement this method and return subclass of TilingValueFunction')
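Two additions to the base class above do the heavy lifting for the Sarsa(lambda) code: the ALL slice, which lets callers address the whole weight vector with [:, :] for trace arithmetic, and x(state, action), which materialises the binary tile-coding feature vector. The sketch below shows the intended use; ToyValueFunction and its scaling are made up for the example and assume the IHT/tiles helpers from features.TileCoding behave as in the rest of the repository.

# Illustrative sketch, not part of the patch.
from features.TileCoding import IHT
from utils import TilingValueFunction


class ToyValueFunction(TilingValueFunction):
    def scaled_values(self, state):
        return [state * self.n_tilings / 10.0]   # toy 1-D state, roughly one unit per tiling


vf = ToyValueFunction(n_tilings=4, iht=IHT(512))
x = vf.x(3.7, 1)             # binary vector with exactly n_tilings ones
vf[:, :] += 0.25 * x         # whole-weight-vector update through the ALL slice
print(vf.estimated(3.7, 1))  # 4 * 0.25 = 1.0

Because e_trace and value_function in sarsa-lambda.py are built by the same creator, they index the same IHT and therefore agree on which components of x are active.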
diff --git a/utils/averager.py b/utils/averager.py
new file mode 100644
index 0000000..4883f1f
--- /dev/null
+++ b/utils/averager.py
@@ -0,0 +1,85 @@
+from multiprocessing import cpu_count
+from joblib import Parallel, delayed
+import numpy as np
+import plotly.graph_objs as go
+import plotly.offline as py
+
+from utils import calc_batch_size, Algorithm, generate_episode
+
+
+class AlgorithmFactory:
+    def create(self, *args, **kwargs) -> Algorithm:
+        raise NotImplementedError('Implement me')
+
+
+class AveragingTask:
+    def run(self, batch_size, batch_idx):
+        results = self.create_results()
+        for i in range(batch_size):
+            self.run_single(i, results)
+        return results
+
+    def run_single(self, i, results):
+        raise NotImplementedError('Implement me')
+
+    def create_results(self):
+        raise NotImplementedError('Implement me')
+
+
+class GymEpisodeTask(AveragingTask):
+    def __init__(self, env, n_episodes, algorithm_factory: AlgorithmFactory, algo_params):
+        self.env = env
+        self.n_episodes = n_episodes
+        self.algorithm_factory = algorithm_factory
+        self.algo_params = algo_params
+
+    def run_single(self, i, results):
+        algorithm = self.algorithm_factory.create(*self.algo_params)
+        for episode in range(self.n_episodes):
+            steps = generate_episode(self.env, algorithm, render=False)
+            results[episode] += steps
+            print('Run: {:2}, params: {}, ep: {:3}, steps: {:4}'.format(i, self.algo_params, episode, steps))
+
+    def create_results(self):
+        return np.zeros(self.n_episodes)
+
+
+class TaskFactory:
+    def create(self, params) -> AveragingTask:
+        raise NotImplementedError('Implement me')
+
+
+class GymEpisodeTaskFactory(TaskFactory):
+    def __init__(self, env, n_episodes, algorithm_factory: AlgorithmFactory):
+        self.env = env
+        self.n_episodes = n_episodes
+        self.algorithm_factory = algorithm_factory
+
+    def create(self, params) -> AveragingTask:
+        return GymEpisodeTask(self.env, self.n_episodes, self.algorithm_factory, params)
+
+
+def average(results, n_avg):
+    return np.sum(results, axis=0) / n_avg
+
+
+class Averager:
+    def __init__(self, task_factory: TaskFactory):
+        self.task_factory = task_factory
+
+    def average(self, algo_params, n_avg, n_jobs=cpu_count(), merge=average):
+        with Parallel(n_jobs=n_jobs) as parallel:
+            jobs = []
+            for batch_idx in range(n_jobs):
+                task = self.task_factory.create(algo_params)
+                batch_size = calc_batch_size(n_avg, n_jobs, batch_idx)
+                jobs.append(delayed(task.run)(batch_size, batch_idx))
+            results = parallel(jobs)
+            return merge(results, n_avg)
+
+
+def plot_scatters_from_dict(results, label_format: str, x=None):
+    data = []
+    for label, values in results.items():
+        data.append(go.Scatter(y=values, x=x, name=label_format.format(label)))
+    py.plot(data)
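Taken together, an experiment builds an AlgorithmFactory, wraps it in a GymEpisodeTaskFactory, and lets Averager fan the runs out across CPU cores, with calc_batch_size deciding how many runs each worker takes. The sketch below is illustrative only; MyAgentFactory is a made-up stand-in for the factories defined in sarsa-lambda.py.

# Illustrative sketch, not part of the patch.
from utils import Averager, GymEpisodeTaskFactory, AlgorithmFactory, Algorithm, calc_batch_size

# calc_batch_size splits n_avg runs across n_jobs workers, e.g. 10 runs over 4 jobs:
print([calc_batch_size(10, 4, i) for i in range(4)])   # -> [3, 3, 3, 1]


class MyAgentFactory(AlgorithmFactory):
    def __init__(self, env):
        self.env = env

    def create(self, lam, alpha) -> Algorithm:
        ...   # build a SarsaLambda or TrueOnlineSarsaLambda here, as sarsa-lambda.py does


# import gym; env = gym.make('MountainCar-v0')
# averager = Averager(GymEpisodeTaskFactory(env, n_episodes=20, algorithm_factory=MyAgentFactory(env)))
# steps = averager.average((0.84, 0.5), n_avg=10)   # default merge: per-episode step counts averaged over runs

sarsa-lambda.py passes its own merge function (average_steps_per_episode) instead of the default, collapsing the per-episode curve into a single number for each (lambda, alpha) pair.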