Merge pull request #9 from Arena-Rosnav/master

Master
Arena-Rosnav · Oct 13, 2022 · c11ad89 · c11ad89
2 parents 1af3a40 + 7669886
commit c11ad89
Show file tree

Hide file tree

Showing 20 changed files with 406 additions and 203 deletions.
diff --git a/agents/jackal_marl/best_model.zip b/agents/jackal_marl/best_model.zip
diff --git a/agents/jackal_marl/hyperparameters.json b/agents/jackal_marl/hyperparameters.json
@@ -0,0 +1,25 @@
+{
+  "agent_name": "AGENT_24_2022_02_04__11_28",
+  "robot": "jackal",
+  "actions_in_observationspace": true,
+  "reward_fnc": "rule_06",
+  "discrete_action_space": false,
+  "normalize": false,
+  "task_mode": "staged",
+  "train_max_steps_per_episode": 400,
+  "eval_max_steps_per_episode": 1100,
+  "goal_radius": 0.7,
+  "curr_stage": 1,
+  "batch_size": 38400,
+  "gamma": 0.99,
+  "n_steps": 3200,
+  "ent_coef": 0.005,
+  "learning_rate": 0.0003,
+  "vf_coef": 0.22,
+  "max_grad_norm": 0.5,
+  "gae_lambda": 0.95,
+  "m_batch_size": 15,
+  "n_epochs": 3,
+  "clip_range": 0.22,
+  "observation_space": ["laser_scan", "goal_in_robot_frame", "last_action"]
+}
diff --git a/agents/new_jackal/hyperparameters.json b/agents/new_jackal/hyperparameters.json
@@ -1,24 +1,25 @@
 {
-    "agent_name": "AGENT_24_2022_02_04__11_28",
-    "robot": "jackal",
-    "actions_in_observationspace": true,
-    "reward_fnc": "rule_05",
-    "discrete_action_space": false,
-    "normalize": false,
-    "task_mode": "staged",
-    "train_max_steps_per_episode": 150,
-    "eval_max_steps_per_episode": 200,
-    "goal_radius": 0.7,
-    "curr_stage": 8,
-    "batch_size": 19200,
-    "gamma": 0.99,
-    "n_steps": 4800,
-    "ent_coef": 0.005,
-    "learning_rate": 0.0003,
-    "vf_coef": 0.22,
-    "max_grad_norm": 0.5,
-    "gae_lambda": 0.95,
-    "m_batch_size": 15,
-    "n_epochs": 3,
-    "clip_range": 0.22
-}
+  "agent_name": "AGENT_24_2022_02_04__11_28",
+  "robot": "jackal",
+  "actions_in_observationspace": true,
+  "reward_fnc": "rule_05",
+  "discrete_action_space": false,
+  "normalize": false,
+  "task_mode": "staged",
+  "train_max_steps_per_episode": 150,
+  "eval_max_steps_per_episode": 200,
+  "goal_radius": 0.7,
+  "curr_stage": 8,
+  "batch_size": 19200,
+  "gamma": 0.99,
+  "n_steps": 4800,
+  "ent_coef": 0.005,
+  "learning_rate": 0.0003,
+  "vf_coef": 0.22,
+  "max_grad_norm": 0.5,
+  "gae_lambda": 0.95,
+  "m_batch_size": 15,
+  "n_epochs": 3,
+  "clip_range": 0.22,
+  "observation_space": ["laser_scan", "goal_in_robot_frame", "last_action"]
+}
diff --git a/agents/ridgeback_marl/best_model.zip b/agents/ridgeback_marl/best_model.zip
diff --git a/agents/ridgeback_marl/hyperparameters.json b/agents/ridgeback_marl/hyperparameters.json
@@ -0,0 +1,25 @@
+{
+  "agent_name": "ridgeback_2022_09_20__15_37",
+  "robot": "myrobot",
+  "actions_in_observationspace": true,
+  "reward_fnc": "rule_06",
+  "discrete_action_space": false,
+  "normalize": false,
+  "task_mode": "staged",
+  "train_max_steps_per_episode": 800,
+  "eval_max_steps_per_episode": 1100,
+  "goal_radius": 0.7,
+  "curr_stage": 1,
+  "batch_size": 57600,
+  "gamma": 0.99,
+  "n_steps": 4800,
+  "ent_coef": 0.005,
+  "learning_rate": 0.0003,
+  "vf_coef": 0.22,
+  "max_grad_norm": 0.5,
+  "gae_lambda": 0.95,
+  "m_batch_size": 16,
+  "n_epochs": 3,
+  "clip_range": 0.22,
+  "observation_space": ["laser_scan", "goal_in_robot_frame", "last_action"]
+}
diff --git a/agents/rto_tlabs/hyperparameters.json b/agents/rto_tlabs/hyperparameters.json
@@ -1,24 +1,25 @@
 {
-    "agent_name": "AGENT_24_2022_02_04__11_36",
-    "robot": "rto_real",
-    "actions_in_observationspace": true,
-    "reward_fnc": "rule_05",
-    "discrete_action_space": false,
-    "normalize": false,
-    "task_mode": "staged",
-    "train_max_steps_per_episode": 120,
-    "eval_max_steps_per_episode": 170,
-    "goal_radius": 0.7,
-    "curr_stage": 10,
-    "batch_size": 19200,
-    "gamma": 0.99,
-    "n_steps": 2400,
-    "ent_coef": 0.005,
-    "learning_rate": 0.0003,
-    "vf_coef": 0.22,
-    "max_grad_norm": 0.5,
-    "gae_lambda": 0.95,
-    "m_batch_size": 15,
-    "n_epochs": 3,
-    "clip_range": 0.22
-}
+  "agent_name": "AGENT_24_2022_02_04__11_36",
+  "robot": "rto_real",
+  "actions_in_observationspace": true,
+  "reward_fnc": "rule_05",
+  "discrete_action_space": false,
+  "normalize": false,
+  "task_mode": "staged",
+  "train_max_steps_per_episode": 120,
+  "eval_max_steps_per_episode": 170,
+  "goal_radius": 0.7,
+  "curr_stage": 10,
+  "batch_size": 19200,
+  "gamma": 0.99,
+  "n_steps": 2400,
+  "ent_coef": 0.005,
+  "learning_rate": 0.0003,
+  "vf_coef": 0.22,
+  "max_grad_norm": 0.5,
+  "gae_lambda": 0.95,
+  "m_batch_size": 15,
+  "n_epochs": 3,
+  "clip_range": 0.22,
+  "observation_space": ["laser_scan", "goal_in_robot_frame", "last_action"]
+}
diff --git a/agents/rto_tlabs_marl/best_model.zip b/agents/rto_tlabs_marl/best_model.zip
diff --git a/agents/rto_tlabs_marl/hyperparameters.json b/agents/rto_tlabs_marl/hyperparameters.json
@@ -0,0 +1,25 @@
+{
+  "agent_name": "AGENT_24_2022_02_04__11_36",
+  "robot": "rto_real",
+  "actions_in_observationspace": true,
+  "reward_fnc": "rule_06",
+  "discrete_action_space": false,
+  "normalize": false,
+  "task_mode": "staged",
+  "train_max_steps_per_episode": 400,
+  "eval_max_steps_per_episode": 1100,
+  "goal_radius": 0.7,
+  "curr_stage": 1,
+  "batch_size": 38400,
+  "gamma": 0.99,
+  "n_steps": 3200,
+  "ent_coef": 0.005,
+  "learning_rate": 0.0003,
+  "vf_coef": 0.22,
+  "max_grad_norm": 0.5,
+  "gae_lambda": 0.95,
+  "m_batch_size": 15,
+  "n_epochs": 3,
+  "clip_range": 0.22,
+  "observation_space": ["laser_scan", "goal_in_robot_frame", "last_action"]
+}
diff --git a/rosnav/model/agent_factory.py b/rosnav/model/agent_factory.py
@@ -40,21 +40,23 @@ def inner_wrapper(wrapped_class) -> Callable:
     # end register()
 
     @classmethod
-    def instantiate(cls, name: str, **kwargs) -> Union[Type[BaseAgent], Type[BasePolicy]]:
+    def instantiate(
+        cls, name: str, **kwargs
+    ) -> Union[Type[BaseAgent], Type[BasePolicy]]:
         """Factory command to create the agent.
         This method gets the appropriate agent class from the registry
         and creates an instance of it, while passing in the parameters
         given in ``kwargs``.
 
         Args:
-            name (str): The name of the agent to create.
+            name (str): The name of the agent to create.
 
         Returns:
             An instance of the agent that is created.
         """
         assert name in cls.registry, f"Agent '{name}' is not registered!"
         agent_class = cls.registry[name]
-        
+
         if issubclass(agent_class, BaseAgent):
             return agent_class(**kwargs)
         else:

diff --git a/rosnav/model/base_agent.py b/rosnav/model/base_agent.py
@@ -1,9 +1,9 @@
-from typing import Type, List
-
 from abc import ABC, abstractmethod
 from enum import Enum
-from torch.nn.modules.module import Module
+from typing import List, Type
+
 from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+from torch.nn.modules.module import Module
 
 
 class PolicyType(Enum):
@@ -47,14 +47,17 @@ def activation_fn(self) -> Type[Module]:
         pass
 
     def get_kwargs(self):
         """Assemble the policy keyword arguments described by this agent.

         Collects ``features_extractor_class``, ``features_extractor_kwargs``,
         ``net_arch`` and ``activation_fn`` into a single dict and adds the
         agent's ``robot_model`` to the feature-extractor kwargs.

         Returns:
             dict: The collected arguments. ``features_extractor_class`` and
             ``features_extractor_kwargs`` are left out when they are falsy.
         """
         fe_kwargs = self.features_extractor_kwargs
         if fe_kwargs is not None or self.features_extractor_class:
             # Build a fresh dict instead of assigning into the attribute:
             # the attribute may be ``None`` (item assignment would raise
             # ``TypeError``) and it may be a class-level dict shared by all
             # instances, which must not be mutated as a side effect.
             fe_kwargs = dict(fe_kwargs or {}, robot_model=self.robot_model)

         kwargs = {
             "features_extractor_class": self.features_extractor_class,
             "features_extractor_kwargs": fe_kwargs,
             "net_arch": self.net_arch,
             "activation_fn": self.activation_fn,
         }
         # Drop unset entries so the consumer falls back to its own defaults.
         if not kwargs["features_extractor_class"]:
             del kwargs["features_extractor_class"]
         if not kwargs["features_extractor_kwargs"]:
             del kwargs["features_extractor_kwargs"]
         return kwargs
diff --git a/rosnav/model/custom_sb3_policy.py b/rosnav/model/custom_sb3_policy.py
@@ -14,6 +14,9 @@ class AGENT_6(BaseAgent):
     net_arch = [128, 64, 64, dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_7")
 class AGENT_7(BaseAgent):
@@ -23,6 +26,9 @@ class AGENT_7(BaseAgent):
     net_arch = [128, 128, 128, dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_8")
 class AGENT_8(BaseAgent):
@@ -32,6 +38,9 @@ class AGENT_8(BaseAgent):
     net_arch = [64, 64, 64, 64, dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_9")
 class AGENT_9(BaseAgent):
@@ -41,6 +50,9 @@ class AGENT_9(BaseAgent):
     net_arch = [64, 64, 64, 64, dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_10")
 class AGENT_10(BaseAgent):
@@ -50,6 +62,9 @@ class AGENT_10(BaseAgent):
     net_arch = [128, 128, 128, 128, dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_11")
 class AGENT_11(BaseAgent):
@@ -59,6 +74,9 @@ class AGENT_11(BaseAgent):
     net_arch = [512, 512, 512, 512, dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_17")
 class AGENT_17(BaseAgent):
@@ -68,6 +86,9 @@ class AGENT_17(BaseAgent):
     net_arch = [dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_18")
 class AGENT_18(BaseAgent):
@@ -77,6 +98,9 @@ class AGENT_18(BaseAgent):
     net_arch = [128, dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_19")
 class AGENT_19(BaseAgent):
@@ -86,6 +110,9 @@ class AGENT_19(BaseAgent):
     net_arch = [dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_20")
 class AGENT_20(BaseAgent):
@@ -95,6 +122,9 @@ class AGENT_20(BaseAgent):
     net_arch = [dict(pi=[128], vf=[128])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_21")
 class AGENT_21(BaseAgent):
@@ -104,6 +134,9 @@ class AGENT_21(BaseAgent):
     net_arch = [dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_22")
 class AGENT_22(BaseAgent):
@@ -113,6 +146,9 @@ class AGENT_22(BaseAgent):
     net_arch = [dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_23")
 class AGENT_23(BaseAgent):
@@ -122,6 +158,9 @@ class AGENT_23(BaseAgent):
     net_arch = [128, dict(pi=[64, 64, 64], vf=[64, 64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_24")
 class AGENT_24(BaseAgent):
@@ -131,11 +170,17 @@ class AGENT_24(BaseAgent):
     net_arch = [128, dict(pi=[64, 64], vf=[64, 64])]
     activation_fn = nn.ReLU
 
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model
+
 
 @AgentFactory.register("AGENT_25")
 class AGENT_25(BaseAgent):
     type = PolicyType.MLP
     features_extractor_class = None
     features_extractor_kwargs = None
     net_arch = [512, 256, dict(pi=[128], vf=[128])]
-    activation_fn = nn.ReLU
+    activation_fn = nn.ReLU
+
+    def __init__(self, robot_model: str = None):
+        self.robot_model = robot_model