Skip to content

Commit 794d785

Browse files
committed
Rewards init moved to make_world in debug scenarios
1 parent e0a2c05 commit 794d785

File tree

6 files changed

+29
-12
lines changed

vmas/scenarios/debug/asym_joint.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
import math
@@ -141,6 +141,9 @@ def mass_collision_filter(e):
141141
)
142142
world.add_joint(joint)
143143

144+
self.rot_rew = torch.zeros(batch_dim, device=device)
145+
self.energy_rew = self.rot_rew.clone()
146+
144147
return world
145148

146149
def reset_world_at(self, env_index: int = None):
@@ -227,10 +230,7 @@ def reward(self, agent: Agent):
227230
is_first = agent == self.world.agents[0]
228231

229232
if is_first:
230-
self.rew = torch.zeros(
231-
self.world.batch_dim, device=self.world.device, dtype=torch.float32
232-
)
233-
self.rot_rew = self.rew.clone()
233+
self.rot_rew[:] = 0
234234

235235
# Rot shaping
236236
joint_dist_to_90_rot = get_line_angle_dist_0_180(

vmas/scenarios/debug/circle_trajectory.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict
@@ -53,6 +53,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
5353
)
5454
world.add_agent(self.agent)
5555

56+
self.pos_rew = torch.zeros(batch_dim, device=device)
57+
self.dot_product = self.pos_rew.clone()
58+
5659
return world
5760

5861
def process_action(self, agent: Agent):

vmas/scenarios/debug/goal.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
import math
@@ -77,8 +77,12 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
7777
agent, world, controller_params, "standard"
7878
)
7979
agent.goal = self.goal
80+
agent.energy_rew = torch.zeros(batch_dim, device=device)
8081
world.add_agent(agent)
8182

83+
self.pos_rew = torch.zeros(batch_dim, device=device)
84+
self.time_rew = self.pos_rew.clone()
85+
8286
return world
8387

8488
def reset_world_at(self, env_index: int = None):
@@ -189,8 +193,8 @@ def reward(self, agent: Agent):
189193
is_first = agent == self.world.agents[0]
190194

191195
if is_first:
192-
self.pos_rew = torch.zeros(self.world.batch_dim, device=self.world.device)
193-
self.time_rew = torch.zeros(self.world.batch_dim, device=self.world.device)
196+
self.pos_rew[:] = 0
197+
self.time_rew[:] = 0
194198

195199
# Pos shaping
196200
goal_dist = torch.stack(

vmas/scenarios/debug/het_mass.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
import math
@@ -7,6 +7,7 @@
77
import numpy as np
88
import torch
99
from torch import Tensor
10+
1011
from vmas import render_interactively
1112
from vmas.simulator.core import Agent, World
1213
from vmas.simulator.scenario import BaseScenario
@@ -37,6 +38,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
3738
)
3839
world.add_agent(self.blue_agent)
3940

41+
self.max_speed = torch.zeros(batch_dim, device=device)
42+
self.energy_expenditure = self.max_speed.clone()
43+
4044
return world
4145

4246
def reset_world_at(self, env_index: int = None):

vmas/scenarios/debug/line_trajectory.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict
@@ -39,6 +39,10 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
3939
self.tangent = torch.zeros((world.batch_dim, world.dim_p), device=world.device)
4040
self.tangent[:, Y] = 1
4141

42+
self.pos_rew = torch.zeros(batch_dim, device=device)
43+
self.dot_product = self.pos_rew.clone()
44+
self.steady_rew = self.pos_rew.clone()
45+
4246
return world
4347

4448
def process_action(self, agent: Agent):

vmas/scenarios/debug/vel_control.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict
@@ -84,6 +84,8 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
8484
self.landmark = Landmark("landmark 0", collide=False, movable=True)
8585
world.add_landmark(self.landmark)
8686

87+
self.energy_expenditure = torch.zeros(batch_dim, device=device)
88+
8789
return world
8890

8991
def reset_world_at(self, env_index: int = None):

0 commit comments

Comments (0)