
Commit e0a2c05

Rewards init moved to make_world in main scenarios
1 parent 4b9771a commit e0a2c05

File tree

12 files changed: +128 -100 lines

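Every file in this diff follows the same pattern: each reward buffer is allocated once in make_world and then zeroed in place in reward(), rather than being reallocated on every step, so info() can return the buffers without guarding against AttributeError. Below is a minimal sketch of that pattern using a hypothetical single-agent scenario; it is not code from this commit.

import torch

from vmas.simulator.core import Agent, World
from vmas.simulator.scenario import BaseScenario


class PreallocatedRewardScenario(BaseScenario):
    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
        world = World(batch_dim, device)
        world.add_agent(Agent(name="agent_0"))

        # Allocate the per-environment reward buffer once, at world creation,
        # so it exists even if info() runs before the first reward() call.
        self.pos_rew = torch.zeros(batch_dim, device=device, dtype=torch.float32)
        return world

    def reset_world_at(self, env_index: int = None):
        pass

    def reward(self, agent: Agent):
        # Zero the existing buffer in place instead of reallocating it each step.
        self.pos_rew[:] = 0
        # ... accumulate the actual reward terms into self.pos_rew here ...
        return self.pos_rew

    def observation(self, agent: Agent):
        return agent.state.pos

    def info(self, agent: Agent):
        # No try/except AttributeError needed: the buffer always exists.
        return {"pos_rew": self.pos_rew}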

vmas/scenarios/balance.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44

@@ -72,6 +72,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
7272
)
7373
world.add_landmark(floor)
7474

75+
self.pos_rew = torch.zeros(batch_dim, device=device, dtype=torch.float32)
76+
self.ground_rew = self.pos_rew.clone()
77+
7578
return world
7679

7780
def reset_world_at(self, env_index: int = None):
@@ -201,12 +204,8 @@ def reward(self, agent: Agent):
201204
is_first = agent == self.world.agents[0]
202205

203206
if is_first:
204-
self.pos_rew = torch.zeros(
205-
self.world.batch_dim, device=self.world.device, dtype=torch.float32
206-
)
207-
self.ground_rew = torch.zeros(
208-
self.world.batch_dim, device=self.world.device, dtype=torch.float32
209-
)
207+
self.pos_rew[:] = 0
208+
self.ground_rew[:] = 0
210209

211210
self.on_the_ground = (
212211
self.package.state.pos[:, Y] <= -self.world.y_semidim

vmas/scenarios/ball_passage.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44

@@ -73,6 +73,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
7373

7474
self.create_passage_map(world)
7575

76+
self.pos_rew = torch.zeros(batch_dim, device=device, dtype=torch.float32)
77+
self.collision_rew = self.pos_rew.clone()
78+
7679
return world
7780

7881
def reset_world_at(self, env_index: int = None):
@@ -213,8 +216,8 @@ def reward(self, agent: Agent):
213216
self.rew = torch.zeros(
214217
self.world.batch_dim, device=self.world.device, dtype=torch.float32
215218
)
216-
self.pos_rew = self.rew.clone()
217-
self.collision_rew = self.rew.clone()
219+
self.pos_rew[:] = 0
220+
self.collision_rew[:] = 0
218221

219222
ball_passed = self.ball.state.pos[:, Y] > 0
220223

@@ -328,32 +331,35 @@ def removed(i):
328331
def spawn_passage_map(self, env_index):
329332
if not self.fixed_passage:
330333
order = torch.randperm(len(self.passages)).tolist()
331-
self.passages = [self.passages[i] for i in order]
332-
for i, passage in enumerate(self.passages):
334+
self.passages_to_place = [self.passages[i] for i in order]
335+
else:
336+
self.passages_to_place = self.passages
337+
for i, passage in enumerate(self.passages_to_place):
333338
if not passage.collide:
334339
passage.is_rendering[:] = False
335340
passage.neighbour = False
336341
try:
337-
passage.neighbour += not self.passages[i - 1].collide
342+
passage.neighbour += not self.passages_to_place[i - 1].collide
338343
except IndexError:
339344
pass
340345
try:
341-
passage.neighbour += not self.passages[i + 1].collide
346+
passage.neighbour += not self.passages_to_place[i + 1].collide
342347
except IndexError:
343348
pass
349+
pos = torch.tensor(
350+
[
351+
-1
352+
- self.agent_radius
353+
+ self.passage_length / 2
354+
+ self.passage_length * i,
355+
0.0,
356+
],
357+
dtype=torch.float32,
358+
device=self.world.device,
359+
)
344360
passage.neighbour *= passage.collide
345361
passage.set_pos(
346-
torch.tensor(
347-
[
348-
-1
349-
- self.agent_radius
350-
+ self.passage_length / 2
351-
+ self.passage_length * i,
352-
0.0,
353-
],
354-
dtype=torch.float32,
355-
device=self.world.device,
356-
),
362+
pos,
357363
batch_index=env_index,
358364
)
359365
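The spawn_passage_map hunk above also stops permuting self.passages in place: the shuffled order now goes into a separate self.passages_to_place list, so the canonical passage list is left untouched across resets. A minimal sketch of that list-handling idea, with assumed names (items, shuffle) rather than code from this file:

import torch

def placement_order(items: list, shuffle: bool) -> list:
    # Derive a (possibly shuffled) placement order without mutating the original list.
    if shuffle:
        order = torch.randperm(len(items)).tolist()
        return [items[i] for i in order]
    return list(items)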

vmas/scenarios/ball_trajectory.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict
@@ -72,6 +72,10 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
7272
)
7373
world.add_joint(self.joints[i])
7474

75+
self.pos_rew = torch.zeros(batch_dim, device=device, dtype=torch.float32)
76+
self.speed_rew = self.pos_rew.clone()
77+
self.dist_rew = self.pos_rew.clone()
78+
7579
return world
7680

7781
def reset_world_at(self, env_index: int = None):

vmas/scenarios/buzz_wire.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict
@@ -90,6 +90,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
9090

9191
self.build_path_line(world)
9292

93+
self.pos_rew = torch.zeros(batch_dim, device=device, dtype=torch.float32)
94+
self.collision_rew = self.pos_rew.clone()
95+
9396
return world
9497

9598
def reset_world_at(self, env_index: int = None):
@@ -204,8 +207,8 @@ def reward(self, agent: Agent):
204207
self.rew = torch.zeros(
205208
self.world.batch_dim, device=self.world.device, dtype=torch.float32
206209
)
207-
self.pos_rew = self.rew.clone()
208-
self.collision_rew = self.rew.clone()
210+
self.pos_rew[:] = 0
211+
self.collision_rew[:] = 0
209212
self.collided = torch.full(
210213
(self.world.batch_dim,), False, device=self.world.device
211214
)

vmas/scenarios/discovery.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44

@@ -85,6 +85,8 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
8585
),
8686
],
8787
)
88+
agent.collision_rew = torch.zeros(batch_dim, device=device)
89+
agent.covering_reward = agent.collision_rew.clone()
8890
world.add_agent(agent)
8991

9092
self._targets = []
@@ -99,6 +101,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
99101
world.add_landmark(target)
100102
self._targets.append(target)
101103

104+
self.covered_targets = torch.zeros(batch_dim, self.n_targets, device=device)
105+
self.shared_covering_rew = torch.zeros(batch_dim, device=device)
106+
102107
return world
103108

104109
def reset_world_at(self, env_index: int = None):
@@ -140,17 +145,13 @@ def reward(self, agent: Agent):
140145
)
141146
self.covered_targets = self.agents_per_target >= self._agents_per_target
142147

143-
self.shared_covering_rew = torch.zeros(
144-
self.world.batch_dim, device=self.world.device
145-
)
148+
self.shared_covering_rew[:] = 0
146149
for a in self.world.agents:
147150
self.shared_covering_rew += self.agent_reward(a)
148151
self.shared_covering_rew[self.shared_covering_rew != 0] /= 2
149152

150153
# Avoid collisions with each other
151-
agent.collision_rew = torch.zeros(
152-
self.world.batch_dim, device=self.world.device
153-
)
154+
agent.collision_rew[:] = 0
154155
for a in self.world.agents:
155156
if a != agent:
156157
agent.collision_rew[
@@ -206,9 +207,7 @@ def get_outside_pos(self, env_index):
206207
def agent_reward(self, agent):
207208
agent_index = self.world.agents.index(agent)
208209

209-
agent.covering_reward = torch.zeros(
210-
self.world.batch_dim, device=self.world.device
211-
)
210+
agent.covering_reward[:] = 0
212211
targets_covered_by_agent = (
213212
self.agents_targets_dists[:, agent_index] < self._covering_range
214213
)
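In discovery (and in give_way below), some of the buffers are per-agent, so they are attached to the Agent objects themselves in make_world and zeroed in place in reward(). A minimal sketch of that variant, again a hypothetical scenario rather than code from this commit:

import torch

from vmas.simulator.core import Agent, World
from vmas.simulator.scenario import BaseScenario


class PerAgentBufferScenario(BaseScenario):
    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
        world = World(batch_dim, device)
        for i in range(2):
            agent = Agent(name=f"agent_{i}", collide=True)
            # One buffer per agent, created once and reused on every reward() call.
            agent.collision_rew = torch.zeros(batch_dim, device=device)
            world.add_agent(agent)
        return world

    def reset_world_at(self, env_index: int = None):
        pass

    def reward(self, agent: Agent):
        agent.collision_rew[:] = 0
        for other in self.world.agents:
            if other is not agent:
                # Penalize overlaps without allocating a new tensor each step.
                agent.collision_rew[self.world.is_overlapping(other, agent)] -= 1.0
        return agent.collision_rew

    def observation(self, agent: Agent):
        return agent.state.pos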

vmas/scenarios/dropout.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
import math
55
from typing import Dict
66

77
import torch
88
from torch import Tensor
9+
910
from vmas import render_interactively
1011
from vmas.simulator.core import Agent, Landmark, Sphere, World
1112
from vmas.simulator.scenario import BaseScenario
@@ -37,6 +38,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
3738
)
3839
world.add_landmark(goal)
3940

41+
self.pos_rew = torch.zeros(batch_dim, device=device)
42+
self.energy_rew = self.pos_rew.clone()
43+
4044
return world
4145

4246
def reset_world_at(self, env_index: int = None):
@@ -98,7 +102,7 @@ def reward(self, agent: Agent):
98102
dim=-1,
99103
)
100104

101-
self.pos_rew = torch.zeros(self.world.batch_dim, device=self.world.device)
105+
self.pos_rew[:] = 0
102106
self.pos_rew[self.any_eaten * ~self.world.landmarks[0].eaten] = 1
103107

104108
if is_last:
@@ -132,11 +136,7 @@ def observation(self, agent: Agent):
132136
)
133137

134138
def info(self, agent: Agent) -> Dict[str, Tensor]:
135-
try:
136-
info = {"pos_rew": self.pos_rew, "energy_rew": self.energy_rew}
137-
# When reset is called before reward()
138-
except AttributeError:
139-
info = {}
139+
info = {"pos_rew": self.pos_rew, "energy_rew": self.energy_rew}
140140
return info
141141

142142
def done(self):

vmas/scenarios/flocking.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
from typing import Dict, Callable
@@ -61,6 +61,11 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
6161
)
6262
world.add_landmark(self._target)
6363

64+
self.collision_rew = torch.zeros(batch_dim, device=device)
65+
self.velocity_rew = self.collision_rew.clone()
66+
self.separation_rew = self.collision_rew.clone()
67+
self.cohesion_rew = self.collision_rew.clone()
68+
6469
return world
6570

6671
def reset_world_at(self, env_index: int = None):
@@ -75,7 +80,7 @@ def reset_world_at(self, env_index: int = None):
7580

7681
def reward(self, agent: Agent):
7782
# Avoid collisions with each other
78-
self.collision_rew = torch.zeros(self.world.batch_dim, device=self.world.device)
83+
self.collision_rew[:] = 0
7984
for a in self.world.agents:
8085
if a != agent:
8186
self.collision_rew[self.world.is_overlapping(a, agent)] -= 1.0
@@ -112,16 +117,14 @@ def observation(self, agent: Agent):
112117
)
113118

114119
def info(self, agent: Agent) -> Dict[str, Tensor]:
115-
try:
116-
info = {
117-
"collision_rew": self.collision_rew,
118-
"velocity_rew": self.velocity_rew,
119-
"separation_rew": self.separation_rew,
120-
"cohesion_rew": self.cohesion_rew,
121-
}
122-
# When reset is called before reward()
123-
except AttributeError:
124-
info = {}
120+
121+
info = {
122+
"collision_rew": self.collision_rew,
123+
"velocity_rew": self.velocity_rew,
124+
"separation_rew": self.separation_rew,
125+
"cohesion_rew": self.cohesion_rew,
126+
}
127+
125128
return info
126129

127130

vmas/scenarios/give_way.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022.
1+
# Copyright (c) 2022-2023.
22
# ProrokLab (https://www.proroklab.org/)
33
# All rights reserved.
44
import math
@@ -121,6 +121,14 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
121121

122122
self.spawn_map(world)
123123

124+
for agent in world.agents:
125+
agent.energy_rew = torch.zeros(batch_dim, device=device)
126+
agent.agent_collision_rew = agent.energy_rew.clone()
127+
agent.obstacle_collision_rew = agent.agent_collision_rew.clone()
128+
129+
self.pos_rew = torch.zeros(batch_dim, device=device)
130+
self.final_rew = self.pos_rew.clone()
131+
124132
return world
125133

126134
def reset_world_at(self, env_index: int = None):
@@ -222,10 +230,8 @@ def reward(self, agent: Agent):
222230
green_agent = self.world.agents[-1]
223231

224232
if is_first:
225-
self.pos_rew = torch.zeros(
226-
self.world.batch_dim, device=self.world.device, dtype=torch.float32
227-
)
228-
self.final_rew = torch.zeros(self.world.batch_dim, device=self.world.device)
233+
self.pos_rew[:] = 0
234+
self.final_rew[:] = 0
229235

230236
self.blue_distance = torch.linalg.vector_norm(
231237
blue_agent.state.pos - blue_agent.goal.state.pos,
@@ -253,12 +259,8 @@ def reward(self, agent: Agent):
253259
self.final_rew[self.goal_reached] = self.final_reward
254260
self.reached_goal += self.goal_reached
255261

256-
agent.agent_collision_rew = torch.zeros(
257-
(self.world.batch_dim,), device=self.world.device
258-
)
259-
agent.obstacle_collision_rew = torch.zeros(
260-
(self.world.batch_dim,), device=self.world.device
261-
)
262+
agent.agent_collision_rew[:] = 0
263+
agent.obstacle_collision_rew[:] = 0
262264
for a in self.world.agents:
263265
if a != agent:
264266
agent.agent_collision_rew[
