Pierce/vmas multiple motion models #8

Open · wants to merge 21 commits into base: main

Commits (21)
884e75b
[BugFix] dtype in simple_crypto observations
matteobettini Dec 26, 2023
aaec599
[Docs] TODOs
matteobettini Dec 27, 2023
127a9ec
Added an HeuristicPolicy with Control Lyapunov Controller (#74)
gy2256 Jan 2, 2024
f09a406
[Feature] Actions detached from physics and allow any number of actio…
matteobettini Jan 10, 2024
b434185
[Scenario] Update transport to never spawn package on goal
matteobettini Jan 16, 2024
b41c141
[Feature] Optionally clamp input actions
matteobettini Jan 17, 2024
6bf27db
[Feature] Update scenarios (#78)
matteobettini Jan 17, 2024
54c4d1e
VMAS-1.3.4
matteobettini Jan 19, 2024
a0c8f42
add waypoint tracker
kfu02 Jan 25, 2024
d35c33d
WIP: Added motion models, but untested since I need to use a differen…
piercehowell Feb 6, 2024
a82c6a5
untested, but runnable instantiation of multiple dynamic models for n…
piercehowell Feb 8, 2024
2c0a114
robots with different motion models loaded for navigation using robot…
piercehowell Feb 8, 2024
16ced93
Merge branch 'pierce/VMAS_multiple_motion_models' of https://github.c…
piercehowell Feb 8, 2024
d485dd1
velocity controller in nav scenario
kfu02 Feb 8, 2024
c2e1b2f
navigation scenario loads robots from kwargs.
piercehowell Feb 9, 2024
dadcfb7
Merge branch 'main' into pierce/VMAS_multiple_motion_models
piercehowell Feb 9, 2024
763e009
Merge branch 'pierce/VMAS_multiple_motion_models' into update-vmas-1.3.4
piercehowell Feb 9, 2024
194cf73
Merge pull request #9 from GT-STAR-Lab/update-vmas-1.3.4
piercehowell Feb 9, 2024
37fe54a
fixed bug with unused capability parameter for agents
piercehowell Feb 9, 2024
10991b8
fixed agent naming convention to follow VMAS env wrapper agents group…
piercehowell Feb 14, 2024
59602ae
moved VelocityController to only be initialized for holonomic dynamic…
piercehowell Feb 15, 2024
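
Taken together, these commits let the navigation scenario spawn agents with different motion models (e.g. holonomic vs. differential-drive), configured through scenario kwargs, and initialize the `VelocityController` only for holonomic agents. A minimal sketch of what such a configuration could look like — the `dynamics_models` kwarg name and the model identifiers are hypothetical illustrations, not the exact names introduced by this PR:

```python
import torch
from vmas import make_env

# Hypothetical sketch: per-agent motion models passed as scenario kwargs.
# The kwarg name "dynamics_models" and the identifiers below are assumptions;
# the PR's actual kwargs may differ.
env = make_env(
    scenario="navigation",
    num_envs=32,
    device="cuda" if torch.cuda.is_available() else "cpu",
    continuous_actions=True,
    wrapper=None,
    seed=0,
    # Environment-specific variables
    n_agents=3,
    dynamics_models=["holonomic", "diff_drive", "holonomic"],  # assumed kwarg
)

obs = env.reset()
```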
13 changes: 7 additions & 6 deletions README.md
@@ -402,15 +402,16 @@ To create a fake screen you need to have `Xvfb` installed.

 ## TODOS
 
-- [ ] Improve VMAS performance
-- [X] Dict obs support in torchrl
-- [X] Make TextLine a Geom usable in a scenario
-- [ ] Implement 2D birds eye view camera sensor
-- [X] Notebook on how to use torch rl with vmas
 - [ ] Reset any number of dimensions
 - [ ] Improve test efficiency and add new tests
 - [ ] Implement 1D camera sensor
-- [ ] Allow any number of actions
+- [ ] Implement 2D birds eye view camera sensor
+- [ ] Implement 2D drone dynamics
+- [X] Allow any number of actions
+- [X] Improve VMAS performance
+- [X] Dict obs support in torchrl
+- [X] Make TextLine a Geom usable in a scenario
+- [X] Notebook on how to use torch rl with vmas
 - [X] Allow dict obs spaces and multidim obs
 - [X] Talk about action preprocessing and velocity controller
 - [X] New envs from joint project with their descriptions
4 changes: 2 additions & 2 deletions setup.py
@@ -1,12 +1,12 @@
-# Copyright (c) 2022-2023.
+# Copyright (c) 2022-2024.
 # ProrokLab (https://www.proroklab.org/)
 # All rights reserved.
 
 from setuptools import setup, find_packages
 
 setup(
     name="vmas",
-    version="1.3.3",
+    version="1.3.4",
     description="Vectorized Multi-Agent Simulator",
     url="https://github.com/proroklab/VectorizedMultiAgentSimulator",
     license="GPLv3",
78 changes: 78 additions & 0 deletions test_waypoint_tracker.py
@@ -0,0 +1,78 @@
from vmas import make_env
import numpy as np
import torch

from vmas.simulator.utils import save_video


DEVICE="cuda"
SEED=1

env = make_env(
    scenario="navigation",
    num_envs=1,
    max_steps=200,
    device=DEVICE,
    continuous_actions=True,
    wrapper=None,
    seed=SEED,
    # Environment specific variables
    n_agents=1,
)

obs = env.reset()
frame_list = []
for i in range(100):
    if i == 0:
        agent = env.agents[0]
        agent.goal.state.pos = torch.tensor([[0.0, 1.0]])
        agent.state.pos = torch.tensor([[0.0, 0.0]])
        # agent.state.rot = torch.tensor([[0.0]]) # this doesn't work

    # print(i)
    # act once for each agent
    act = []
    for i, agent in enumerate(env.agents):

        # find goal w.r.t robot frame (difference given in global frame)
        # robot frame = FLU
        rel_pose_to_goal = agent.goal.state.pos - agent.state.pos
        goal_x_global = rel_pose_to_goal[:, 0]
        goal_y_global = rel_pose_to_goal[:, 1]
        agent_x_global = agent.state.pos[:, 0]
        agent_y_global = agent.state.pos[:, 1]
        theta_robot_to_global = -agent.state.rot

        # print(f'({agent_x_global.item()}, {agent_y_global.item()})')
        # print("agent state")
        # print(agent_x_global, agent_y_global, theta_robot_to_global)
        # print("rel goal global")
        # print(goal_x_global, goal_y_global)

        goal_x_robot = goal_x_global * torch.cos(theta_robot_to_global) - goal_y_global * torch.sin(theta_robot_to_global)
        goal_y_robot = goal_x_global * torch.sin(theta_robot_to_global) + goal_y_global * torch.cos(theta_robot_to_global)

        # print("goal_robot_coords", goal_x_robot, goal_y_robot)

        to_goal = torch.cat([goal_x_robot, goal_y_robot], dim=1)
        # print("to goal", to_goal)

        action = to_goal
        action = (to_goal / torch.linalg.norm(to_goal)) * 1e-1
        # action = torch.tensor([[0.00, 0.01]])
        print("raw action input", action)
        act.append(action)

    next, rew, done, info = env.step(act)

    frame = env.render(
        mode="rgb_array",
        visualize_when_rgb=True,
    )
    frame_list.append(frame)

save_video(
    "test_waypoint_tracker",
    frame_list,
    fps=10,
)
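
The waypoint tracker above steers each agent by expressing the displacement to its goal in the robot's own frame: the global-frame offset is rotated by `-agent.state.rot`. A small self-contained check of that rotation, using only torch (the helper name is ours, not part of VMAS):

```python
import torch

def global_to_robot_frame(rel_global: torch.Tensor, rot: torch.Tensor) -> torch.Tensor:
    """Rotate (x, y) offsets given in the global frame into the robot frame.

    rel_global: (batch, 2) goal position minus robot position, global frame.
    rot: (batch, 1) robot heading in radians (angle from the global x-axis).
    """
    theta = -rot  # rotating by -heading expresses the vector in the robot frame
    x = rel_global[:, 0:1]
    y = rel_global[:, 1:2]
    x_r = x * torch.cos(theta) - y * torch.sin(theta)
    y_r = x * torch.sin(theta) + y * torch.cos(theta)
    return torch.cat([x_r, y_r], dim=1)

# Robot facing +y (heading pi/2) with the goal one unit ahead of it in global +y:
# in the robot frame the goal should sit straight ahead, i.e. at roughly (1, 0).
print(global_to_robot_frame(torch.tensor([[0.0, 1.0]]), torch.tensor([[torch.pi / 2]])))
```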
88 changes: 43 additions & 45 deletions vmas/examples/use_vmas_env.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023.
# Copyright (c) 2022-2024.
# ProrokLab (https://www.proroklab.org/)
# All rights reserved.
import random
@@ -11,45 +11,53 @@
from vmas.simulator.utils import save_video


def _get_random_action(agent: Agent, continuous: bool):
def _get_random_action(agent: Agent, continuous: bool, env):
if continuous:
action = torch.zeros(
(agent.batch_dim, 2),
device=agent.device,
dtype=torch.float32,
).uniform_(
-agent.action.u_range,
agent.action.u_range,
)
if agent.u_rot_range > 0:
action = torch.cat(
[
action,
actions = []
for action_index in range(agent.action_size):
actions.append(
torch.zeros(
agent.batch_dim,
device=agent.device,
dtype=torch.float32,
).uniform_(
-agent.action.u_range_tensor[action_index],
agent.action.u_range_tensor[action_index],
)
)
if env.world.dim_c != 0 and not agent.silent:
# If the agent needs to communicate
for _ in range(env.world.dim_c):
actions.append(
torch.zeros(
(agent.batch_dim, 1),
agent.batch_dim,
device=agent.device,
dtype=torch.float32,
).uniform_(
-agent.action.u_rot_range,
agent.action.u_rot_range,
),
],
dim=-1,
)
0,
1,
)
)
action = torch.stack(actions, dim=-1)
else:
action = torch.randint(
low=0, high=5, size=(agent.batch_dim,), device=agent.device
low=0,
high=env.get_agent_action_space(agent).n,
size=(agent.batch_dim,),
device=agent.device,
)
return action


def _get_deterministic_action(agent: Agent, continuous: bool, env):
if continuous:
action = -agent.action.u_range_tensor.expand(env.batch_dim, agent.action_size)
else:
action = (
torch.tensor([1], device=env.device, dtype=torch.long)
.unsqueeze(-1)
.expand(env.batch_dim, 1)
)
if agent.u_rot_range > 0:
action = torch.stack(
[
action,
torch.randint(
low=0, high=3, size=(agent.batch_dim,), device=agent.device
),
],
dim=-1,
)
return action


@@ -85,13 +93,6 @@ def use_vmas_env(
dict_spaces = True # Weather to return obs, rewards, and infos as dictionaries with agent names
# (by default they are lists of len # of agents)

simple_2d_action = (
[0, -1.0] if continuous_actions else [3]
) # Simple action for an agent with 2d actions
simple_3d_action = (
[0, -1.0, 0.1] if continuous_actions else [3, 1]
) # Simple action for an agent with 3d actions (2d forces and torque)

env = make_env(
scenario=scenario_name,
num_envs=num_envs,
@@ -120,12 +121,9 @@ def use_vmas_env(
actions = {} if dict_actions else []
for i, agent in enumerate(env.agents):
if not random_action:
action = torch.tensor(
simple_2d_action if agent.u_rot_range == 0 else simple_3d_action,
device=device,
).repeat(num_envs, 1)
action = _get_deterministic_action(agent, continuous_actions, env)
else:
action = _get_random_action(agent, continuous_actions)
action = _get_random_action(agent, continuous_actions, env)
if dict_actions:
actions.update({agent.name: action})
else:
@@ -158,5 +156,5 @@ def use_vmas_env(
render=True,
save_render=False,
random_action=False,
continuous_actions=True,
continuous_actions=False,
)
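
The reworked `_get_random_action` samples each continuous action dimension independently, within that dimension's own limit from `agent.action.u_range_tensor`, stacks the columns into a `(batch_dim, action_size)` tensor, and appends uniform [0, 1] communication actions when the agent is not silent. A standalone sketch of the same per-dimension sampling pattern, with assumed shapes:

```python
import torch

def sample_uniform_actions(batch_dim: int, u_range: torch.Tensor) -> torch.Tensor:
    """Sample one continuous action per environment, each dimension within its own range.

    u_range: 1-D tensor of per-dimension limits, e.g. torch.tensor([1.0, 1.0, 0.5]).
    Returns a tensor of shape (batch_dim, len(u_range)).
    """
    columns = [
        torch.empty(batch_dim).uniform_(-limit.item(), limit.item())  # one column per action dimension
        for limit in u_range
    ]
    return torch.stack(columns, dim=-1)

print(sample_uniform_actions(4, torch.tensor([1.0, 1.0, 0.5])))
```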
107 changes: 52 additions & 55 deletions vmas/interactive_rendering.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023.
# Copyright (c) 2022-2024.
# ProrokLab (https://www.proroklab.org/)
# All rights reserved.
"""
@@ -101,21 +101,14 @@ def _cycle(self):
self.reset = False
total_rew = [0] * self.n_agents

action_list = [
[0.0] * self.env.unwrapped().get_agent_action_size(agent)
for agent in self.agents
]
action_list = [[0.0] * agent.action_size for agent in self.agents]
action_list[self.current_agent_index] = self.u[
: self.env.unwrapped().get_agent_action_size(
self.agents[self.current_agent_index]
)
: self.agents[self.current_agent_index].action_size
]

if self.n_agents > 1 and self.control_two_agents:
action_list[self.current_agent_index2] = self.u2[
: self.env.unwrapped().get_agent_action_size(
self.agents[self.current_agent_index2]
)
: self.agents[self.current_agent_index2].action_size
]
obs, rew, done, info = self.env.step(action_list)

@@ -167,56 +160,60 @@ def _write_values(self, index: int, message: str):
def _key_press(self, k, mod):
from pyglet.window import key

agent_range = self.agents[self.current_agent_index].u_range
agent_rot_range = self.agents[self.current_agent_index].u_rot_range
agent_range = self.agents[self.current_agent_index].action.u_range_tensor
try:
if k == key.LEFT:
self.keys[0] = agent_range[0]
elif k == key.RIGHT:
self.keys[1] = agent_range[0]
elif k == key.DOWN:
self.keys[2] = agent_range[1]
elif k == key.UP:
self.keys[3] = agent_range[1]
elif k == key.M:
self.keys[4] = agent_range[2]
elif k == key.N:
self.keys[5] = agent_range[2]
elif k == key.TAB:
self.current_agent_index = self._increment_selected_agent_index(
self.current_agent_index
)
if self.control_two_agents:
while self.current_agent_index == self.current_agent_index2:
self.current_agent_index = self._increment_selected_agent_index(
self.current_agent_index
)

if k == key.LEFT:
self.keys[0] = agent_range
elif k == key.RIGHT:
self.keys[1] = agent_range
elif k == key.DOWN:
self.keys[2] = agent_range
elif k == key.UP:
self.keys[3] = agent_range
elif k == key.M:
self.keys[4] = agent_rot_range
elif k == key.N:
self.keys[5] = agent_rot_range
elif k == key.TAB:
self.current_agent_index = self._increment_selected_agent_index(
self.current_agent_index
)
if self.control_two_agents:
while self.current_agent_index == self.current_agent_index2:
self.current_agent_index = self._increment_selected_agent_index(
self.current_agent_index
)

if self.control_two_agents:
agent2_range = self.agents[self.current_agent_index2].u_range
agent2_rot_range = self.agents[self.current_agent_index2].u_rot_range

if k == key.A:
self.keys2[0] = agent2_range
elif k == key.D:
self.keys2[1] = agent2_range
elif k == key.S:
self.keys2[2] = agent2_range
elif k == key.W:
self.keys2[3] = agent2_range
elif k == key.E:
self.keys2[4] = agent2_rot_range
elif k == key.Q:
self.keys2[5] = agent2_rot_range

elif k == key.LSHIFT:
self.current_agent_index2 = self._increment_selected_agent_index(
agent2_range = self.agents[
self.current_agent_index2
)
while self.current_agent_index == self.current_agent_index2:
].action.u_range_tensor

if k == key.A:
self.keys2[0] = agent2_range[0]
elif k == key.D:
self.keys2[1] = agent2_range[0]
elif k == key.S:
self.keys2[2] = agent2_range[1]
elif k == key.W:
self.keys2[3] = agent2_range[1]
elif k == key.E:
self.keys2[4] = agent2_range[2]
elif k == key.Q:
self.keys2[5] = agent2_range[2]

elif k == key.LSHIFT:
self.current_agent_index2 = self._increment_selected_agent_index(
self.current_agent_index2
)
while self.current_agent_index == self.current_agent_index2:
self.current_agent_index2 = (
self._increment_selected_agent_index(
self.current_agent_index2
)
)
except IndexError:
print("Action not available")

if k == key.R:
self.reset = True
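
In the interactive renderer, the arrow and letter keys now index directly into the selected agent's per-dimension `action.u_range_tensor`, and the surrounding `try/except IndexError` means keys bound to action dimensions the agent does not have simply report "Action not available" instead of crashing. A rough sketch of that pattern — the mapping dict and handler names are illustrative, not the module's actual names:

```python
import torch
from pyglet.window import key

# Illustrative only: pyglet key -> (slot in the key-state buffer, action dimension whose limit it uses).
KEY_BINDINGS = {
    key.LEFT: (0, 0), key.RIGHT: (1, 0),  # slots 0/1 driven by action dimension 0 (x)
    key.DOWN: (2, 1), key.UP: (3, 1),     # slots 2/3 driven by action dimension 1 (y)
    key.M: (4, 2), key.N: (5, 2),         # slots 4/5 driven by action dimension 2 (e.g. rotation)
}

def on_key_press(k, keys_buffer: list, u_range_tensor: torch.Tensor) -> None:
    try:
        slot, dim = KEY_BINDINGS[k]
        keys_buffer[slot] = u_range_tensor[dim].item()  # IndexError if the agent lacks this dimension
    except (KeyError, IndexError):
        print("Action not available")
```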