diff --git a/README.md b/README.md index 7e802b39..173cdabb 100644 --- a/README.md +++ b/README.md @@ -402,15 +402,16 @@ To create a fake screen you need to have `Xvfb` installed. ## TODOS -- [ ] Improve VMAS performance -- [X] Dict obs support in torchrl -- [X] Make TextLine a Geom usable in a scenario -- [ ] Implement 2D birds eye view camera sensor -- [X] Notebook on how to use torch rl with vmas - [ ] Reset any number of dimensions - [ ] Improve test efficiency and add new tests - [ ] Implement 1D camera sensor -- [ ] Allow any number of actions +- [ ] Implement 2D birds eye view camera sensor +- [ ] Implement 2D drone dynamics +- [X] Allow any number of actions +- [X] Improve VMAS performance +- [X] Dict obs support in torchrl +- [X] Make TextLine a Geom usable in a scenario +- [X] Notebook on how to use torch rl with vmas - [X] Allow dict obs spaces and multidim obs - [X] Talk about action preprocessing and velocity controller - [X] New envs from joint project with their descriptions diff --git a/setup.py b/setup.py index 3ae282d1..1af56c5f 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. @@ -6,7 +6,7 @@ setup( name="vmas", - version="1.3.3", + version="1.3.4", description="Vectorized Multi-Agent Simulator", url="https://github.com/proroklab/VectorizedMultiAgentSimulator", license="GPLv3", diff --git a/test_waypoint_tracker.py b/test_waypoint_tracker.py new file mode 100644 index 00000000..b051ad15 --- /dev/null +++ b/test_waypoint_tracker.py @@ -0,0 +1,78 @@ +from vmas import make_env +import numpy as np +import torch + +from vmas.simulator.utils import save_video + + +DEVICE="cuda" +SEED=1 + +env = make_env( + scenario="navigation", + num_envs=1, + max_steps=200, + device=DEVICE, + continuous_actions=True, + wrapper=None, + seed=SEED, + # Environment specific variables + n_agents=1, +) + +obs = env.reset() +frame_list = [] +for i in range(100): + if i == 0: + agent = env.agents[0] + agent.goal.state.pos = torch.tensor([[0.0, 1.0]]) + agent.state.pos = torch.tensor([[0.0, 0.0]]) + # agent.state.rot = torch.tensor([[0.0]]) # this doesn't work + + # print(i) + # act once for each agent + act = [] + for i, agent in enumerate(env.agents): + + # find goal w.r.t robot frame (difference given in global frame) + # robot frame = FLU + rel_pose_to_goal = agent.goal.state.pos - agent.state.pos + goal_x_global = rel_pose_to_goal[:, 0] + goal_y_global = rel_pose_to_goal[:, 1] + agent_x_global = agent.state.pos[:, 0] + agent_y_global = agent.state.pos[:, 1] + theta_robot_to_global = -agent.state.rot + + # print(f'({agent_x_global.item()}, {agent_y_global.item()})') + # print("agent state") + # print(agent_x_global, agent_y_global, theta_robot_to_global) + # print("rel goal global") + # print(goal_x_global, goal_y_global) + + goal_x_robot = goal_x_global * torch.cos(theta_robot_to_global) - goal_y_global * torch.sin(theta_robot_to_global) + goal_y_robot = goal_x_global * torch.sin(theta_robot_to_global) + goal_y_global * torch.cos(theta_robot_to_global) + + # print("goal_robot_coords", goal_x_robot, goal_y_robot) + + to_goal = torch.cat([goal_x_robot, goal_y_robot], dim=1) + # print("to goal", to_goal) + + action = to_goal + action = (to_goal / torch.linalg.norm(to_goal)) * 1e-1 + # action = torch.tensor([[0.00, 0.01]]) + print("raw action input", action) + act.append(action) + + next, rew, done, info = env.step(act) + + frame = env.render( + mode="rgb_array", + 
visualize_when_rgb=True, + ) + frame_list.append(frame) + +save_video( + "test_waypoint_tracker", + frame_list, + fps=10, +) diff --git a/vmas/examples/use_vmas_env.py b/vmas/examples/use_vmas_env.py index 0bf621fb..507baed8 100644 --- a/vmas/examples/use_vmas_env.py +++ b/vmas/examples/use_vmas_env.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. import random @@ -11,45 +11,53 @@ from vmas.simulator.utils import save_video -def _get_random_action(agent: Agent, continuous: bool): +def _get_random_action(agent: Agent, continuous: bool, env): if continuous: - action = torch.zeros( - (agent.batch_dim, 2), - device=agent.device, - dtype=torch.float32, - ).uniform_( - -agent.action.u_range, - agent.action.u_range, - ) - if agent.u_rot_range > 0: - action = torch.cat( - [ - action, + actions = [] + for action_index in range(agent.action_size): + actions.append( + torch.zeros( + agent.batch_dim, + device=agent.device, + dtype=torch.float32, + ).uniform_( + -agent.action.u_range_tensor[action_index], + agent.action.u_range_tensor[action_index], + ) + ) + if env.world.dim_c != 0 and not agent.silent: + # If the agent needs to communicate + for _ in range(env.world.dim_c): + actions.append( torch.zeros( - (agent.batch_dim, 1), + agent.batch_dim, device=agent.device, dtype=torch.float32, ).uniform_( - -agent.action.u_rot_range, - agent.action.u_rot_range, - ), - ], - dim=-1, - ) + 0, + 1, + ) + ) + action = torch.stack(actions, dim=-1) else: action = torch.randint( - low=0, high=5, size=(agent.batch_dim,), device=agent.device + low=0, + high=env.get_agent_action_space(agent).n, + size=(agent.batch_dim,), + device=agent.device, + ) + return action + + +def _get_deterministic_action(agent: Agent, continuous: bool, env): + if continuous: + action = -agent.action.u_range_tensor.expand(env.batch_dim, agent.action_size) + else: + action = ( + torch.tensor([1], device=env.device, dtype=torch.long) + .unsqueeze(-1) + .expand(env.batch_dim, 1) ) - if agent.u_rot_range > 0: - action = torch.stack( - [ - action, - torch.randint( - low=0, high=3, size=(agent.batch_dim,), device=agent.device - ), - ], - dim=-1, - ) return action @@ -85,13 +93,6 @@ def use_vmas_env( dict_spaces = True # Weather to return obs, rewards, and infos as dictionaries with agent names # (by default they are lists of len # of agents) - simple_2d_action = ( - [0, -1.0] if continuous_actions else [3] - ) # Simple action for an agent with 2d actions - simple_3d_action = ( - [0, -1.0, 0.1] if continuous_actions else [3, 1] - ) # Simple action for an agent with 3d actions (2d forces and torque) - env = make_env( scenario=scenario_name, num_envs=num_envs, @@ -120,12 +121,9 @@ def use_vmas_env( actions = {} if dict_actions else [] for i, agent in enumerate(env.agents): if not random_action: - action = torch.tensor( - simple_2d_action if agent.u_rot_range == 0 else simple_3d_action, - device=device, - ).repeat(num_envs, 1) + action = _get_deterministic_action(agent, continuous_actions, env) else: - action = _get_random_action(agent, continuous_actions) + action = _get_random_action(agent, continuous_actions, env) if dict_actions: actions.update({agent.name: action}) else: @@ -158,5 +156,5 @@ def use_vmas_env( render=True, save_render=False, random_action=False, - continuous_actions=True, + continuous_actions=False, ) diff --git a/vmas/interactive_rendering.py b/vmas/interactive_rendering.py index 2b456d41..8523efd9 100644 --- a/vmas/interactive_rendering.py 
+++ b/vmas/interactive_rendering.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. """ @@ -101,21 +101,14 @@ def _cycle(self): self.reset = False total_rew = [0] * self.n_agents - action_list = [ - [0.0] * self.env.unwrapped().get_agent_action_size(agent) - for agent in self.agents - ] + action_list = [[0.0] * agent.action_size for agent in self.agents] action_list[self.current_agent_index] = self.u[ - : self.env.unwrapped().get_agent_action_size( - self.agents[self.current_agent_index] - ) + : self.agents[self.current_agent_index].action_size ] if self.n_agents > 1 and self.control_two_agents: action_list[self.current_agent_index2] = self.u2[ - : self.env.unwrapped().get_agent_action_size( - self.agents[self.current_agent_index2] - ) + : self.agents[self.current_agent_index2].action_size ] obs, rew, done, info = self.env.step(action_list) @@ -167,56 +160,60 @@ def _write_values(self, index: int, message: str): def _key_press(self, k, mod): from pyglet.window import key - agent_range = self.agents[self.current_agent_index].u_range - agent_rot_range = self.agents[self.current_agent_index].u_rot_range + agent_range = self.agents[self.current_agent_index].action.u_range_tensor + try: + if k == key.LEFT: + self.keys[0] = agent_range[0] + elif k == key.RIGHT: + self.keys[1] = agent_range[0] + elif k == key.DOWN: + self.keys[2] = agent_range[1] + elif k == key.UP: + self.keys[3] = agent_range[1] + elif k == key.M: + self.keys[4] = agent_range[2] + elif k == key.N: + self.keys[5] = agent_range[2] + elif k == key.TAB: + self.current_agent_index = self._increment_selected_agent_index( + self.current_agent_index + ) + if self.control_two_agents: + while self.current_agent_index == self.current_agent_index2: + self.current_agent_index = self._increment_selected_agent_index( + self.current_agent_index + ) - if k == key.LEFT: - self.keys[0] = agent_range - elif k == key.RIGHT: - self.keys[1] = agent_range - elif k == key.DOWN: - self.keys[2] = agent_range - elif k == key.UP: - self.keys[3] = agent_range - elif k == key.M: - self.keys[4] = agent_rot_range - elif k == key.N: - self.keys[5] = agent_rot_range - elif k == key.TAB: - self.current_agent_index = self._increment_selected_agent_index( - self.current_agent_index - ) if self.control_two_agents: - while self.current_agent_index == self.current_agent_index2: - self.current_agent_index = self._increment_selected_agent_index( - self.current_agent_index - ) - - if self.control_two_agents: - agent2_range = self.agents[self.current_agent_index2].u_range - agent2_rot_range = self.agents[self.current_agent_index2].u_rot_range - - if k == key.A: - self.keys2[0] = agent2_range - elif k == key.D: - self.keys2[1] = agent2_range - elif k == key.S: - self.keys2[2] = agent2_range - elif k == key.W: - self.keys2[3] = agent2_range - elif k == key.E: - self.keys2[4] = agent2_rot_range - elif k == key.Q: - self.keys2[5] = agent2_rot_range - - elif k == key.LSHIFT: - self.current_agent_index2 = self._increment_selected_agent_index( + agent2_range = self.agents[ self.current_agent_index2 - ) - while self.current_agent_index == self.current_agent_index2: + ].action.u_range_tensor + + if k == key.A: + self.keys2[0] = agent2_range[0] + elif k == key.D: + self.keys2[1] = agent2_range[0] + elif k == key.S: + self.keys2[2] = agent2_range[1] + elif k == key.W: + self.keys2[3] = agent2_range[1] + elif k == key.E: + self.keys2[4] = agent2_range[2] + elif k == key.Q: + self.keys2[5] = 
agent2_range[2] + + elif k == key.LSHIFT: self.current_agent_index2 = self._increment_selected_agent_index( self.current_agent_index2 ) + while self.current_agent_index == self.current_agent_index2: + self.current_agent_index2 = ( + self._increment_selected_agent_index( + self.current_agent_index2 + ) + ) + except IndexError: + print("Action not available") if k == key.R: self.reset = True diff --git a/vmas/make_env.py b/vmas/make_env.py index 24e3c971..0ef93f78 100644 --- a/vmas/make_env.py +++ b/vmas/make_env.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. @@ -22,6 +22,8 @@ def make_env( max_steps: Optional[int] = None, seed: Optional[int] = None, dict_spaces: bool = False, + multidiscrete_actions: bool = False, + clamp_actions: bool = False, **kwargs, ): """ @@ -35,7 +37,12 @@ def make_env( max_steps: Maximum number of steps in each vectorized environment after which done is returned seed: seed dict_spaces: Weather to use dictionary i/o spaces with format {agent_name: tensor} - for obs, rewards, and info instead of tuples. + for obs, rewards, and info instead of tuples. + multidiscrete_actions (bool): Whether to use multidiscrete_actions action spaces when continuous_actions=False. + Otherwise, (default) the action space will be Discrete, and it will be the cartesian product of the + action spaces of an agent. + clamp_actions: Weather to clamp input actions to the range instead of throwing + an error when continuous_actions is True and actions are out of bounds **kwargs (): Returns: @@ -55,6 +62,8 @@ def make_env( max_steps=max_steps, seed=seed, dict_spaces=dict_spaces, + multidiscrete_actions=multidiscrete_actions, + clamp_actions=clamp_actions, **kwargs, ) diff --git a/vmas/scenarios/debug/diff_drive.py b/vmas/scenarios/debug/diff_drive.py index 8546c941..75c76da5 100644 --- a/vmas/scenarios/debug/diff_drive.py +++ b/vmas/scenarios/debug/diff_drive.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
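A minimal sketch of the two new make_env flags documented above (scenario and values are illustrative; assumes a standard vmas install):

    from vmas import make_env

    # Discrete control exposed with one action entry per action index
    # (multidiscrete) instead of the default single Discrete space built
    # from the cartesian product of the agent's actions.
    env = make_env(
        scenario="transport",
        num_envs=32,
        device="cpu",
        continuous_actions=False,
        multidiscrete_actions=True,
        seed=0,
    )

    # Continuous control where out-of-range actions are clamped to the
    # agent's action range instead of raising an error.
    env_clamped = make_env(
        scenario="transport",
        num_envs=32,
        device="cpu",
        continuous_actions=True,
        clamp_actions=True,
        seed=0,
    )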
import typing @@ -8,7 +8,8 @@ from vmas import render_interactively from vmas.simulator.core import Agent, World -from vmas.simulator.dynamics.diff_drive import DiffDriveDynamics +from vmas.simulator.dynamics.diff_drive import DiffDrive +from vmas.simulator.dynamics.holonomic_with_rot import HolonomicWithRotation from vmas.simulator.scenario import BaseScenario from vmas.simulator.utils import Color, ScenarioUtils @@ -39,16 +40,24 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): world = World(batch_dim, device, substeps=10) for i in range(self.n_agents): - agent = Agent( - name=f"agent_{i}", - collide=True, - render_action=True, - u_range=1, - u_rot_range=1, - u_rot_multiplier=0.001, - ) if i == 0: - agent.dynamics = DiffDriveDynamics(agent, world, integration="rk4") + agent = Agent( + name=f"diff_drive_{i}", + collide=True, + render_action=True, + u_range=[1, 1], + u_multiplier=[1, 0.001], + dynamics=DiffDrive(world, integration="rk4"), + ) + else: + agent = Agent( + name=f"holo_rot_{i}", + collide=True, + render_action=True, + u_range=[1, 1, 1], + u_multiplier=[1, 1, 0.001], + dynamics=HolonomicWithRotation(), + ) world.add_agent(agent) @@ -64,12 +73,6 @@ def reset_world_at(self, env_index: int = None): y_bounds=(-1, 1), ) - def process_action(self, agent: Agent): - try: - agent.dynamics.process_force() - except AttributeError: - pass - def reward(self, agent: Agent): return torch.zeros(self.world.batch_dim) diff --git a/vmas/scenarios/debug/kinematic_bicycle.py b/vmas/scenarios/debug/kinematic_bicycle.py index f5dc5b79..90c5c74e 100644 --- a/vmas/scenarios/debug/kinematic_bicycle.py +++ b/vmas/scenarios/debug/kinematic_bicycle.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024. +# ProrokLab (https://www.proroklab.org/) +# All rights reserved. 
+ import typing from typing import List @@ -5,51 +9,63 @@ from vmas import render_interactively from vmas.simulator.core import Agent, World, Box -from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycleDynamics +from vmas.simulator.dynamics.holonomic_with_rot import HolonomicWithRotation +from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycle from vmas.simulator.scenario import BaseScenario from vmas.simulator.utils import Color, ScenarioUtils if typing.TYPE_CHECKING: from vmas.simulator.rendering import Geom + class Scenario(BaseScenario): def make_world(self, batch_dim: int, device: torch.device, **kwargs): """ Kinematic bicycle model example scenario """ self.n_agents = kwargs.get("n_agents", 2) - width = kwargs.get("width", 0.1) # Agent width - l_f = kwargs.get("l_f", 0.1) # Distance between the front axle and the center of gravity - l_r = kwargs.get("l_r", 0.1) # Distance between the rear axle and the center of gravity - max_steering_angle = kwargs.get("max_steering_angle", torch.deg2rad(torch.tensor(30.0))) + width = kwargs.get("width", 0.1) # Agent width + l_f = kwargs.get( + "l_f", 0.1 + ) # Distance between the front axle and the center of gravity + l_r = kwargs.get( + "l_r", 0.1 + ) # Distance between the rear axle and the center of gravity + max_steering_angle = kwargs.get( + "max_steering_angle", torch.deg2rad(torch.tensor(30.0)) + ) # Make world - world = World(batch_dim, device, substeps=10) + world = World(batch_dim, device, substeps=10, collision_force=500) for i in range(self.n_agents): if i == 0: # Use the kinematic bicycle model for the first agent agent = Agent( - name=f"agent_{i}", - shape=Box(length=l_f+l_r, width=width), - collide=False, # turn off since the check of box-box collisions is quite expensive currently + name=f"bicycle_{i}", + shape=Box(length=l_f + l_r, width=width), + collide=True, render_action=True, - u_range=1, - u_rot_range=max_steering_angle, - u_rot_multiplier=1, - ) - agent.dynamics = KinematicBicycleDynamics( - agent, world, width=width, l_f=l_f, l_r=l_r, max_steering_angle=max_steering_angle, integration="euler" # one of "euler", "rk4" + u_range=[1, max_steering_angle], + u_multiplier=[1, 1], + dynamics=KinematicBicycle( + world, + width=width, + l_f=l_f, + l_r=l_r, + max_steering_angle=max_steering_angle, + integration="euler", # one of "euler", "rk4" + ), ) else: agent = Agent( - name=f"agent_{i}", - shape=Box(length=l_f+l_r, width=width), - collide=False, + name=f"holo_rot_{i}", + shape=Box(length=l_f + l_r, width=width), + collide=True, render_action=True, - u_range=1, - u_rot_range=1, - u_rot_multiplier=0.001, + u_range=[1, 1, 1], + u_multiplier=[1, 1, 0.001], + dynamics=HolonomicWithRotation(), ) world.add_agent(agent) @@ -66,13 +82,6 @@ def reset_world_at(self, env_index: int = None): y_bounds=(-1, 1), ) - def process_action(self, agent: Agent): - if hasattr(agent, 'dynamics') and hasattr(agent.dynamics, 'process_force'): - agent.dynamics.process_force() - else: - # The agent does not have a dynamics property, or it does not have a process_force method - pass - def reward(self, agent: Agent): return torch.zeros(self.world.batch_dim) @@ -108,6 +117,14 @@ def extra_render(self, env_index: int = 0) -> "List[Geom]": return geoms + # ... and the code to run the simulation. 
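With the reworked action API used in this scenario, u_range, u_multiplier and u_noise can be given per action index, and the number of actions comes from the agent's dynamics module. A minimal construction sketch outside a scenario (names and values illustrative, mirroring the kinematic-bicycle agent above):

    import torch
    from vmas.simulator.core import Agent, Box, World
    from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycle

    world = World(batch_dim=2, device=torch.device("cpu"), substeps=10)
    max_steer = torch.deg2rad(torch.tensor(30.0))
    agent = Agent(
        name="bicycle_example",
        shape=Box(length=0.2, width=0.1),
        u_range=[1, max_steer],   # per-index ranges: [speed, steering angle]
        u_multiplier=[1, 1],
        dynamics=KinematicBicycle(
            world,
            width=0.1,
            l_f=0.1,
            l_r=0.1,
            max_steering_angle=max_steer,
            integration="euler",
        ),
    )
    world.add_agent(agent)
    # agent.action_size == 2, so actions are tensors of shape (batch_dim, 2).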
if __name__ == "__main__": - render_interactively(__file__, control_two_agents=True, width=0.1, l_f=0.1, l_r=0.1, display_info=True) \ No newline at end of file + render_interactively( + __file__, + control_two_agents=True, + width=0.1, + l_f=0.1, + l_r=0.1, + display_info=True, + ) diff --git a/vmas/scenarios/dispersion.py b/vmas/scenarios/dispersion.py index d3610a99..58afed0c 100644 --- a/vmas/scenarios/dispersion.py +++ b/vmas/scenarios/dispersion.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. @@ -15,11 +15,14 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): n_agents = kwargs.get("n_agents", 4) self.share_reward = kwargs.get("share_reward", False) self.penalise_by_time = kwargs.get("penalise_by_time", False) - - n_food = n_agents + self.food_radius = kwargs.get("food_radius", 0.05) + self.pos_range = kwargs.get("pos_range", 1.0) + n_food = kwargs.get("n_food", n_agents) # Make world - world = World(batch_dim, device) + world = World( + batch_dim, device, x_semidim=self.pos_range, y_semidim=self.pos_range + ) # Add agents for i in range(n_agents): # Constraint: all agents have same action range and multiplier @@ -32,9 +35,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): # Add landmarks for i in range(n_food): food = Landmark( - name=f"food {i}", + name=f"food_{i}", collide=False, - shape=Sphere(radius=0.02), + shape=Sphere(radius=self.food_radius), color=Color.GREEN, ) world.add_landmark(food) @@ -58,8 +61,8 @@ def reset_world_at(self, env_index: int = None): device=self.world.device, dtype=torch.float32, ).uniform_( - -1.0, - 1.0, + -self.pos_range, + self.pos_range, ), batch_index=env_index, ) diff --git a/vmas/scenarios/dropout.py b/vmas/scenarios/dropout.py index 54a933e0..6d8333da 100644 --- a/vmas/scenarios/dropout.py +++ b/vmas/scenarios/dropout.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
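The dispersion scenario now exposes its food layout through kwargs; a short usage sketch (values illustrative):

    from vmas import make_env

    env = make_env(
        scenario="dispersion",
        num_envs=32,
        device="cpu",
        n_agents=4,
        n_food=8,          # number of food landmarks, decoupled from n_agents
        food_radius=0.1,   # radius of each food sphere (default 0.05)
        pos_range=1.5,     # spawn range, also used as the world semidim
    )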
import math @@ -6,11 +6,10 @@ import torch from torch import Tensor - from vmas import render_interactively from vmas.simulator.core import Agent, Landmark, Sphere, World from vmas.simulator.scenario import BaseScenario -from vmas.simulator.utils import Color +from vmas.simulator.utils import Color, ScenarioUtils DEFAULT_ENERGY_COEFF = 0.02 @@ -21,19 +20,24 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.energy_coeff = kwargs.get( "energy_coeff", DEFAULT_ENERGY_COEFF ) # Weight of team energy penalty + self.start_same_point = kwargs.get("start_same_point", False) + self.agent_radius = 0.05 + self.goal_radius = 0.03 # Make world world = World(batch_dim, device) # Add agents for i in range(n_agents): # Constraint: all agents have same action range and multiplier - agent = Agent(name=f"agent_{i}", collide=False) + agent = Agent( + name=f"agent_{i}", collide=False, shape=Sphere(radius=self.agent_radius) + ) world.add_agent(agent) # Add landmarks goal = Landmark( name="goal", collide=False, - shape=Sphere(radius=0.03), + shape=Sphere(radius=self.goal_radius), color=Color.GREEN, ) world.add_landmark(goal) @@ -45,36 +49,42 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): return world def reset_world_at(self, env_index: int = None): - for agent in self.world.agents: - # Random pos between -1 and 1 - agent.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), + if self.start_same_point: + for agent in self.world.agents: + agent.set_pos( + torch.zeros( + (1, 2) if env_index is not None else (self.world.batch_dim, 2), + device=self.world.device, + dtype=torch.float, + ), + batch_index=env_index, + ) + ScenarioUtils.spawn_entities_randomly( + self.world.landmarks, + self.world, + env_index, + self.goal_radius + self.agent_radius + 0.01, + x_bounds=(-1, 1), + y_bounds=(-1, 1), + occupied_positions=torch.zeros( + 1 if env_index is not None else self.world.batch_dim, + 1, + 2, device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, + dtype=torch.float, ), - batch_index=env_index, ) - for landmark in self.world.landmarks: - # Random pos between -1 and 1 - landmark.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), - device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, - ), - batch_index=env_index, + else: + ScenarioUtils.spawn_entities_randomly( + self.world.policy_agents + self.world.landmarks, + self.world, + env_index, + self.goal_radius + self.agent_radius + 0.01, + x_bounds=(-1, 1), + y_bounds=(-1, 1), ) + + for landmark in self.world.landmarks: if env_index is None: landmark.eaten = torch.full( (self.world.batch_dim,), False, device=self.world.device diff --git a/vmas/scenarios/give_way.py b/vmas/scenarios/give_way.py index af1fb9bc..a1998601 100644 --- a/vmas/scenarios/give_way.py +++ b/vmas/scenarios/give_way.py @@ -1,10 +1,9 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
import math import torch - from vmas import render_interactively from vmas.simulator.core import Agent, World, Landmark, Sphere, Line, Box from vmas.simulator.scenario import BaseScenario @@ -21,6 +20,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.linear_friction = kwargs.get("linear_friction", 0.1) self.mirror_passage = kwargs.get("mirror_passage", False) self.done_on_completion = kwargs.get("done_on_completion", False) + self.observe_rel_pos = kwargs.get("observe_rel_pos", False) # Reward params self.pos_shaping_factor = kwargs.get("pos_shaping_factor", 1.0) @@ -63,7 +63,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): # Add agents blue_agent = Agent( - name="blue_agent_0", + name="agent_0", rotatable=False, linear_friction=self.linear_friction, shape=Sphere(radius=self.agent_radius) @@ -79,7 +79,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): blue_agent, world, controller_params, "standard" ) blue_goal = Landmark( - name="blue goal", + name="goal_0", collide=False, shape=Sphere(radius=self.agent_radius / 2), color=Color.BLUE, @@ -89,7 +89,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): world.add_landmark(blue_goal) green_agent = Agent( - name="green_agent_0", + name="agent_1", color=Color.GREEN, linear_friction=self.linear_friction, shape=Sphere(radius=self.agent_radius) @@ -106,7 +106,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): green_agent, world, controller_params, "standard" ) green_goal = Landmark( - name="green goal", + name="goal_1", collide=False, shape=Sphere(radius=self.agent_radius / 2), color=Color.GREEN, @@ -302,11 +302,17 @@ def reward(self, agent: Agent): ) def observation(self, agent: Agent): + rel = [] + for a in self.world.agents: + if a != agent: + rel.append(agent.state.pos - a.state.pos) + observations = [ agent.state.pos, agent.state.vel, - agent.state.pos, ] + if self.observe_rel_pos: + observations += rel if self.obs_noise > 0: for i, obs in enumerate(observations): diff --git a/vmas/scenarios/mpe/simple_crypto.py b/vmas/scenarios/mpe/simple_crypto.py index ba53c23a..739457c3 100644 --- a/vmas/scenarios/mpe/simple_crypto.py +++ b/vmas/scenarios/mpe/simple_crypto.py @@ -181,10 +181,10 @@ def observation(self, agent: Agent): key, ], dim=-1, - ) + ).to(torch.float) # listener if not agent.speaker and not agent.adversary: - return torch.cat([key, *comm], dim=-1) + return torch.cat([key, *comm], dim=-1).to(torch.float) # adv if not agent.speaker and agent.adversary: - return torch.cat([*comm], dim=-1) + return torch.cat([*comm], dim=-1).to(torch.float) diff --git a/vmas/scenarios/mpe/simple_tag.py b/vmas/scenarios/mpe/simple_tag.py index 67ee8078..aa771711 100644 --- a/vmas/scenarios/mpe/simple_tag.py +++ b/vmas/scenarios/mpe/simple_tag.py @@ -1,23 +1,41 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
import torch from vmas import render_interactively -from vmas.simulator.core import World, Agent, Landmark, Sphere +from vmas.simulator.core import World, Agent, Landmark, Sphere, Line from vmas.simulator.scenario import BaseScenario from vmas.simulator.utils import Color class Scenario(BaseScenario): def make_world(self, batch_dim: int, device: torch.device, **kwargs): - world = World(batch_dim=batch_dim, device=device, x_semidim=1, y_semidim=1) + num_good_agents = kwargs.get("num_good_agents", 1) + num_adversaries = kwargs.get("num_adversaries", 3) + num_landmarks = kwargs.get("num_landmarks", 2) + self.shape_agent_rew = kwargs.get("shape_agent_rew", False) + self.shape_adversary_rew = kwargs.get("shape_adversary_rew", False) + self.agents_share_rew = kwargs.get("agents_share_rew", False) + self.adversaries_share_rew = kwargs.get("adversaries_share_rew", True) + self.observe_same_team = kwargs.get("observe_same_team", True) + self.observe_pos = kwargs.get("observe_pos", True) + self.observe_vel = kwargs.get("observe_vel", True) + self.bound = kwargs.get("bound", 1.0) + self.respawn_at_catch = kwargs.get("respawn_at_catch", False) + + world = World( + batch_dim=batch_dim, + device=device, + x_semidim=self.bound, + y_semidim=self.bound, + substeps=10, + collision_force=500, + ) # set any world properties first - num_good_agents = 1 - num_adversaries = 3 num_agents = num_adversaries + num_good_agents - num_landmarks = 2 + self.adversary_radius = 0.075 # Add agents for i in range(num_agents): @@ -26,7 +44,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): agent = Agent( name=name, collide=True, - shape=Sphere(radius=0.075 if adversary else 0.05), + shape=Sphere(radius=self.adversary_radius if adversary else 0.05), u_multiplier=3.0 if adversary else 4.0, max_speed=1.0 if adversary else 1.3, color=Color.RED if adversary else Color.GREEN, @@ -55,8 +73,8 @@ def reset_world_at(self, env_index: int = None): device=self.world.device, dtype=torch.float32, ).uniform_( - -1.0, - 1.0, + -self.bound, + self.bound, ), batch_index=env_index, ) @@ -70,15 +88,15 @@ def reset_world_at(self, env_index: int = None): device=self.world.device, dtype=torch.float32, ).uniform_( - -0.9, - 0.9, + -(self.bound - 0.1), + self.bound - 0.1, ), batch_index=env_index, ) def is_collision(self, agent1: Agent, agent2: Agent): delta_pos = agent1.state.pos - agent2.state.pos - dist = torch.sqrt(torch.sum(torch.square(delta_pos), dim=-1)) + dist = torch.linalg.vector_norm(delta_pos, dim=-1) dist_min = agent1.shape.radius + agent2.shape.radius return dist < dist_min @@ -91,27 +109,52 @@ def adversaries(self): return [agent for agent in self.world.agents if agent.adversary] def reward(self, agent: Agent): - # Agents are rewarded based on minimum agent distance to each landmark - main_reward = ( - self.adversary_reward(agent) - if agent.adversary - else self.agent_reward(agent) - ) - return main_reward + is_first = agent == self.world.agents[0] + + if is_first: + for a in self.world.agents: + a.rew = ( + self.adversary_reward(a) if a.adversary else self.agent_reward(a) + ) + self.agents_rew = torch.stack( + [a.rew for a in self.good_agents()], dim=-1 + ).sum(-1) + self.adverary_rew = torch.stack( + [a.rew for a in self.adversaries()], dim=-1 + ).sum(-1) + if self.respawn_at_catch: + for a in self.good_agents(): + for adv in self.adversaries(): + coll = self.is_collision(a, adv) + a.state.pos[coll] = torch.zeros( + (self.world.batch_dim, self.world.dim_p), + device=self.world.device, + dtype=torch.float32, + 
).uniform_(-self.bound, self.bound,)[coll] + a.state.vel[coll] = 0.0 + + if agent.adversary: + if self.adversaries_share_rew: + return self.adverary_rew + else: + return agent.rew + else: + if self.agents_share_rew: + return self.agents_rew + else: + return agent.rew def agent_reward(self, agent: Agent): # Agents are negatively rewarded if caught by adversaries rew = torch.zeros( self.world.batch_dim, device=self.world.device, dtype=torch.float32 ) - shape = False adversaries = self.adversaries() - if ( - shape - ): # reward can optionally be shaped (increased reward for increased distance from adversary) + if self.shape_agent_rew: + # reward can optionally be shaped (increased reward for increased distance from adversary) for adv in adversaries: - rew += 0.1 * torch.sqrt( - torch.sum(torch.square(agent.state.pos - adv.state.pos), dim=-1) + rew += 0.1 * torch.linalg.vector_norm( + agent.state.pos - adv.state.pos, dim=-1 ) if agent.collide: for a in adversaries: @@ -124,35 +167,29 @@ def adversary_reward(self, agent: Agent): rew = torch.zeros( self.world.batch_dim, device=self.world.device, dtype=torch.float32 ) - shape = False agents = self.good_agents() - adversaries = self.adversaries() if ( - shape + self.shape_adversary_rew ): # reward can optionally be shaped (decreased reward for increased distance from agents) - for adv in adversaries: - rew -= ( - 0.1 - * torch.min( - torch.stack( - [ - torch.sqrt( - torch.sum( - torch.square(a.state.pos - adv.state.pos), - dim=-1, - ) - ) - for a in agents - ], - dim=1, - ), + rew -= ( + 0.1 + * torch.min( + torch.stack( + [ + torch.linalg.vector_norm( + a.state.pos - agent.state.pos, + dim=-1, + ) + for a in agents + ], dim=-1, - )[0] - ) + ), + dim=-1, + )[0] + ) if agent.collide: for ag in agents: - for adv in adversaries: - rew[self.is_collision(ag, adv)] += 10 + rew[self.is_collision(ag, agent)] += 10 return rew def observation(self, agent: Agent): @@ -166,14 +203,66 @@ def observation(self, agent: Agent): for other in self.world.agents: if other is agent: continue - other_pos.append(other.state.pos - agent.state.pos) - if not other.adversary: + if agent.adversary and not other.adversary: + other_pos.append(other.state.pos - agent.state.pos) + other_vel.append(other.state.vel) + elif not agent.adversary and not other.adversary and self.observe_same_team: + other_pos.append(other.state.pos - agent.state.pos) other_vel.append(other.state.vel) + elif not agent.adversary and other.adversary: + other_pos.append(other.state.pos - agent.state.pos) + elif agent.adversary and other.adversary and self.observe_same_team: + other_pos.append(other.state.pos - agent.state.pos) + return torch.cat( - [agent.state.vel, agent.state.pos, *entity_pos, *other_pos, *other_vel], + [ + *([agent.state.vel] if self.observe_vel else []), + *([agent.state.pos] if self.observe_pos else []), + *entity_pos, + *other_pos, + *other_vel, + ], dim=-1, ) + def extra_render(self, env_index: int = 0): + from vmas.simulator import rendering + + geoms = [] + + # Perimeter + for i in range(4): + geom = Line( + length=2 + * ((self.bound - self.adversary_radius) + self.adversary_radius * 2) + ).get_geometry() + xform = rendering.Transform() + geom.add_attr(xform) + + xform.set_translation( + 0.0 + if i % 2 + else ( + self.bound + self.adversary_radius + if i == 0 + else -self.bound - self.adversary_radius + ), + 0.0 + if not i % 2 + else ( + self.bound + self.adversary_radius + if i == 1 + else -self.bound - self.adversary_radius + ), + ) + xform.set_rotation(torch.pi / 2 if not i % 2 
else 0.0) + color = Color.BLACK.value + if isinstance(color, torch.Tensor) and len(color.shape) > 1: + color = color[env_index] + geom.set_color(*color) + geoms.append(geom) + return geoms + if __name__ == "__main__": render_interactively(__file__, control_two_agents=True) diff --git a/vmas/scenarios/navigation.py b/vmas/scenarios/navigation.py index c1f409f0..1f1fb928 100644 --- a/vmas/scenarios/navigation.py +++ b/vmas/scenarios/navigation.py @@ -7,21 +7,33 @@ import torch from torch import Tensor +import numpy as np + from vmas import render_interactively -from vmas.simulator.core import Agent, Landmark, World, Sphere, Entity + +from vmas.simulator.core import Agent, Landmark, World, Sphere, Box, Entity from vmas.simulator.heuristic_policy import BaseHeuristicPolicy from vmas.simulator.scenario import BaseScenario from vmas.simulator.sensors import Lidar -from vmas.simulator.utils import Color, ScenarioUtils +from vmas.simulator.utils import Color, ScenarioUtils, X, Y +from vmas.simulator.dynamics.waypoint_tracker import WaypointTracker +from vmas.simulator.controllers.velocity_controller import VelocityController +from vmas.simulator.dynamics.diff_drive import DiffDrive +from vmas.simulator.dynamics.holonomic import Holonomic +from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycle +from copy import deepcopy + if typing.TYPE_CHECKING: from vmas.simulator.rendering import Geom +DYNAMIC_MODELS = {'holonomic': Holonomic, + 'differential': DiffDrive, + 'bicycle': KinematicBicycle} class Scenario(BaseScenario): def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.plot_grid = False - self.n_agents = kwargs.get("n_agents", 4) self.collisions = kwargs.get("collisions", True) self.agents_with_same_goal = kwargs.get("agents_with_same_goal", 1) @@ -42,6 +54,22 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.world_semidim = 1 self.min_collision_distance = 0.005 + # Build all of the agents from a 'robots' file. Each robot shall have + # an ID (binary), a motion model, maximum speed (dynamics profile), and a + # geometry profile. The function should receive the robots file as a dictionary + self.n_agents = kwargs.get("n_agents", 3) + self.robots_file = kwargs.get("robots", + # default 3 agents + {'name': 'robots_0', # name of the robots file/pool + 'robots': + # list of robots in the robot pool + [ + {'id': "001", 'dynamics': 'bicycle'}, + {'id': "010", 'dynamics': 'differential'}, + {'id': "100", 'dynamics': 'holonomic'} + ] + }) + assert 1 <= self.agents_with_same_goal <= self.n_agents if self.agents_with_same_goal > 1: assert ( @@ -59,58 +87,103 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): # Make world world = World(batch_dim, device, substeps=2) - known_colors = [ - (0.22, 0.49, 0.72), - (1.00, 0.50, 0), - (0.30, 0.69, 0.29), - (0.97, 0.51, 0.75), - (0.60, 0.31, 0.64), - (0.89, 0.10, 0.11), - (0.87, 0.87, 0), - ] - colors = torch.randn( - (max(self.n_agents - len(known_colors), 0), 3), device=device - ) entity_filter_agents: Callable[[Entity], bool] = lambda e: isinstance(e, Agent) + # Set color to be based on kinematic model + # red = holonomic + # green = differential + # blue = bicycle. 
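The navigation scenario now builds its team from a robots specification passed through kwargs, where each entry pairs an id with a dynamics type ('holonomic', 'differential', or 'bicycle'). A usage sketch with a custom pool (values illustrative; note that bicycle entries are read further below with robot.get('width'), robot.get('l_f') and robot.get('l_r'), so those keys should be supplied):

    from vmas import make_env

    robots = {
        "name": "robots_custom",
        "robots": [
            {"id": "001", "dynamics": "holonomic"},
            {"id": "010", "dynamics": "differential"},
            {"id": "100", "dynamics": "bicycle", "width": 0.1, "l_f": 0.1, "l_r": 0.1},
        ],
    }

    env = make_env(
        scenario="navigation",
        num_envs=8,
        device="cpu",
        robots=robots,
        n_agents=len(robots["robots"]),
    )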
+ motion_model_colors_hsl = { + 'holonomic': [1,0,0], + 'differential': [0,1,0], + 'bicycle': [0,0,1] + } + # Unpack the robot pool + self.robots = self.robots_file['robots'] # list of dictionaries + # Add agents - for i in range(self.n_agents): - color = ( - known_colors[i] - if i < len(known_colors) - else colors[i - len(known_colors)] - ) - - # Constraint: all agents have same action range and multiplier - agent = Agent( - name=f"agent_{i}", - collide=self.collisions, - color=color, - shape=Sphere(radius=self.agent_radius), - render_action=True, - sensors=[ - Lidar( - world, + self.agent_list = [] + for i, robot in enumerate(self.robots): + agent_id = robot['id'] + agent_dynamics = robot['dynamics'] + sensors=[Lidar(world, n_rays=12, max_range=self.lidar_range, - entity_filter=entity_filter_agents, - ), - ] - if self.collisions - else None, + entity_filter=entity_filter_agents)] + + color = motion_model_colors_hsl[agent_dynamics] + + if agent_dynamics == 'holonomic': + agent = Agent( + name=f"agent_holo-{agent_id}", + collide=True, + color=color, + shape=Sphere(0.1), + render_action=True, + u_range=[1, 1], + u_multiplier=[1, 0.001], + dynamics=Holonomic(), + sensors=sensors, + ) + # Holonomic dynamics by default have actions + # as force + controller_params = [0.2, 0.6, 0.0002] + agent.controller = VelocityController( + agent, world, controller_params, "standard" ) + + elif agent_dynamics == 'differential': + agent = Agent( + name=f"agent_diff-{agent_id}", + collide=True, + color=color, + shape=Sphere(0.1), + render_action=True, + u_range=[1, 1], + u_multiplier=[1, 0.001], + dynamics=DiffDrive(world, integration="rk4"), + sensors=sensors, + ) + elif agent_dynamics == 'bicycle': + # width, l_f, l_r = 0.1, 0.1, 0.1 + width, l_f, l_r = robot.get('width'), robot.get('l_f'), robot.get('l_r') + max_steering_angle = torch.deg2rad(torch.tensor(40.0)) + agent = Agent( + name=f"agent_bicycle-{agent_id}", + shape=Box(length=l_f + l_r, width=width), + collide=True, + color=color, + render_action=True, + u_range=[1, 1], + u_multiplier=[1, max_steering_angle], + dynamics=KinematicBicycle( + world, + width=width, + l_f=l_f, + l_r=l_r, + max_steering_angle=max_steering_angle, + integration="euler", # one of "euler", "rk4" + ), + sensors=sensors, + ) + else: + raise ValueError(f"Undefined agent dynamics {agent_dynamics}") + agent.pos_rew = torch.zeros(batch_dim, device=device) agent.agent_collision_rew = agent.pos_rew.clone() - world.add_agent(agent) + # Add goals goal = Landmark( - name=f"goal {i}", + name=f"goal_{agent_id}", collide=False, color=color, ) + world.add_landmark(goal) agent.goal = goal + world.add_agent(agent) + self.agent_list.append(agent) self.pos_rew = torch.zeros(batch_dim, device=device) self.final_rew = self.pos_rew.clone() @@ -145,7 +218,7 @@ def reset_world_at(self, env_index: int = None): ) goal_poses.append(position.squeeze(1)) occupied_positions = torch.cat([occupied_positions, position], dim=1) - + # print(self.world.agents) for i, agent in enumerate(self.world.agents): if self.split_goals: goal_index = int(i // self.agents_with_same_goal) @@ -225,6 +298,7 @@ def observation(self, agent: Agent): return torch.cat( [ agent.state.pos, + agent.state.rot, agent.state.vel, ] + goal_poses @@ -243,7 +317,7 @@ def done(self): agent.state.pos - agent.goal.state.pos, dim=-1, ) - < agent.shape.radius + < (agent.shape.radius if isinstance(agent.shape, Sphere) else agent.shape.width) for agent in self.world.agents ], dim=-1, @@ -318,6 +392,99 @@ def compute_action(self, observation: 
torch.Tensor, u_range: float) -> torch.Ten return action +class HeuristicPolicy(BaseHeuristicPolicy): + def __init__(self, clf_epsilon = 0.2, clf_slack = 100.0, *args, **kwargs): + super().__init__(*args, **kwargs) + self.clf_epsilon = clf_epsilon # Exponential CLF convergence rate + self.clf_slack = clf_slack # weights on CLF-QP slack variable + + def compute_action(self, observation: Tensor, u_range: Tensor) -> Tensor: + """ + QP inputs: + These values need to computed apriri based on observation before passing into QP + + V: Lyapunov function value + lfV: Lie derivative of Lyapunov function + lgV: Lie derivative of Lyapunov function + CLF_slack: CLF constraint slack variable + + QP outputs: + u: action + CLF_slack: CLF constraint slack variable, 0 if CLF constraint is satisfied + """ + # Install it with: pip install cvxpylayers + import cvxpy as cp + from cvxpylayers.torch import CvxpyLayer + + self.n_env = observation.shape[0] + self.device = observation.device + agent_pos = observation[:, :2] + agent_vel = observation[:, 2:4] + goal_pos = (-1.0) * (observation[:, 4:6] - agent_pos) + + # Pre-compute tensors for the CLF and CBF constraints, + # Lyapunov Function from: https://arxiv.org/pdf/1903.03692.pdf + + # Laypunov function + V_value = ( + (agent_pos[:, X] - goal_pos[:, X]) ** 2 + + 0.5 * (agent_pos[:, X] - goal_pos[:, X]) * agent_vel[:, X] + + agent_vel[:, X] ** 2 + + (agent_pos[:, Y] - goal_pos[:, Y]) ** 2 + + 0.5 * (agent_pos[:, Y] - goal_pos[:, Y]) * agent_vel[:, Y] + + agent_vel[:, Y] ** 2 + ) + + LfV_val = (2 * (agent_pos[:, X] - goal_pos[:, X]) + agent_vel[:, X]) * ( + agent_vel[:, X] + ) + (2 * (agent_pos[:, Y] - goal_pos[:, Y]) + agent_vel[:, Y]) * ( + agent_vel[:, Y] + ) + LgV_vals = torch.stack( + [ + 0.5 * (agent_pos[:, X] - goal_pos[:, X]) + 2 * agent_vel[:, X], + 0.5 * (agent_pos[:, Y] - goal_pos[:, Y]) + 2 * agent_vel[:, Y], + ], + dim=1, + ) + # Define Quadratic Program (QP) based controller + u = cp.Variable(2) + V_param = cp.Parameter(1) # Lyapunov Function: V(x): x -> R, dim: (1,1) + lfV_param = cp.Parameter(1) + lgV_params = cp.Parameter( + 2 + ) # Lie derivative of Lyapunov Function, dim: (1, action_dim) + clf_slack = cp.Variable(1) # CLF constraint slack variable, dim: (1,1) + + constraints = [] + + # QP Cost F = u^T @ u + clf_slack**2 + qp_objective = cp.Minimize(cp.sum_squares(u) + self.clf_slack * clf_slack**2) + + # control bounds between u_range + constraints += [u <= u_range] + constraints += [u >= -u_range] + # CLF constraint + constraints += [ + lfV_param + lgV_params @ u + self.clf_epsilon * V_param + clf_slack <= 0 + ] + + QP_problem = cp.Problem(qp_objective, constraints) + + # Initialize CVXPY layers + QP_controller = CvxpyLayer( + QP_problem, parameters=[V_param, lfV_param, lgV_params], variables=[u] + ) + + # Solve QP + CVXpylayer_parameters = [V_value.unsqueeze(1), LfV_val.unsqueeze(1), LgV_vals] + action = QP_controller(*CVXpylayer_parameters, solver_args={"max_iters": 500})[ + 0 + ] + + return action + + if __name__ == "__main__": render_interactively( __file__, diff --git a/vmas/scenarios/reverse_transport.py b/vmas/scenarios/reverse_transport.py index 005794af..26830066 100644 --- a/vmas/scenarios/reverse_transport.py +++ b/vmas/scenarios/reverse_transport.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
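The QP-based policy above needs cvxpy and cvxpylayers (pip install cvxpy cvxpylayers). A usage sketch, assuming BaseHeuristicPolicy accepts a continuous_action flag and that the observation follows the [pos, vel, goal] layout indexed inside compute_action (values illustrative):

    import torch
    from vmas.scenarios.navigation import HeuristicPolicy

    policy = HeuristicPolicy(continuous_action=True)

    obs = torch.zeros(4, 6)                  # (n_envs, obs_dim)
    obs[:, 4:6] = torch.tensor([0.5, -0.5])  # goal-related terms, illustrative

    action = policy.compute_action(obs, u_range=1.0)  # -> shape (n_envs, 2)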
@@ -21,7 +21,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.shaping_factor = 100 # Make world - world = World(batch_dim, device, contact_margin=6e-3) + world = World( + batch_dim, device, contact_margin=6e-3, substeps=5, collision_force=500 + ) # Add agents for i in range(n_agents): agent = Agent(name=f"agent_{i}", shape=Sphere(0.03), u_multiplier=0.5) diff --git a/vmas/scenarios/sampling.py b/vmas/scenarios/sampling.py index 2dcfc894..37b21166 100644 --- a/vmas/scenarios/sampling.py +++ b/vmas/scenarios/sampling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023. +# Copyright (c) 2023-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. from typing import Dict, Callable @@ -6,7 +6,6 @@ import torch from torch import Tensor from torch.distributions import MultivariateNormal - from vmas import render_interactively from vmas.simulator.core import World, Line, Agent, Sphere, Entity from vmas.simulator.scenario import BaseScenario @@ -27,11 +26,19 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.grid_spacing = kwargs.get("grid_spacing", 0.05) self.n_gaussians = kwargs.get("n_gaussians", 3) - self.cov = 0.05 + self.cov = kwargs.get("cov", 0.05) + self.collisions = kwargs.get("collisions", True) + self.spawn_same_pos = kwargs.get("spawn_same_pos", False) + self.norm = kwargs.get("norm", True) + assert not (self.spawn_same_pos and self.collisions) assert (self.xdim / self.grid_spacing) % 1 == 0 and ( self.ydim / self.grid_spacing ) % 1 == 0 + self.covs = ( + [self.cov] * self.n_gaussians if isinstance(self.cov, float) else self.cov + ) + assert len(self.covs) == self.n_gaussians self.plot_grid = False self.n_x_cells = int((2 * self.xdim) / self.grid_spacing) @@ -39,19 +46,24 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.max_pdf = torch.zeros((batch_dim,), device=device, dtype=torch.float32) self.alpha_plot: float = 0.5 + self.agent_xspawn_range = 0 if self.spawn_same_pos else self.xdim + self.agent_yspawn_range = 0 if self.spawn_same_pos else self.ydim + self.x_semidim = self.xdim - self.agent_radius + self.y_semidim = self.ydim - self.agent_radius + # Make world world = World( batch_dim, device, - x_semidim=self.xdim - self.agent_radius, - y_semidim=self.ydim - self.agent_radius, + x_semidim=self.x_semidim, + y_semidim=self.y_semidim, ) entity_filter_agents: Callable[[Entity], bool] = lambda e: isinstance(e, Agent) for i in range(self.n_agents): agent = Agent( name=f"agent_{i}", render_action=True, - collide=True, + collide=self.collisions, shape=Sphere(radius=self.agent_radius), sensors=[ Lidar( @@ -62,7 +74,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): max_range=self.lidar_range, entity_filter=entity_filter_agents, ), - ], + ] + if self.collisions + else None, ) world.add_agent(agent) @@ -77,9 +91,12 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): torch.zeros((batch_dim, world.dim_p), device=device, dtype=torch.float32) for _ in range(self.n_gaussians) ] - self.cov_matrix = torch.tensor( - [[self.cov, 0], [0, self.cov]], dtype=torch.float32, device=device - ).expand(batch_dim, world.dim_p, world.dim_p) + self.cov_matrices = [ + torch.tensor( + [[cov, 0], [0, cov]], dtype=torch.float32, device=device + ).expand(batch_dim, world.dim_p, world.dim_p) + for cov in self.covs + ] return world @@ -104,9 +121,9 @@ def reset_world_at(self, env_index: int = None): self.gaussians = [ MultivariateNormal( loc=loc, - covariance_matrix=self.cov_matrix, + 
covariance_matrix=cov_matrix, ) - for loc in self.locs + for loc, cov_matrix in zip(self.locs, self.cov_matrices) ] if env_index is None: @@ -127,20 +144,20 @@ def reset_world_at(self, env_index: int = None): else (self.world.batch_dim, 1), device=self.world.device, dtype=torch.float32, - ).uniform_(-self.xdim, self.xdim), + ).uniform_(-self.agent_xspawn_range, self.agent_xspawn_range), torch.zeros( (1, 1) if env_index is not None else (self.world.batch_dim, 1), device=self.world.device, dtype=torch.float32, - ).uniform_(-self.ydim, self.ydim), + ).uniform_(-self.agent_yspawn_range, self.agent_yspawn_range), ], dim=-1, ), batch_index=env_index, ) - agent.sample = self.sample(agent.state.pos) + agent.sample = self.sample(agent.state.pos, norm=self.norm) def sample( self, @@ -193,8 +210,8 @@ def sample_single_env( + (pos[:, Y] < -self.ydim) + (pos[:, Y] > self.ydim) ) - pos[:, X].clamp_(-self.world.x_semidim, self.world.x_semidim) - pos[:, Y].clamp_(-self.world.y_semidim, self.world.y_semidim) + pos[:, X].clamp_(-self.x_semidim, self.x_semidim) + pos[:, Y].clamp_(-self.y_semidim, self.y_semidim) index = pos / self.grid_spacing index[:, X] += self.n_x_cells / 2 @@ -240,7 +257,9 @@ def reward(self, agent: Agent) -> Tensor: is_first = self.world.agents.index(agent) == 0 if is_first: for a in self.world.agents: - a.sample = self.sample(a.state.pos, update_sampled_flag=True) + a.sample = self.sample( + a.state.pos, update_sampled_flag=True, norm=self.norm + ) self.sampling_rew = torch.stack( [a.sample for a in self.world.agents], dim=-1 ).sum(-1) @@ -337,16 +356,16 @@ def extra_render(self, env_index: int = 0): 0.0 if i % 2 else ( - self.world.x_semidim + self.agent_radius + self.x_semidim + self.agent_radius if i == 0 - else -self.world.x_semidim - self.agent_radius + else -self.x_semidim - self.agent_radius ), 0.0 if not i % 2 else ( - self.world.y_semidim + self.agent_radius + self.y_semidim + self.agent_radius if i == 1 - else -self.world.y_semidim - self.agent_radius + else -self.y_semidim - self.agent_radius ), ) xform.set_rotation(torch.pi / 2 if not i % 2 else 0.0) diff --git a/vmas/scenarios/transport.py b/vmas/scenarios/transport.py index 2c185f6e..cfb563bb 100644 --- a/vmas/scenarios/transport.py +++ b/vmas/scenarios/transport.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
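The sampling scenario now accepts a per-Gaussian covariance list plus a few spawning switches; a short sketch (values illustrative):

    from vmas import make_env

    env = make_env(
        scenario="sampling",
        num_envs=16,
        device="cpu",
        n_gaussians=3,
        cov=[0.05, 0.1, 0.2],  # one covariance per Gaussian, or a single float
        collisions=False,      # disable agent collisions (and the agent Lidar)
        spawn_same_pos=True,   # spawn all agents at the same point; requires collisions=False
        norm=False,            # whether sampled values are normalised (default True)
    )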
@@ -8,7 +8,7 @@ from vmas.simulator.core import Agent, Box, Landmark, Sphere, World from vmas.simulator.heuristic_policy import BaseHeuristicPolicy from vmas.simulator.scenario import BaseScenario -from vmas.simulator.utils import Color +from vmas.simulator.utils import Color, ScenarioUtils class Scenario(BaseScenario): @@ -20,12 +20,25 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.package_mass = kwargs.get("package_mass", 50) self.shaping_factor = 100 + self.world_semidim = 1 + self.agent_radius = 0.03 # Make world - world = World(batch_dim, device) + world = World( + batch_dim, + device, + x_semidim=self.world_semidim + + 2 * self.agent_radius + + max(self.package_length, self.package_width), + y_semidim=self.world_semidim + + 2 * self.agent_radius + + max(self.package_length, self.package_width), + ) # Add agents for i in range(n_agents): - agent = Agent(name=f"agent_{i}", shape=Sphere(0.03), u_multiplier=0.6) + agent = Agent( + name=f"agent_{i}", shape=Sphere(self.agent_radius), u_multiplier=0.6 + ) world.add_agent(agent) # Add landmarks goal = Landmark( @@ -52,35 +65,50 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): return world def reset_world_at(self, env_index: int = None): + # Random pos between -1 and 1 + ScenarioUtils.spawn_entities_randomly( + self.world.agents, + self.world, + env_index, + min_dist_between_entities=self.agent_radius * 2, + x_bounds=( + -self.world_semidim, + self.world_semidim, + ), + y_bounds=( + -self.world_semidim, + self.world_semidim, + ), + ) + agent_occupied_positions = torch.stack( + [agent.state.pos for agent in self.world.agents], dim=1 + ) + if env_index is not None: + agent_occupied_positions = agent_occupied_positions[env_index].unsqueeze(0) + goal = self.world.landmarks[0] - goal.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), - device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, + ScenarioUtils.spawn_entities_randomly( + [goal] + self.packages, + self.world, + env_index, + min_dist_between_entities=max( + package.shape.circumscribed_radius() + goal.shape.radius + 0.01 + for package in self.packages ), - batch_index=env_index, + x_bounds=( + -self.world_semidim, + self.world_semidim, + ), + y_bounds=( + -self.world_semidim, + self.world_semidim, + ), + occupied_positions=agent_occupied_positions, ) + for i, package in enumerate(self.packages): - package.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), - device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, - ), - batch_index=env_index, - ) package.on_goal = self.world.is_overlapping(package, package.goal) + if env_index is None: package.global_shaping = ( torch.linalg.vector_norm( @@ -95,38 +123,6 @@ def reset_world_at(self, env_index: int = None): ) * self.shaping_factor ) - for i, agent in enumerate(self.world.agents): - # Random pos between -1 and 1 - agent.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), - device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, - ), - batch_index=env_index, - ) - for package in self.packages: - while self.world.is_overlapping( - agent, package, env_index=env_index - ).any(): - agent.set_pos( - torch.zeros( - (1, self.world.dim_p) - if env_index is not None - else (self.world.batch_dim, self.world.dim_p), - 
device=self.world.device, - dtype=torch.float32, - ).uniform_( - -1.0, - 1.0, - ), - batch_index=env_index, - ) def reward(self, agent: Agent): is_first = agent == self.world.agents[0] @@ -343,12 +339,4 @@ def get_action( if __name__ == "__main__": - render_interactively( - __file__, - control_two_agents=True, - n_agents=4, - n_packages=1, - package_width=0.15, - package_length=0.15, - package_mass=50, - ) + render_interactively(__file__, control_two_agents=True) diff --git a/vmas/scenarios/wind_flocking.py b/vmas/scenarios/wind_flocking.py index f67a9f0b..aa301181 100644 --- a/vmas/scenarios/wind_flocking.py +++ b/vmas/scenarios/wind_flocking.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. import typing @@ -6,7 +6,6 @@ import torch from torch import Tensor - from vmas import render_interactively from vmas.simulator.core import Agent, World, Sphere from vmas.simulator.scenario import BaseScenario @@ -63,12 +62,17 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): self.rot_shaping_factor = kwargs.get("rot_shaping_factor", 0) self.energy_shaping_factor = kwargs.get("energy_shaping_factor", 0) + self.observe_rel_pos = kwargs.get("observe_rel_pos", False) + self.observe_rel_vel = kwargs.get("observe_rel_vel", False) + self.observe_pos = kwargs.get("observe_pos", True) + # Controller self.use_controller = kwargs.get("use_controller", True) self.wind = torch.tensor( [0, -kwargs.get("wind", 2)], device=device, dtype=torch.float32 ).expand(batch_dim, 2) self.v_range = kwargs.get("v_range", 0.5) + self.desired_vel = kwargs.get("desired_vel", self.v_range) self.f_range = kwargs.get("f_range", 100) controller_params = [1.5, 0.6, 0.002] @@ -85,7 +89,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs): world = World(batch_dim, device, drag=0, linear_friction=0.1) self.desired_vel = torch.tensor( - [0.0, self.v_range], device=device, dtype=torch.float32 + [0.0, self.desired_vel], device=device, dtype=torch.float32 ) self.max_pos = (self.horizon * world.dt) * self.desired_vel[Y] self.desired_pos = 10.0 @@ -358,10 +362,19 @@ def set_friction(self): self.big_agent.gravity = self.wind * dist_to_goal_angle def observation(self, agent: Agent): - observations = [ - agent.state.pos, - agent.state.vel, - ] + observations = [] + if self.observe_pos: + observations.append(agent.state.pos) + observations.append(agent.state.vel) + if self.observe_rel_pos: + for a in self.world.agents: + if a != agent: + observations.append(a.state.pos - agent.state.pos) + if self.observe_rel_vel: + for a in self.world.agents: + if a != agent: + observations.append(a.state.vel - agent.state.vel) + return torch.cat( observations, dim=-1, diff --git a/vmas/simulator/core.py b/vmas/simulator/core.py index ce218f95..0bacebfd 100644 --- a/vmas/simulator/core.py +++ b/vmas/simulator/core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
@@ -7,10 +7,14 @@ import math import typing from abc import ABC, abstractmethod -from typing import Callable, List, Tuple, Dict + +from typing import Callable, List, Tuple, Union, Sequence, Dict import torch from torch import Tensor + +from vmas.simulator.dynamics.common import Dynamics +from vmas.simulator.dynamics.holonomic import Holonomic from vmas.simulator.joints import Joint from vmas.simulator.physics import ( _get_closest_point_line, @@ -286,7 +290,7 @@ def _reset(self, env_index: typing.Optional[int]): else: attr[env_index] = 0.0 - def _spawn(self, dim_c: int, dim_p: int, dim_capability: int): + def _spawn(self, dim_c: int, dim_p: int, dim_capability: int=None): self.pos = torch.zeros( self.batch_dim, dim_p, device=self.device, dtype=torch.float32 ) @@ -308,9 +312,16 @@ def __init__( super().__init__() # communication utterance self._c = None + + # Agent force from actions + self._force = None + # Agent torque from actions + self._torque = None + # capability state self._capability = None + @property def c(self): return self._c @@ -334,14 +345,44 @@ def capability(self, capability: Tensor): assert ( self._batch_dim is not None and self._device is not None ), "First add an entity to the world before settings its capability state" - assert ( capability.shape[0] == self._batch_dim ), f"Internal state must match batch dim, got {capability.shape[0]}, expected {self._batch_dim}" self._capability = capability + + @property + def force(self): + return self._force + + @force.setter + def force(self, value): + assert ( + self._batch_dim is not None and self._device is not None + ), "First add an entity to the world before setting its state" + assert ( + value.shape[0] == self._batch_dim + ), f"Internal state must match batch dim, got {value.shape[0]}, expected {self._batch_dim}" + + self._force = value.to(self._device) + + @property + def torque(self): + return self._torque + + @torque.setter + def torque(self, value): + assert ( + self._batch_dim is not None and self._device is not None + ), "First add an entity to the world before setting its state" + assert ( + value.shape[0] == self._batch_dim + ), f"Internal state must match batch dim, got {value.shape[0]}, expected {self._batch_dim}" + + self._torque = value.to(self._device) + @override(EntityState) def _reset(self, env_index: typing.Optional[int]): - for attr in [self.c, self.capability]: + for attr in [self.c, self.force, self.torque, self.capability]: if attr is not None: if env_index is None: attr[:] = 0.0 @@ -356,23 +397,22 @@ def _spawn(self, dim_c: int, dim_p: int, dim_capability: int): self.batch_dim, dim_c, device=self.device, dtype=torch.float32 ) - if dim_capability > 0: - self.capability = torch.zeros( - self.batch_dim, dim_capability, device=self.device, dtype=torch.float - ) - super()._spawn(dim_c, dim_p, dim_capability) - + self.force = torch.zeros( + self.batch_dim, dim_p, device=self.device, dtype=torch.float32 + ) + self.torque = torch.zeros( + self.batch_dim, 1, device=self.device, dtype=torch.float32 + ) + super()._spawn(dim_c, dim_p) # action of an agent class Action(TorchVectorizedObject): def __init__( self, - u_range: float, - u_multiplier: float, - u_noise: float, - u_rot_range: float, - u_rot_multiplier: float, - u_rot_noise: float, + u_range: Union[float, Sequence[float]], + u_multiplier: Union[float, Sequence[float]], + u_noise: Union[float, Sequence[float]], + action_size: int, ): super().__init__() # physical motor noise amount @@ -381,21 +421,28 @@ def __init__( self._u_range = u_range # agent action is 
a force multiplied by this amount self._u_multiplier = u_multiplier - - # physical motor noise amount - self._u_rot_noise = u_rot_noise - # control range - self._u_rot_range = u_rot_range - # agent action is a force multiplied by this amount - self._u_rot_multiplier = u_rot_multiplier + # Number of actions + self.action_size = action_size # physical action self._u = None - # rotation action - self._u_rot = None # communication_action self._c = None + self._u_range_tensor = None + self._u_multiplier_tensor = None + self._u_noise_tensor = None + + self._check_action_init() + + def _check_action_init(self): + for attr in (self.u_multiplier, self.u_range, self.u_noise): + if isinstance(attr, List): + assert len(attr) == self.action_size, ( + f"Action attributes u_... must be either a float or a list of floats" + f" (one per action) all with same length" + ) + @property def u(self): return self._u @@ -411,21 +458,6 @@ def u(self, u: Tensor): self._u = u.to(self._device) - @property - def u_rot(self): - return self._u_rot - - @u_rot.setter - def u_rot(self, u_rot: Tensor): - assert ( - self._batch_dim is not None and self._device is not None - ), "First add an agent to the world before setting its action" - assert ( - u_rot.shape[0] == self._batch_dim - ), f"Action must match batch dim, got {u_rot.shape[0]}, expected {self._batch_dim}" - - self._u_rot = u_rot.to(self._device) - @property def c(self): return self._c @@ -459,19 +491,31 @@ def u_noise(self): return self._u_noise @property - def u_rot_range(self): - return self._u_rot_range + def u_range_tensor(self): + if self._u_range_tensor is None: + self._u_range_tensor = self._to_tensor(self.u_range) + return self._u_range_tensor @property - def u_rot_multiplier(self): - return self._u_rot_multiplier + def u_multiplier_tensor(self): + if self._u_multiplier_tensor is None: + self._u_multiplier_tensor = self._to_tensor(self.u_multiplier) + return self._u_multiplier_tensor @property - def u_rot_noise(self): - return self._u_rot_noise + def u_noise_tensor(self): + if self._u_noise_tensor is None: + self._u_noise_tensor = self._to_tensor(self.u_noise) + return self._u_noise_tensor + + def _to_tensor(self, value): + return torch.tensor( + value if isinstance(value, Sequence) else [value] * self.action_size, + device=self.device, + ) def _reset(self, env_index: typing.Optional[int]): - for attr in [self.u, self.u_rot, self.c]: + for attr in [self.u, self.c]: if attr is not None: if env_index is None: attr[:] = 0.0 @@ -799,15 +843,12 @@ def __init__( alpha: float = 0.5, obs_range: float = None, obs_noise: float = None, - u_noise: float = None, - u_range: float = 1.0, - u_multiplier: float = 1.0, - u_rot_noise: float = None, - u_rot_range: float = 0.0, - u_rot_multiplier: float = 1.0, + u_noise: Union[float, Sequence[float]] = 0.0, + u_range: Union[float, Sequence[float]] = 1.0, + u_multiplier: Union[float, Sequence[float]] = 1.0, action_script: Callable[[Agent, World], None] = None, sensors: List[Sensor] = None, - c_noise: float = None, + c_noise: float = 0.0, silent: bool = True, capability_aware: bool = False, adversary: bool = False, @@ -817,6 +858,8 @@ def __init__( gravity: float = None, collision_filter: Callable[[Entity], bool] = lambda _: True, render_action: bool = False, + dynamics: Dynamics = None, # Defaults to holonomic + action_size: int = None, ): super().__init__( name, @@ -868,15 +911,20 @@ def __init__( # Render alpha self._alpha = alpha - # action + # Dynamics + self.dynamics = dynamics if dynamics is not None else Holonomic() + # 
Action + self.action_size = ( + action_size if action_size is not None else self.dynamics.needed_action_size + ) + self.dynamics.agent = self self._action = Action( u_range=u_range, u_multiplier=u_multiplier, u_noise=u_noise, - u_rot_range=u_rot_range, - u_rot_multiplier=u_rot_multiplier, - u_rot_noise=u_rot_noise, + action_size=self.action_size, ) + # state self._state = AgentState() @@ -905,29 +953,16 @@ def action_callback(self, world: World): assert ( self._action.u.shape[1] == world.dim_p ), f"Scripted physical action of agent {self.name} has wrong shape" + assert ( - (self._action.u / self.u_multiplier).abs() <= self.u_range + (self._action.u / self.action.u_multiplier_tensor).abs() + <= self.action.u_range_tensor ).all(), f"Scripted physical action of {self.name} is out of range" - if self.u_rot_range != 0: - assert ( - self._action.u_rot is not None - ), f"Action script of {self.name} should set u_rot action" - assert ( - self._action.u_rot.shape[1] == 1 - ), f"Scripted physical rotation action of agent {self.name} has wrong shape" - assert ( - (self._action.u_rot / self._action.u_rot_multiplier).abs() - <= self.u_rot_range - ).all(), f"Scripted physical rotation action of {self.name} is out of range" @property def u_range(self): return self.action.u_range - @property - def u_rot_range(self): - return self.action.u_rot_range - @property def obs_noise(self): return self._obs_noise if self._obs_noise is not None else 0 @@ -940,10 +975,6 @@ def action(self) -> Action: def u_multiplier(self): return self.action.u_multiplier - @property - def u_rot_multiplier(self): - return self.action.u_rot_multiplier - @property def max_f(self): return self._max_f @@ -1014,6 +1045,7 @@ def _spawn(self, dim_c: int, dim_p: int, dim_capability: int = None): @override(Entity) def _reset(self, env_index: int): self.action._reset(env_index) + self.dynamics.reset(env_index) super()._reset(env_index) @override(Entity) @@ -1035,11 +1067,11 @@ def render(self, env_index: int = 0) -> "List[Geom]": if self._sensors is not None: for sensor in self._sensors: geoms += sensor.render(env_index=env_index) - if self._render_action and self.action.u is not None: + if self._render_action and self.state.force is not None: velocity = rendering.Line( self.state.pos[env_index], self.state.pos[env_index] - + self.action.u[env_index] * 10 * self.shape.circumscribed_radius(), + + self.state.force[env_index] * 10 * self.shape.circumscribed_radius(), width=2, ) velocity.set_color(*self.color) @@ -1568,10 +1600,11 @@ def step(self): self.torque[:] = 0 for i, entity in enumerate(self.entities): - # apply agent force controls - self._apply_action_force(entity, i) - # apply agent torque controls - self._apply_action_torque(entity, i) + if isinstance(entity, Agent): + # apply agent force controls + self._apply_action_force(entity, i) + # apply agent torque controls + self._apply_action_torque(entity, i) # apply friction self._apply_friction_force(entity, i) # apply gravity @@ -1591,57 +1624,30 @@ def step(self): self._update_comm_state(agent) # gather agent action forces - def _apply_action_force(self, entity: Entity, index: int): - if isinstance(entity, Agent): - # set applied forces - if entity.movable: - noise = ( - torch.randn( - *entity.action.u.shape, device=self.device, dtype=torch.float32 - ) - * entity.u_noise - if entity.u_noise - else 0.0 + def _apply_action_force(self, agent: Agent, index: int): + if agent.movable: + if agent.max_f is not None: + agent.state.force = TorchUtils.clamp_with_norm( + agent.state.force, 
agent.max_f ) - entity.action.u += noise - if entity.max_f is not None: - entity.action.u = TorchUtils.clamp_with_norm( - entity.action.u, entity.max_f - ) - if entity.f_range is not None: - entity.action.u = torch.clamp( - entity.action.u, -entity.f_range, entity.f_range - ) - self.force[:, index] += entity.action.u - assert not self.force.isnan().any() - - def _apply_action_torque(self, entity: Entity, index: int): - if isinstance(entity, Agent) and entity.u_rot_range != 0: - # set applied forces - if entity.rotatable: - noise = ( - torch.randn( - *entity.action.u_rot.shape, - device=self.device, - dtype=torch.float32, - ) - * entity.action.u_rot_noise - if entity.action.u_rot_noise - else 0.0 + if agent.f_range is not None: + agent.state.force = torch.clamp( + agent.state.force, -agent.f_range, agent.f_range ) - entity.action.u_rot = entity.action.u_rot + noise - if len(entity.action.u_rot.shape) == 1: - entity.action.u_rot.unsqueeze_(-1) - if entity.max_t is not None: - entity.action.u_rot = TorchUtils.clamp_with_norm( - entity.action.u_rot, entity.max_t - ) - if entity.t_range is not None: - entity.action.u_rot = torch.clamp( - entity.action.u_rot, -entity.t_range, entity.t_range - ) - self.torque[:, index] += entity.action.u_rot - assert not self.torque.isnan().any() + self.force[:, index] += agent.state.force + + def _apply_action_torque(self, agent: Agent, index: int): + if agent.rotatable: + if agent.max_t is not None: + agent.state.torque = TorchUtils.clamp_with_norm( + agent.state.torque, agent.max_t + ) + if agent.t_range is not None: + agent.state.torque = torch.clamp( + agent.state.torque, -agent.t_range, agent.t_range + ) + + self.torque[:, index] += agent.state.torque def _apply_gravity(self, entity: Entity, index: int): if entity.movable: @@ -2452,15 +2458,7 @@ def _integrate_state(self, entity: Entity, index: int, substep: int): def _update_comm_state(self, agent): # set communication state (directly for now) if not agent.silent: - noise = ( - torch.randn( - *agent.action.c.shape, device=self.device, dtype=torch.float32 - ) - * agent.c_noise - if agent.c_noise - else 0.0 - ) - agent.state.c = agent.action.c + noise + agent.state.c = agent.action.c @override(TorchVectorizedObject) def to(self, device: torch.device): diff --git a/vmas/simulator/dynamics/common.py b/vmas/simulator/dynamics/common.py new file mode 100644 index 00000000..05a5c086 --- /dev/null +++ b/vmas/simulator/dynamics/common.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024. +# ProrokLab (https://www.proroklab.org/) +# All rights reserved. 
+import abc +from abc import ABC +from typing import Union + +from torch import Tensor + + +class Dynamics(ABC): + def __init__( + self, + ): + self._agent = None + + def reset(self, index: Union[Tensor, int] = None): + pass + + @property + def agent(self): + if self._agent is None: + raise ValueError( + "You need to add the dynamics to an agent during construction before accessing its properties" + ) + return self._agent + + @agent.setter + def agent(self, value): + if self._agent is not None: + raise ValueError("Agent in dynamics has already been set") + if value.action_size < self.needed_action_size: + raise ValueError( + f"Agent action size {value.action_size} is less than the required dynamics action size {self.needed_action_size}" + ) + self._agent = value + + @property + @abc.abstractmethod + def needed_action_size(self) -> int: + raise NotImplementedError + + @abc.abstractmethod + def process_action(self): + raise NotImplementedError diff --git a/vmas/simulator/dynamics/diff_drive.py b/vmas/simulator/dynamics/diff_drive.py index 83bed685..ebf556d0 100644 --- a/vmas/simulator/dynamics/diff_drive.py +++ b/vmas/simulator/dynamics/diff_drive.py @@ -1,34 +1,27 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. import math -from typing import Union import torch -from torch import Tensor + import vmas.simulator.core import vmas.simulator.utils +from vmas.simulator.dynamics.common import Dynamics -class DiffDriveDynamics: +class DiffDrive(Dynamics): def __init__( self, - agent: vmas.simulator.core.Agent, world: vmas.simulator.core.World, integration: str = "rk4", # one of "euler", "rk4" ): + super().__init__() assert integration == "rk4" or integration == "euler" - assert ( - agent.action.u_rot_range != 0 - ), "Agent with diff drive dynamics needs non zero u_rot_range" - self.agent = agent - self.world = world self.dt = world.dt self.integration = integration - - def reset(self, index: Union[Tensor, int] = None): - pass + self.world = world def euler(self, f, rot): return f(rot) @@ -41,9 +34,13 @@ def runge_kutta(self, f, rot): return (1 / 6) * (k1 + 2 * k2 + 2 * k3 + k4) - def process_force(self): - u_forward = self.agent.action.u[:, vmas.simulator.utils.X] - u_rot = self.agent.action.u_rot.squeeze(-1) + @property + def needed_action_size(self) -> int: + return 2 + + def process_action(self): + u_forward = self.agent.action.u[:, 0] + u_rot = self.agent.action.u[:, 1] def f(rot): return torch.stack( @@ -55,5 +52,6 @@ def f(rot): else: u = self.runge_kutta(f, self.agent.state.rot.squeeze(-1)) - self.agent.action.u[:, vmas.simulator.utils.X] = u[vmas.simulator.utils.X] - self.agent.action.u[:, vmas.simulator.utils.Y] = u[vmas.simulator.utils.Y] + self.agent.state.force[:, vmas.simulator.utils.X] = u[vmas.simulator.utils.X] + self.agent.state.force[:, vmas.simulator.utils.Y] = u[vmas.simulator.utils.Y] + self.agent.state.torque = u_rot.unsqueeze(-1) diff --git a/vmas/simulator/dynamics/holonomic.py b/vmas/simulator/dynamics/holonomic.py new file mode 100644 index 00000000..16ddabe2 --- /dev/null +++ b/vmas/simulator/dynamics/holonomic.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022-2024. +# ProrokLab (https://www.proroklab.org/) +# All rights reserved. 
+ +from vmas.simulator.dynamics.common import Dynamics + + +class Holonomic(Dynamics): + @property + def needed_action_size(self) -> int: + return 2 + + def process_action(self): + self.agent.state.force = self.agent.action.u[:, : self.needed_action_size] diff --git a/vmas/simulator/dynamics/holonomic_with_rot.py b/vmas/simulator/dynamics/holonomic_with_rot.py new file mode 100644 index 00000000..5d41d0e1 --- /dev/null +++ b/vmas/simulator/dynamics/holonomic_with_rot.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022-2024. +# ProrokLab (https://www.proroklab.org/) +# All rights reserved. + +from vmas.simulator.dynamics.common import Dynamics + + +class HolonomicWithRotation(Dynamics): + @property + def needed_action_size(self) -> int: + return 3 + + def process_action(self): + self.agent.state.force = self.agent.action.u[:, :2] + self.agent.state.torque = self.agent.action.u[:, 2].unsqueeze(-1) diff --git a/vmas/simulator/dynamics/kinematic_bicycle.py b/vmas/simulator/dynamics/kinematic_bicycle.py index 5514d0d3..244f11fe 100644 --- a/vmas/simulator/dynamics/kinematic_bicycle.py +++ b/vmas/simulator/dynamics/kinematic_bicycle.py @@ -1,21 +1,20 @@ -# Copyright (c) 2023. +# Copyright (c) 2023-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. from typing import Union import torch -from torch import Tensor import vmas.simulator.core import vmas.simulator.utils +from vmas.simulator.dynamics.common import Dynamics -class KinematicBicycleDynamics: +class KinematicBicycle(Dynamics): # For the implementation of the kinematic bicycle model, see the equation (2) of the paper Polack, Philip, et al. "The kinematic bicycle model: A consistent model for planning feasible trajectories for autonomous vehicles?." 2017 IEEE intelligent vehicles symposium (IV). IEEE, 2017. def __init__( self, - agent: vmas.simulator.core.Agent, world: vmas.simulator.core.World, width: float, l_f: float, @@ -23,21 +22,18 @@ def __init__( max_steering_angle: float, integration: str = "euler", # one of "euler", "rk4" ): + super().__init__() assert integration in ( "rk4", "euler", ), "Integration method must be 'euler' or 'rk4'." 
- self.agent = agent - self.world = world self.width = width self.l_f = l_f # Distance between the front axle and the center of gravity self.l_r = l_r # Distance between the rear axle and the center of gravity self.max_steering_angle = max_steering_angle self.dt = world.dt self.integration = integration - - def reset(self, index: Union[Tensor, int] = None): - pass + self.world = world def euler(self, f, state): # Update the state using Euler's method @@ -53,10 +49,15 @@ def runge_kutta(self, f, state): k4 = f(state + self.dt * k3) return state + (self.dt / 6) * (k1 + 2 * k2 + 2 * k3 + k4) - def process_force(self): + @property + def needed_action_size(self) -> int: + return 2 + + def process_action(self): # Extracts the velocity and steering angle from the agent's actions and convert them to physical force and torque - velocity = self.agent.action.u[:, vmas.simulator.utils.X] - steering_angle = self.agent.action.u_rot.squeeze(-1) + velocity = self.agent.action.u[:, 0] + steering_angle = self.agent.action.u[:, 1] + # Ensure steering angle is within bounds steering_angle = torch.clamp( steering_angle, -self.max_steering_angle, self.max_steering_angle @@ -99,6 +100,6 @@ def f(state): torque = self.agent.moment_of_inertia * angular_acceleration # Update the physical force and torque required for the user inputs - self.agent.action.u[:, vmas.simulator.utils.X] = force_x - self.agent.action.u[:, vmas.simulator.utils.Y] = force_y - self.agent.action.u_rot[:, 0] = torque + self.agent.state.force[:, vmas.simulator.utils.X] = force_x + self.agent.state.force[:, vmas.simulator.utils.Y] = force_y + self.agent.state.torque = torque.unsqueeze(-1) diff --git a/vmas/simulator/dynamics/waypoint_tracker.py b/vmas/simulator/dynamics/waypoint_tracker.py new file mode 100644 index 00000000..13211dcb --- /dev/null +++ b/vmas/simulator/dynamics/waypoint_tracker.py @@ -0,0 +1,71 @@ +""" +Implements a waypoint tracker, assuming a DifferentialDrive robot. + +Waypoints are given as relative position and heading, or u=(dx, dy, dtheta). 
+""" + +import math + +import torch + +import vmas.simulator.core +import vmas.simulator.utils +from vmas.simulator.dynamics.common import Dynamics +from vmas.simulator.dynamics.diff_drive import DiffDrive + + +class WaypointTracker(Dynamics): + def __init__( + self, + world: vmas.simulator.core.World, + speed=1e-1, + ): + super().__init__() + + self.dt = world.dt + self.world = world + self.speed = speed + + @property + def needed_action_size(self) -> int: + return 2 + + def process_action(self): + # TODO: add input queue, see VelocityController + + # convert relative position (dx, dy) to (forward velocity, rotational velocity) + # TODO: verify FLU + dx = self.agent.action.u[:, 0] + dy = self.agent.action.u[:, 1] + + # round dx/dy to zero if near 0 + dx = torch.where(torch.abs(dx) < 1e-3, 0, dx) + dy = torch.where(torch.abs(dy) < 1e-3, 0, dy) + + # when x is near 0, set angular_velocity to either -pi/2 or pi/2, depending on sign of y + # otherwise, set it to clamp(arctan(dy/dx) / dt), clamped to +/- pi + angular_velocity = torch.where(torch.abs(dx) < 1e-2, + torch.sign(dy) * torch.pi/2, + torch.clamp(torch.arctan(dy/dx) / self.dt, -torch.pi, torch.pi)) + + forward_velocity = dx / self.dt + # print("vel inputs", forward_velocity, angular_velocity) + + robot_angle = self.agent.state.rot + linear_accel = torch.cat([forward_velocity * torch.cos(robot_angle), forward_velocity * torch.sin(robot_angle)], dim=0) / self.dt + angular_accel = angular_velocity / self.dt + # print("accel", linear_accel, angular_accel) + + # print(self.agent.mass, self.agent.moment_of_inertia) + linear_force = self.agent.mass * linear_accel + angular_force = self.agent.moment_of_inertia * angular_accel + print("forces", linear_force, angular_force) + + force_factor = self.speed + self.agent.state.force[:, 0] = linear_force[0] * force_factor + self.agent.state.force[:, 1] = linear_force[1] * force_factor + self.agent.state.torque = angular_force.unsqueeze(-1) * force_factor + + + + diff --git a/vmas/simulator/environment/environment.py b/vmas/simulator/environment/environment.py index d9268ae8..436328f1 100644 --- a/vmas/simulator/environment/environment.py +++ b/vmas/simulator/environment/environment.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. 
import random @@ -41,8 +41,15 @@ def __init__( continuous_actions: bool = True, seed: Optional[int] = None, dict_spaces: bool = False, + multidiscrete_actions: bool = False, + clamp_actions: bool = False, **kwargs, ): + if multidiscrete_actions: + assert ( + not continuous_actions + ), "When asking for multidiscrete_actions, make sure continuous_actions=False" + self.scenario = scenario self.num_envs = num_envs TorchVectorizedObject.__init__(self, num_envs, torch.device(device)) @@ -53,10 +60,12 @@ def __init__( self.max_steps = max_steps self.continuous_actions = continuous_actions self.dict_spaces = dict_spaces + self.clamp_action = clamp_actions self.reset(seed=seed) # configure spaces + self.multidiscrete_actions = multidiscrete_actions self.action_space = self.get_action_space() self.observation_space = self.get_observation_space() @@ -291,48 +300,47 @@ def get_observation_space(self): ) def get_agent_action_size(self, agent: Agent): - return ( - self.world.dim_p - + (1 if agent.action.u_rot_range != 0 else 0) - + (self.world.dim_c if not agent.silent else 0) - if self.continuous_actions - else 1 - + (1 if agent.action.u_rot_range != 0 else 0) - + (1 if not agent.silent else 0) - ) + if self.continuous_actions: + return agent.action.action_size + ( + self.world.dim_c if not agent.silent else 0 + ) + elif self.multidiscrete_actions: + return agent.action_size + ( + 1 if not agent.silent and self.world.dim_c != 0 else 0 + ) + else: + return 1 def get_agent_action_space(self, agent: Agent): if self.continuous_actions: return spaces.Box( low=np.array( - [-agent.u_range] * self.world.dim_p - + [-agent.u_rot_range] * (1 if agent.u_rot_range != 0 else 0) + (-agent.action.u_range_tensor).tolist() + [0] * (self.world.dim_c if not agent.silent else 0), dtype=np.float32, ), high=np.array( - [agent.u_range] * self.world.dim_p - + [agent.u_rot_range] * (1 if agent.u_rot_range != 0 else 0) + agent.action.u_range_tensor.tolist() + [1] * (self.world.dim_c if not agent.silent else 0), dtype=np.float32, ), shape=(self.get_agent_action_size(agent),), dtype=np.float32, ) + elif self.multidiscrete_actions: + actions = [3] * agent.action_size + ( + [self.world.dim_c] if not agent.silent and self.world.dim_c != 0 else [] + ) + return spaces.MultiDiscrete(actions) else: - if (self.world.dim_c == 0 or agent.silent) and agent.u_rot_range == 0.0: - return spaces.Discrete(self.world.dim_p * 2 + 1) - else: - actions = ( - [self.world.dim_p * 2 + 1] - + ([3] if agent.u_rot_range != 0 else []) - + ( - [self.world.dim_c] - if not agent.silent and self.world.dim_c != 0 - else [] - ) + return spaces.Discrete( + 3**agent.action_size + * ( + self.world.dim_c + if not agent.silent and self.world.dim_c != 0 + else 1 ) - return spaces.MultiDiscrete(actions) + ) def get_agent_observation_space(self, agent: Agent, obs: AGENT_OBS_TYPE): if isinstance(obs, Tensor): @@ -365,82 +373,81 @@ def _set_action(self, action, agent): action = action.clone().detach().to(self.device) assert not action.isnan().any() agent.action.u = torch.zeros( - self.batch_dim, self.world.dim_p, device=self.device, dtype=torch.float32 + self.batch_dim, agent.action_size, device=self.device, dtype=torch.float32 ) assert action.shape[1] == self.get_agent_action_size(agent), ( f"Agent {agent.name} has wrong action size, got {action.shape[1]}, " f"expected {self.get_agent_action_size(agent)}" ) + if self.clamp_action and self.continuous_actions: + action = action.clamp(-agent.action.u_range, agent.action.u_range) + action_index = 0 if 
self.continuous_actions: - physical_action = action[:, action_index : action_index + self.world.dim_p] + physical_action = action[:, action_index : action_index + agent.action_size] action_index += self.world.dim_p - #TODO: FIX ME, I remove this because it was failing for heterogeneous agents - # the handling of different actions should be done before this statement. - # Note: actions are loaded before preprocessed, so putting in the "process_actions" - # doesn't work - - # assert not torch.any( - # torch.abs(physical_action) > agent.u_range - # ), f"Physical actions of agent {agent.name} are out of its range {agent.u_range}" - - agent.action.u = physical_action.to(torch.float32) - else: - physical_action = action[:, action_index].unsqueeze(-1) - action_index += 1 - self._check_discrete_action( - physical_action, - low=0, - high=self.world.dim_p * 2 + 1, - type="physical", - ) - arr1 = physical_action == 1 - arr2 = physical_action == 2 - arr3 = physical_action == 3 - arr4 = physical_action == 4 + assert not torch.any( + torch.abs(physical_action) > agent.action.u_range_tensor + ), f"Physical actions of agent {agent.name} are out of its range {agent.u_range}" - disc_action_value = agent.u_range - agent.action.u[:, X] -= disc_action_value * arr1.squeeze(-1) - agent.action.u[:, X] += disc_action_value * arr2.squeeze(-1) - agent.action.u[:, Y] -= disc_action_value * arr3.squeeze(-1) - agent.action.u[:, Y] += disc_action_value * arr4.squeeze(-1) - - agent.action.u *= agent.u_multiplier - if agent.u_rot_range != 0: - if self.continuous_actions: - physical_action = action[:, action_index].unsqueeze(-1) - action_index += 1 - assert not torch.any( - torch.abs(physical_action) > agent.u_rot_range - ), f"Physical rotation actions of agent {agent.name} are out of its range {agent.u_rot_range}" - agent.action.u_rot = physical_action.to(torch.float32) + agent.action.u = physical_action.to(torch.float32) - else: - agent.action.u_rot = torch.zeros( - self.batch_dim, 1, device=self.device, dtype=torch.float32 + else: + if not self.multidiscrete_actions: + # This bit of code translates the discrete action (taken from a space that + # is the cartesian product of all action spaces) into a multi discrete action. + # For example, if agent.action_size=4, it will mean that the agent will have + # 4 actions each with 3 possibilities (stay, decrement, increment). + # The env will have a space Discrete(3**4). + # This code will translate the action (with shape [n_envs,1] and range [0,3**4)) to an + # action with shape [n_envs,4] and range [0,3). 
+ n_actions = self.get_agent_action_space(agent).n + action_range = torch.arange(n_actions, device=self.device).expand( + self.world.batch_dim, n_actions ) + physical_action = action + action_range = torch.where(action_range == physical_action, 1.0, 0.0) + action_range = action_range.view( + (self.world.batch_dim,) + + (3,) * agent.action_size + + (self.world.dim_c,) + * (1 if not agent.silent and self.world.dim_c != 0 else 0) + ) + action = action_range.nonzero()[:, 1:] + + # Now we have an action with shape [n_envs, action_size+comms_actions] + for _ in range(agent.action_size): physical_action = action[:, action_index].unsqueeze(-1) - action_index += 1 self._check_discrete_action( physical_action, low=0, high=3, - type="rotation", + type="physical", ) arr1 = physical_action == 1 arr2 = physical_action == 2 - disc_action_value = agent.u_rot_range + disc_action_value = agent.action.u_range_tensor[action_index] + agent.action.u[:, action_index] -= disc_action_value * arr1.squeeze(-1) + agent.action.u[:, action_index] += disc_action_value * arr2.squeeze(-1) + + action_index += 1 - agent.action.u_rot[:, 0] -= disc_action_value * arr1.squeeze(-1) - agent.action.u_rot[:, 0] += disc_action_value * arr2.squeeze(-1) + agent.action.u *= agent.action.u_multiplier_tensor - agent.action.u_rot *= agent.u_rot_multiplier + if agent.action.u_noise > 0: + noise = ( + torch.randn( + *agent.action.u.shape, device=self.device, dtype=torch.float32 + ) + * agent.u_noise + ) + agent.action.u += noise if self.world.dim_c > 0 and not agent.silent: if not self.continuous_actions: comm_action = action[:, action_index:] @@ -462,6 +469,14 @@ def _set_action(self, action, agent): comm_action < 0 ), "Comm actions are out of range [0,1]" agent.action.c = comm_action + if agent.c_noise > 0: + noise = ( + torch.randn( + *agent.action.c.shape, device=self.device, dtype=torch.float32 + ) + * agent.c_noise + ) + agent.action.c += noise def render( self, diff --git a/vmas/simulator/scenario.py b/vmas/simulator/scenario.py index f46c0ede..c6261138 100644 --- a/vmas/simulator/scenario.py +++ b/vmas/simulator/scenario.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023. +# Copyright (c) 2022-2024. # ProrokLab (https://www.proroklab.org/) # All rights reserved. import typing @@ -63,7 +63,9 @@ def env_process_action(self, agent: Agent): """Do not override""" if agent.action_script is not None: agent.action_callback(self.world) + # Customizable action processor self.process_action(agent) + agent.dynamics.process_action() @abstractmethod def make_world(self, batch_dim: int, device: torch.device, **kwargs) -> World:
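# --- Example: building agents with the new `dynamics` / `action_size` API ---
# A minimal scenario sketch showing how the per-agent `dynamics` model and the
# per-action `u_range` / `u_multiplier` sequences introduced here fit together.
# The scenario ("MyScenario"), agent names, and the reward/observation stubs are
# illustrative placeholders, not part of the library.
import torch

from vmas.simulator.core import Agent, World
from vmas.simulator.dynamics.diff_drive import DiffDrive
from vmas.simulator.dynamics.holonomic_with_rot import HolonomicWithRotation
from vmas.simulator.scenario import BaseScenario


class MyScenario(BaseScenario):
    def make_world(self, batch_dim: int, device: torch.device, **kwargs) -> World:
        world = World(batch_dim, device)

        # Differential-drive agent: DiffDrive needs 2 actions (forward force,
        # rotation), so action_size defaults to 2; each action gets its own
        # range and multiplier from the lists below.
        world.add_agent(
            Agent(
                name="diff_drive_agent",
                u_range=[1.0, 0.5],
                u_multiplier=[1.0, 0.1],
                dynamics=DiffDrive(world, integration="rk4"),
            )
        )

        # Holonomic agent with rotation: 3 actions (x force, y force, torque).
        # Omitting `dynamics` entirely falls back to Holonomic() with 2 actions.
        world.add_agent(
            Agent(
                name="holonomic_rot_agent",
                u_range=[1.0, 1.0, 0.3],
                dynamics=HolonomicWithRotation(),
            )
        )
        return world

    def process_action(self, agent: Agent):
        # Optional hook: runs before agent.dynamics.process_action(), so raw
        # actions can still be pre-processed (e.g. by a velocity controller)
        # before the dynamics converts them into state.force / state.torque.
        pass

    def reset_world_at(self, env_index: int = None):
        pass

    def observation(self, agent: Agent):
        return agent.state.pos

    def reward(self, agent: Agent):
        return torch.zeros(self.world.batch_dim, device=self.world.device)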
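# --- Example: writing a custom Dynamics model ---
# A minimal sketch of a custom dynamics class built on the new `Dynamics` base:
# declare how many actions it consumes via `needed_action_size`, then have
# `process_action` read the raw action (`agent.action.u`) and write the physical
# controls into `agent.state.force` / `agent.state.torque`. The simple
# "heading-aligned push" model below is only an illustration.
import torch

import vmas.simulator.core
from vmas.simulator.dynamics.common import Dynamics


class HeadingPush(Dynamics):
    def __init__(self, world: "vmas.simulator.core.World"):
        super().__init__()
        self.world = world

    @property
    def needed_action_size(self) -> int:
        # Action 0: forward force along the agent's heading, action 1: torque.
        return 2

    def process_action(self):
        forward_force = self.agent.action.u[:, 0]
        heading = self.agent.state.rot.squeeze(-1)
        # Project the forward force onto the world frame using the current heading.
        self.agent.state.force[:, 0] = forward_force * torch.cos(heading)
        self.agent.state.force[:, 1] = forward_force * torch.sin(heading)
        self.agent.state.torque = self.agent.action.u[:, 1].unsqueeze(-1)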
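# --- For intuition: how the flattened Discrete action space is encoded ---
# With multidiscrete_actions=False, each agent gets a single Discrete space of size
# 3**action_size (times dim_c for non-silent agents). The environment decodes the flat
# index with a vectorized one-hot reshape; the helper below is an equivalent,
# non-vectorized decode, shown only to make the encoding explicit. Digit meaning per
# physical action: 0 = no-op, 1 = -u_range, 2 = +u_range; the first action is the most
# significant base-3 digit and the communication choice (if any) the least significant.
def decode_flat_discrete_action(flat: int, action_size: int, dim_c: int, silent: bool):
    comm = None
    if not silent and dim_c != 0:
        flat, comm = divmod(flat, dim_c)
    digits = []
    for _ in range(action_size):
        flat, digit = divmod(flat, 3)
        digits.append(digit)
    digits.reverse()  # first physical action is the most significant digit
    return digits, comm


# Example: action_size=2, silent agent -> Discrete(9).
# Flat index 7 is "21" in base 3: +u_range on action 0, -u_range on action 1.
assert decode_flat_discrete_action(7, action_size=2, dim_c=0, silent=True) == ([2, 1], None)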
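# --- Example: the new environment flags ---
# `multidiscrete_actions=True` exposes a MultiDiscrete space (one 3-way choice per
# physical action, plus one dim_c-way communication choice for non-silent agents)
# instead of the single flattened Discrete space; it requires continuous_actions=False.
# `clamp_actions=True` clamps out-of-range continuous actions to [-u_range, u_range]
# instead of failing the range assertion. Sketch only: the scenario name is a
# placeholder, and it assumes `make_env` forwards extra keyword arguments to the
# Environment constructor.
from vmas import make_env

env = make_env(
    scenario="navigation",
    num_envs=32,
    device="cpu",
    continuous_actions=False,
    multidiscrete_actions=True,
    wrapper=None,
)
print(env.action_space)  # per-agent MultiDiscrete spaces, e.g. MultiDiscrete([3, 3])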