diff --git a/mujoco_controllers/build_env.py b/mujoco_controllers/build_env.py index e90ff09..a63c374 100644 --- a/mujoco_controllers/build_env.py +++ b/mujoco_controllers/build_env.py @@ -20,9 +20,8 @@ def build_arena(name: str) -> composer.Arena: """Build a MuJoCo arena.""" arena = empty.Arena(name=name) - arena.mjcf_model.option.timestep = 0.0001 + arena.mjcf_model.option.timestep = 0.001 arena.mjcf_model.option.gravity = (0.0, 0.0, -9.8) - arena.mjcf_model.option.timestep = 0.0001 arena.mjcf_model.option.noslip_iterations = 3 arena.mjcf_model.size.nconmax = 1000 arena.mjcf_model.size.njmax = 2000 diff --git a/mujoco_controllers/config/controllers/osc.yaml b/mujoco_controllers/config/controllers/osc.yaml index 5e3320c..6793f18 100644 --- a/mujoco_controllers/config/controllers/osc.yaml +++ b/mujoco_controllers/config/controllers/osc.yaml @@ -5,10 +5,17 @@ osc: kp: 200.0 kd: 0 # calculate from damping ratio orientation: + damping_ratio: 0.1 + kp: 1000.0 + kd: 0 # calculate from damping ratio + nullspace: damping_ratio: 1.0 - kp: 200.0 + kp: [100, 100, 100, 100, 100, 100, 100] kd: 0 # calculate from damping ratio + + nullspace: + joint_config: [0, -0.785, 0, -2.356, 0, 1.571, 0.785] convergence: position_threshold: 1e-3 - orientation_threshold: 1e-2 + orientation_threshold: 1e-3 diff --git a/mujoco_controllers/genetic_tuner.py b/mujoco_controllers/genetic_tuner.py index e99e1bf..60b2bf8 100644 --- a/mujoco_controllers/genetic_tuner.py +++ b/mujoco_controllers/genetic_tuner.py @@ -1 +1,116 @@ """Tuning control params with genetic algorithm.""" + +from typing import Tuple, Dict, Optional, Union, List + +import numpy as np +from scipy.spatial.transform import Rotation as R +import pandas as pd + +import mujoco +from mujoco import viewer + +from dm_control import composer, mjcf +from dm_robotics.moma.models.arenas import empty +from dm_robotics.moma import robot +from dm_robotics.transformations.transformations import mat_to_quat, quat_to_mat, quat_to_euler +from dm_robotics.transformations import transformations as tr + +from mujoco_controllers.build_env import construct_physics +from mujoco_controllers.osc import OSC + +from rearrangement_benchmark.env_components.props import Rectangle +from ikpy.chain import Chain +from hydra import compose, initialize +from hydra.utils import instantiate + +import numpy as np +import jax +from evosax import CMA_ES + + +if __name__=="__main__": + # save default configs + ready_config = np.array([0, -0.785, 0, -2.356, 0, 1.571, 0.785]) + grasp_pose_config = np.array([-3.95380744e-04, 2.37985323e-01, 3.52180384e-04, -2.55912981e+00, + -2.42755642e-04, 2.79711454e+00, 7.85573570e-01]) + + # load different robot configurations + initialize(version_base=None, config_path="./config", job_name="default_config") + POSITION_CONFIG = compose(config_name="controller_tuning", overrides=["robots=default"]) + VELOCITY_CONFIG = compose(config_name="controller_tuning", overrides=["robots=velocity"]) + MOTOR_CONFIG = compose(config_name="controller_tuning", overrides=["robots=motor"]) + IKPY_URDF_PATH = "./models/arms/robot.urdf" + + # for now assign default cfg + cfg = MOTOR_CONFIG + kinematic_chain = Chain.from_urdf_file(IKPY_URDF_PATH, base_elements=["panda_link0"]) + + physics, passive_view, arm, gripper = construct_physics(cfg) + + # define controller + osc = OSC(physics, arm, MOTOR_CONFIG["controllers"]["osc"]) + + # define GA for tuning params + + def fitness_fn(params, controller): + """ + Evaluate time to reach target and convergence. + + For now the target is fixed but in future test more diverse targets. + """ + # overwrite control params + controller.controller_gains["position"]["kp"] = float(params.at[0].get()) + controller.controller_gains["orientation"]["kp"] = float(params.at[2].get()) + #controller.controller_gains["position"]["damping_ratio"] = float(params.at[1].get()) + #controller.controller_gains["orientation"]["damping_ratio"] = float(params.at[3].get()) + + # reset the simulation + controller.physics.reset() + controller.physics.data.qpos[:7] = np.array(cfg.robots.arm_configs.ready) + + # compute the eef targets + target_eef_pose = np.array([0.45,0.0,0.6]) + target_orientation = R.from_euler('xyz', [0, 180, 0], degrees=True).as_matrix() + target_quat = tr.mat_to_quat(target_orientation) + + controller.eef_target_position = target_eef_pose + controller.eef_target_velocity = np.zeros(3) + controller.eef_target_quat = target_quat + controller.eef_target_angular_velocity = np.zeros(3) + + # run controller to target + start_time = controller.physics.data.time + status = osc.run_controller(3.0) + end_time = controller.physics.data.time + + # compute fitness + time_to_target = end_time - start_time + return time_to_target + (status*10000) + + rng = jax.random.PRNGKey(0) + strategy = CMA_ES( + popsize=10, + num_dims=2, + sigma_init=250, + ) + params = strategy.params_strategy + params = params.replace(init_min=150, init_max=250) + state = strategy.initialize(rng, params) + + for t in range(10): + rng, rng_gen, rng_eval = jax.random.split(rng, 3) + x, state = strategy.ask(rng_gen, state, params) + print("x: ", x) + print("state: ", state) + fitness = [] + for param in x: + fitness.append(fitness_fn(param, osc)) + state = strategy.tell(x, fitness, state, params) + + # get best params and fitness + best_params = state.best_params + best_fitness = state.best_fitness + + print("Best params: ", best_params) + print("Best fitness: ", best_fitness) + diff --git a/mujoco_controllers/osc.py b/mujoco_controllers/osc.py index 6e3883d..cf87fbb 100644 --- a/mujoco_controllers/osc.py +++ b/mujoco_controllers/osc.py @@ -20,6 +20,7 @@ from dm_robotics.transformations import transformations as tr from mujoco_controllers.build_env import construct_physics +from mujoco_controllers.trajectory import LinearTrajectory from rearrangement_benchmark.env_components.props import Rectangle from ikpy.chain import Chain @@ -28,17 +29,20 @@ class OSC(object): - def __init__(self, physics, passive_view, arm, controller_config): + def __init__(self, physics, arm, hand, controller_config, passive_view=None): # core simulation instance self.physics = physics self.passive_view = passive_view # controller gains self.controller_gains = controller_config["gains"] + self.nullspace_config = np.array(controller_config["nullspace"]["joint_config"]) self.position_threshold = controller_config["convergence"]["position_threshold"] self.orientation_threshold = controller_config["convergence"]["orientation_threshold"] # get site and actuator details from arm + self.arm = arm + self.hand = hand self.eef_site = arm.attachment_site self.arm_joints = arm.joints self.arm_joint_ids = np.array(physics.bind(self.arm_joints).dofadr) @@ -48,7 +52,7 @@ def __init__(self, physics, passive_view, arm, controller_config): self._eef_target_velocity = None self._eef_target_quat = None self._eef_target_angular_velocity = None - self._gripper_status = "open" + self._gripper_status = "idle" # idle, opening, closing # control equation variables self._eef_mass_matrix = None @@ -91,7 +95,8 @@ def _compute_eef_mass_matrix(self): M = np.zeros((nv, nv)) mujoco.mj_fullM(self.physics.model.ptr, M, self.physics.data.qM) M = M[self.arm_joint_ids, :][:, self.arm_joint_ids] - + self.arm_mass_matrix = M + M_inv = np.linalg.inv(M) Mx_inv = np.dot(self._eef_jacobian, np.dot(M_inv, self._eef_jacobian.T)) if abs(np.linalg.det(Mx_inv)) >= 1e-2: @@ -129,6 +134,15 @@ def _orientation_error( def _calc_damping(self, gains: Dict[str, float]) -> np.ndarray: return gains["damping_ratio"] * 2 * np.sqrt(gains["kp"]) + def current_orientation_error(self): + eef_orientation = self.physics.bind(self.eef_site).xmat.reshape(3, 3) + eef_orientation = tr.mat_to_quat(eef_orientation) + return np.max(abs(self._orientation_error(eef_orientation, self.eef_target_quat))) + + def current_position_error(self): + eef_position = self.physics.bind(self.eef_site).xpos + return np.linalg.norm(eef_position - self.eef_target_position) + def pd_control( self, x: np.array, @@ -143,7 +157,10 @@ def pd_control( gains = self.controller_gains["position"] except: raise ValueError("Invalid controller gains") - error = gains["kp"] * (x_desired - x) + self._calc_damping(gains) * (dx_desired - dx) + + pos_error = np.clip(x_desired - x, -0.05, 0.05) + vel_error = dx_desired - dx + error = gains["kp"] * pos_error + self._calc_damping(gains) * vel_error # considered limiting error term return error @@ -152,8 +169,20 @@ def pd_control( gains = self.controller_gains["orientation"] except: raise ValueError("Invalid controller gains") - error = gains["kp"] * self._orientation_error(x, x_desired) + self._calc_damping(gains) * (dx_desired - dx) + + # negative sign for kp arises due to how orientation error is currently calculated + error = -gains["kp"] * self._orientation_error(x, x_desired) + self._calc_damping(gains) * (dx_desired - dx) return error + + elif mode == "nullspace": + try: + gains = self.controller_gains["nullspace"] + except: + raise ValueError("Invalid controller gains") + + error = gains["kp"] * (x_desired - x) + self._calc_damping(gains) * (dx_desired - dx) + return error + else: raise ValueError("Invalid mode for pd control") @@ -164,10 +193,10 @@ def compute_control_output(self): self._compute_eef_mass_matrix() # get joint velocities - current_joint_velocity = physics.data.qvel[self.arm_joint_ids] + current_joint_velocity = self.physics.data.qvel[self.arm_joint_ids] # calculate position pd control - eef_current_position = physics.bind(self.eef_site).xpos.copy() + eef_current_position = self.physics.bind(self.eef_site).xpos.copy() eef_current_velocity = self._eef_jacobian[:3,:] @ current_joint_velocity position_pd = self.pd_control( x=eef_current_position, @@ -179,7 +208,7 @@ def compute_control_output(self): ) # calculate orientation pd control - eef_quat = mat_to_quat(physics.bind(self.eef_site).xmat.reshape(3,3).copy()) + eef_quat = mat_to_quat(self.physics.bind(self.eef_site).xmat.reshape(3,3).copy()) eef_angular_vel = self._eef_jacobian[3:,:] @ current_joint_velocity orientation_pd = self.pd_control( x=eef_quat, @@ -192,17 +221,37 @@ def compute_control_output(self): pd_error = np.hstack([position_pd, orientation_pd]) tau = self._eef_jacobian.T @ self._eef_mass_matrix @ pd_error - tau += self.physics.data.qfrc_bias[self.arm_joint_ids] + # nullspace projection + nullspace_error = self.pd_control( + x=self.physics.data.qpos[self.arm_joint_ids], + x_desired=self.nullspace_config, + dx=self.physics.data.qvel[self.arm_joint_ids], + dx_desired=np.zeros(len(self.arm_joints)), + gains=self.controller_gains["nullspace"], + mode="nullspace" + ) + + null_jacobian = np.linalg.inv(self.arm_mass_matrix) @ self._eef_jacobian.T @ self._eef_mass_matrix + tau += (np.eye(len(self.arm_joints)) - self._eef_jacobian.T @ null_jacobian.T) @ nullspace_error + + # compensate for external forces + tau += self.physics.data.qfrc_bias[self.arm_joint_ids] + # compute effective torque actuator_moment_inv = np.linalg.pinv(self.physics.data.actuator_moment) actuator_moment_inv = actuator_moment_inv[self.arm_joint_ids, :][:, self.arm_joint_ids] tau = tau @ actuator_moment_inv - if self._gripper_status == "open": + # include gripper status in command + if self._gripper_status == "idle": tau = np.concatenate([tau, [0.0]]) - else: + elif self._gripper_status == "closing": tau = np.concatenate([tau, [255.0]]) + elif self._gripper_status == "opening": + tau = np.concatenate([tau, [-255.0]]) + else: + raise ValueError("Invalid gripper status") return tau @@ -210,24 +259,36 @@ def run_controller(self, duration): converged = False start_time = self.physics.data.time while (self.physics.data.time - start_time < duration) and (not converged): + # compute control command control_command = self.compute_control_output() + + # step the simulation self.physics.set_control(control_command) self.physics.step() - self.passive_view.sync() + if self.passive_view is not None: + self.passive_view.sync() - # TODO: move to separate function - # check if we have converged - eef_pos = physics.bind(self.eef_site).xpos.copy() - eef_quat = mat_to_quat(physics.bind(self.eef_site).xmat.reshape(3,3).copy()) - eef_vel = self._eef_jacobian[:3,:] @ physics.data.qvel[self.arm_joint_ids] - eef_angular_vel = self._eef_jacobian[3:,:] @ physics.data.qvel[self.arm_joint_ids] - position_error = np.linalg.norm(eef_pos - self._eef_target_position) - orientation_error = tr.quat_dist(eef_quat, self._eef_target_quat) - - # TODO: add velocity convergence - if (position_error < self.position_threshold) and (orientation_error < self.orientation_threshold): + # check for convergence + eef_pos = self.physics.bind(self.eef_site).xpos.copy() + eef_quat = mat_to_quat(self.physics.bind(self.eef_site).xmat.reshape(3,3).copy()) + eef_vel = self._eef_jacobian[:3,:] @ self.physics.data.qvel[self.arm_joint_ids] + eef_angular_vel = self._eef_jacobian[3:,:] @ self.physics.data.qvel[self.arm_joint_ids] + + grasp_sensor = self.hand.grasp_sensor_callable(self.physics) + gripper_converged = True if self._gripper_status == "idle" or (self._gripper_status == "closing" and grasp_sensor==2) else False + + print(self.physics.named.data.qpos) + if gripper_converged: + self._gripper_status = "idle" + print(grasp_sensor) + print(gripper_converged) + # TODO: add conditions for velocity convergence + if (self.current_position_error() < self.position_threshold) and (self.current_orientation_error() < self.orientation_threshold) and (gripper_converged): converged = True + break + + return converged if __name__ == "__main__": @@ -250,7 +311,7 @@ def run_controller(self, duration): physics, passive_view, arm, gripper = construct_physics(cfg) # run the controller - osc = OSC(physics, passive_view, arm, MOTOR_CONFIG["controllers"]["osc"]) + osc = OSC(physics, arm, gripper, MOTOR_CONFIG["controllers"]["osc"], passive_view) # compute the eef targets target_eef_pose = np.array([0.45,0.0,0.6]) @@ -259,13 +320,40 @@ def run_controller(self, duration): # above target osc.eef_target_position = target_eef_pose - osc.eef_target_velocity = np.array([0.0, 0.0, 0.0]) + osc.eef_target_velocity = np.zeros(3) osc.eef_target_quat = target_quat - osc.eef_target_angular_velocity = 0.0 - osc.run_controller(1.0) + osc.eef_target_angular_velocity = np.zeros(3) + traj = LinearTrajectory(osc, target_eef_pose, target_quat, 4) + #traj.execute_trajectory(5.0) + osc.run_controller(3.0) # pregrasp pose - osc.eef_target_position = target_eef_pose - np.array([0.0, 0.0, 0.4]) - osc.run_controller(1.0) - + osc.eef_target_position = target_eef_pose - np.array([0.0, 0.0, 0.425]) + traj = LinearTrajectory(osc, target_eef_pose - np.array([0.0, 0.0, 0.4]), target_quat, 4) + #traj.execute_trajectory(3.0) + osc.run_controller(3.0) + + # close gripper + osc._gripper_status = "closing" + osc.run_controller(3.0) + + # pregrasp pose + osc.eef_target_position = target_eef_pose + osc.eef_target_velocity = np.zeros(3) + osc.eef_target_quat = target_quat + osc.eef_target_angular_velocity = np.zeros(3) + traj = LinearTrajectory(osc, target_eef_pose, target_quat, 4) + osc.run_controller(3.0) + # pregrasp pose + osc.eef_target_position = target_eef_pose - np.array([0.0, 0.0, 0.4]) + traj = LinearTrajectory(osc, target_eef_pose - np.array([0.0, 0.0, 0.4]), target_quat, 4) + osc.run_controller(4.0) + + # open gripper + osc._gripper_status = "opening" + osc.run_controller(3.0) + + # pregrasp pose + osc.eef_target_position = target_eef_pose + osc.run_controller(4.0) diff --git a/mujoco_controllers/trajectory.py b/mujoco_controllers/trajectory.py index 8f93f5c..79547b2 100644 --- a/mujoco_controllers/trajectory.py +++ b/mujoco_controllers/trajectory.py @@ -1 +1,41 @@ """Basic trajectory generation functions.""" + +import numpy as np + +from dm_robotics.transformations import transformations as tr + +class LinearTrajectory: + """ + Generate a basic linear trajectory from current config to target config with osc. + + Note: this trajectory has no velocity or time parametrization and is for basic testing only. + """ + + def __init__(self, controller, target_pos, target_ori, num_points): + self.controller = controller + self.start_pos = controller.physics.bind(controller.eef_site).xpos.copy() + self.end_pos = target_pos + self.start_ori = controller.physics.bind(controller.eef_site).xmat.copy().reshape(3,3) + self.start_ori = tr.mat_to_quat(self.start_ori) + self.end_ori = target_ori + self.num_points = num_points + + # linerly interpolate position + self.positions = np.linspace(self.start_pos, self.end_pos, self.num_points) + + # slerp to interpolate orientation + self.orientations = np.array([tr.quat_slerp(self.start_ori, self.end_ori, i/self.num_points) for i in range(self.num_points)]) + + def execute_trajectory(self, duration): + """Run the controller for a given duration.""" + step_duration = duration / self.num_points + for position, orientation in zip(self.positions, self.orientations): + self.controller.eef_target_position = position + self.controller.eef_target_quat = orientation + status = self.controller.run_controller(step_duration) + if status: + print("position error: ", self.controller.current_position_error()) + print("orientation error: ", self.controller.current_orientation_error()) + print("controller returned") + else: + raise RuntimeError("Controller failed to converge") diff --git a/pyproject.toml b/pyproject.toml index fb93d86..f50196a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,8 @@ ipywidgets = "^8.1.1" jupyterlab = "^4.0.9" simple-pid = "^2.0.0" cmaes = "^0.10.0" +evojax = "^0.2.16" +evosax = "^0.1.5" [tool.black] line-length = 120