From a6b0bb5cca182c236e387b8fb170324c8d694efc Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Tue, 24 May 2022 08:19:16 +0800 Subject: [PATCH] dmc swimmer (#140) * feat(dmc swimmer): add doc * feat(dmc swimmer): miss make swimmer xml * fix(dmc swimmer): fix make swimmer util * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): finish swimmer without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): fix bazel test * fix(dmc swimmer): fix bazel test * polish * fix a bug --- docs/env/dm_control.rst | 14 ++ envpool/make_test.py | 2 + envpool/mujoco/BUILD | 1 + envpool/mujoco/dmc/__init__.py | 8 + envpool/mujoco/dmc/mujoco_dmc_align_test.py | 14 ++ .../dmc/mujoco_dmc_deterministic_test.py | 7 + envpool/mujoco/dmc/mujoco_envpool.cc | 5 + envpool/mujoco/dmc/reacher.h | 2 +- envpool/mujoco/dmc/registration.py | 2 + envpool/mujoco/dmc/swimmer.h | 225 ++++++++++++++++++ envpool/mujoco/dmc/utils.cc | 68 ++++++ envpool/mujoco/dmc/utils.h | 1 + 12 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 envpool/mujoco/dmc/swimmer.h diff --git a/docs/env/dm_control.rst b/docs/env/dm_control.rst index 86be6273..687e7bb1 100644 --- a/docs/env/dm_control.rst +++ b/docs/env/dm_control.rst @@ -199,6 +199,20 @@ ReacherEasy-v1, ReacherHard-v1 - ``max_episode_steps``: 1000; +SwimmerSwimmer6-v1, SwimmerSwimmer15-v1 +--------------------------------------- + +`dm_control 
suite swimmer source code +`_ + +- Observation spec: a namedtuple with three keys: ``joints (5 for swimmer6, + 14 for swimmer15)``, ``to_target (2)``, and ``body_velocities (18 for + swimmer6, 45 for swimmer15)``; +- Action spec: ``(5 for swimmer6, 14 for swimmer15)``, with range ``[-1, 1]``; +- ``frame_skip``: 15; +- ``max_episode_steps``: 1000; + + WalkerRun-v1, WalkerStand-v1, WalkerWalk-v1 ------------------------------------------- diff --git a/envpool/make_test.py b/envpool/make_test.py index 5d9bb83c..73cad487 100644 --- a/envpool/make_test.py +++ b/envpool/make_test.py @@ -157,6 +157,8 @@ def test_make_mujoco_dmc(self) -> None: "PointMassHard-v1", "ReacherEasy-v1", "ReacherHard-v1", + "SwimmerSwimmer6-v1", + "SwimmerSwimmer15-v1", "WalkerRun-v1", "WalkerStand-v1", "WalkerWalk-v1", diff --git a/envpool/mujoco/BUILD b/envpool/mujoco/BUILD index 48523957..44510688 100644 --- a/envpool/mujoco/BUILD +++ b/envpool/mujoco/BUILD @@ -80,6 +80,7 @@ cc_library( "dmc/pendulum.h", "dmc/point_mass.h", "dmc/reacher.h", + "dmc/swimmer.h", "dmc/utils.h", "dmc/walker.h", ], diff --git a/envpool/mujoco/dmc/__init__.py b/envpool/mujoco/dmc/__init__.py index 2971026d..8e1abc00 100644 --- a/envpool/mujoco/dmc/__init__.py +++ b/envpool/mujoco/dmc/__init__.py @@ -38,6 +38,8 @@ _DmcPointMassEnvSpec, _DmcReacherEnvPool, _DmcReacherEnvSpec, + _DmcSwimmerEnvPool, + _DmcSwimmerEnvSpec, _DmcWalkerEnvPool, _DmcWalkerEnvSpec, ) @@ -81,6 +83,9 @@ DmcReacherEnvSpec, DmcReacherDMEnvPool, DmcReacherGymEnvPool = py_env( _DmcReacherEnvSpec, _DmcReacherEnvPool ) +DmcSwimmerEnvSpec, DmcSwimmerDMEnvPool, DmcSwimmerGymEnvPool = py_env( + _DmcSwimmerEnvSpec, _DmcSwimmerEnvPool +) DmcWalkerEnvSpec, DmcWalkerDMEnvPool, DmcWalkerGymEnvPool = py_env( _DmcWalkerEnvSpec, _DmcWalkerEnvPool ) @@ -122,6 +127,9 @@ "DmcReacherEnvSpec", "DmcReacherDMEnvPool", "DmcReacherGymEnvPool", + "DmcSwimmerEnvSpec", + "DmcSwimmerDMEnvPool", + "DmcSwimmerGymEnvPool", "DmcWalkerEnvSpec", "DmcWalkerDMEnvPool", 
"DmcWalkerGymEnvPool", diff --git a/envpool/mujoco/dmc/mujoco_dmc_align_test.py b/envpool/mujoco/dmc/mujoco_dmc_align_test.py index a47726ac..9156d5b8 100644 --- a/envpool/mujoco/dmc/mujoco_dmc_align_test.py +++ b/envpool/mujoco/dmc/mujoco_dmc_align_test.py @@ -46,6 +46,8 @@ DmcPointMassEnvSpec, DmcReacherDMEnvPool, DmcReacherEnvSpec, + DmcSwimmerDMEnvPool, + DmcSwimmerEnvSpec, DmcWalkerDMEnvPool, DmcWalkerEnvSpec, ) @@ -82,6 +84,12 @@ def reset_state( target = ts.observation.target[0] env.physics.named.model.geom_pos["target", "x"] = target[0] env.physics.named.model.geom_pos["target", "y"] = target[1] + elif domain == "swimmer": + xpos, ypos = ts.observation.target0[0] + env.physics.named.model.geom_pos["target", "x"] = xpos + env.physics.named.model.geom_pos["target", "y"] = ypos + env.physics.named.model.light_pos['target_light', 'x'] = xpos + env.physics.named.model.light_pos['target_light', 'y'] = ypos elif domain == "fish" and task == "swim": target = ts.observation.target0[0] env.physics.named.model.geom_pos["target", "x"] = target[0] @@ -254,6 +262,12 @@ def test_reacher(self) -> None: "reacher", ["easy", "hard"], DmcReacherEnvSpec, DmcReacherDMEnvPool ) + def test_swimmer(self) -> None: + self.run_align_check_entry( + "swimmer", ["swimmer6", "swimmer15"], DmcSwimmerEnvSpec, + DmcSwimmerDMEnvPool + ) + def test_walker(self) -> None: self.run_align_check_entry( "walker", ["run", "stand", "walk"], DmcWalkerEnvSpec, DmcWalkerDMEnvPool diff --git a/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py b/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py index 02a84fb8..68d8bab8 100644 --- a/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py +++ b/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py @@ -44,6 +44,8 @@ DmcPointMassEnvSpec, DmcReacherDMEnvPool, DmcReacherEnvSpec, + DmcSwimmerDMEnvPool, + DmcSwimmerEnvSpec, DmcWalkerDMEnvPool, DmcWalkerEnvSpec, ) @@ -189,6 +191,11 @@ def test_reacher(self) -> None: for task in ["easy", "hard"]: 
self.check(DmcReacherEnvSpec, DmcReacherDMEnvPool, task, obs_keys) + def test_swimmer(self) -> None: + obs_keys = ["joints", "to_target", "body_velocities"] + for task in ["swimmer6", "swimmer15"]: + self.check(DmcSwimmerEnvSpec, DmcSwimmerDMEnvPool, task, obs_keys) + def test_walker(self) -> None: obs_keys = ["orientations", "height", "velocity"] for task in ["run", "stand", "walk"]: diff --git a/envpool/mujoco/dmc/mujoco_envpool.cc b/envpool/mujoco/dmc/mujoco_envpool.cc index ab3923ac..c9ab7e7e 100644 --- a/envpool/mujoco/dmc/mujoco_envpool.cc +++ b/envpool/mujoco/dmc/mujoco_envpool.cc @@ -25,6 +25,7 @@ #include "envpool/mujoco/dmc/pendulum.h" #include "envpool/mujoco/dmc/point_mass.h" #include "envpool/mujoco/dmc/reacher.h" +#include "envpool/mujoco/dmc/swimmer.h" #include "envpool/mujoco/dmc/walker.h" using DmcAcrobotEnvSpec = PyEnvSpec; @@ -63,6 +64,9 @@ using DmcPointMassEnvPool = PyEnvPool; using DmcReacherEnvSpec = PyEnvSpec; using DmcReacherEnvPool = PyEnvPool; +using DmcSwimmerEnvSpec = PyEnvSpec; +using DmcSwimmerEnvPool = PyEnvPool; + using DmcWalkerEnvSpec = PyEnvSpec; using DmcWalkerEnvPool = PyEnvPool; @@ -79,5 +83,6 @@ PYBIND11_MODULE(mujoco_dmc_envpool, m) { REGISTER(m, DmcPendulumEnvSpec, DmcPendulumEnvPool) REGISTER(m, DmcPointMassEnvSpec, DmcPointMassEnvPool) REGISTER(m, DmcReacherEnvSpec, DmcReacherEnvPool) + REGISTER(m, DmcSwimmerEnvSpec, DmcSwimmerEnvPool) REGISTER(m, DmcWalkerEnvSpec, DmcWalkerEnvPool) } diff --git a/envpool/mujoco/dmc/reacher.h b/envpool/mujoco/dmc/reacher.h index 5d2e24dc..3deea0ed 100644 --- a/envpool/mujoco/dmc/reacher.h +++ b/envpool/mujoco/dmc/reacher.h @@ -50,7 +50,7 @@ class ReacherEnvFns { "obs:velocity"_.Bind(Spec({2})), #ifdef ENVPOOL_TEST "info:qpos0"_.Bind(Spec({2})), - "info:target"_.Bind(Spec({30})), + "info:target"_.Bind(Spec({2})), #endif "discount"_.Bind(Spec({-1}, {0.0, 1.0}))); } diff --git a/envpool/mujoco/dmc/registration.py b/envpool/mujoco/dmc/registration.py index 93ac6ecb..7d3462e8 100644 --- 
a/envpool/mujoco/dmc/registration.py +++ b/envpool/mujoco/dmc/registration.py @@ -53,6 +53,8 @@ ("point_mass", "hard"), ("reacher", "easy"), ("reacher", "hard"), + ("swimmer", "swimmer6"), + ("swimmer", "swimmer15"), ("walker", "run"), ("walker", "stand"), ("walker", "walk"), diff --git a/envpool/mujoco/dmc/swimmer.h b/envpool/mujoco/dmc/swimmer.h new file mode 100644 index 00000000..db9a5351 --- /dev/null +++ b/envpool/mujoco/dmc/swimmer.h @@ -0,0 +1,225 @@ +/* + * Copyright 2022 Garena Online Private Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+// https://github.com/deepmind/dm_control/blob/1.0.2/dm_control/suite/swimmer.py
+
+#ifndef ENVPOOL_MUJOCO_DMC_SWIMMER_H_
+#define ENVPOOL_MUJOCO_DMC_SWIMMER_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "envpool/core/async_envpool.h"
+#include "envpool/core/env.h"
+#include "envpool/mujoco/dmc/mujoco_env.h"
+#include "envpool/mujoco/dmc/utils.h"
+
+namespace mujoco_dmc {
+
+// Returns the MuJoCo XML for a swimmer task. Reads "swimmer.xml" from
+// base_path and, for task_name "swimmer6" / "swimmer15", passes it through
+// XMLMakeSwimmer with 6 / 15 bodies; for any other task_name the file content
+// is returned unchanged.
+// Marked inline because this non-template function is defined in a header.
+inline std::string GetSwimmerXML(const std::string& base_path,
+                                 const std::string& task_name) {
+  auto content = GetFileContent(base_path, "swimmer.xml");
+  if (task_name == "swimmer6") {
+    return XMLMakeSwimmer(content, 6);
+  }
+  if (task_name == "swimmer15") {
+    return XMLMakeSwimmer(content, 15);
+  }
+  return content;
+}
+
+// Maps a swimmer task name to its body count: "swimmer6" -> 6,
+// "swimmer15" -> 15. Throws std::runtime_error for any other name so that
+// the state spec and the action spec reject unknown tasks the same way.
+inline int SwimmerNumBodies(const std::string& task_name) {
+  if (task_name == "swimmer6") {
+    return 6;
+  }
+  if (task_name == "swimmer15") {
+    return 15;
+  }
+  throw std::runtime_error("Unknown task_name " + task_name +
+                           " for dmc swimmer.");
+}
+
+// Config / state / action spec factory for the dm_control swimmer domain.
+class SwimmerEnvFns {
+ public:
+  // Defaults: 1000 max episode steps, frame_skip 15, task "swimmer6".
+  static decltype(auto) DefaultConfig() {
+    return MakeDict("max_episode_steps"_.Bind(1000), "frame_skip"_.Bind(15),
+                    "task_name"_.Bind(std::string("swimmer6")));
+  }
+  // Observation (and, under ENVPOOL_TEST, info) spec. With n_bodies taken
+  // from the task name: joints has n_bodies - 1 entries, to_target has 2 and
+  // body_velocities has 3 * n_bodies.
+  // Throws std::runtime_error when task_name is not a known swimmer task.
+  template
+  static decltype(auto) StateSpec(const Config& conf) {
+    const std::string task_name = conf["task_name"_];
+    const int n_bodies = SwimmerNumBodies(task_name);
+    return MakeDict("obs:joints"_.Bind(Spec({n_bodies - 1})),
+                    "obs:to_target"_.Bind(Spec({2})),
+                    "obs:body_velocities"_.Bind(Spec({3 * n_bodies})),
+#ifdef ENVPOOL_TEST
+                    "info:qpos0"_.Bind(Spec({n_bodies + 2})),
+                    "info:target0"_.Bind(Spec({2})),
+#endif
+                    "discount"_.Bind(Spec({-1}, {0.0, 1.0})));
+  }
+  // Action spec: n_bodies - 1 values per environment, bounded to [-1, 1].
+  // Throws std::runtime_error when task_name is not a known swimmer task
+  // (previously an unknown name silently produced a 2-value action spec).
+  template
+  static decltype(auto) ActionSpec(const Config& conf) {
+    const std::string task_name = conf["task_name"_];
+    const int n_bodies = SwimmerNumBodies(task_name);
+    return MakeDict(
+        "action"_.Bind(Spec({-1, n_bodies - 1}, {-1.0, 1.0})));
+  }
+};
+
+using SwimmerEnvSpec = EnvSpec;
+
+// dm_control swimmer task. Builds the model from GetSwimmerXML(base_path,
+// task_name) and publishes the joints / to_target / body_velocities
+// observations on every reset and step.
+class SwimmerEnv : public Env, public MujocoEnv {
+ protected:
+  // MuJoCo ids looked up once in the constructor: geoms "head", "nose" and
+  // "target", and the light "target_light".
+  int id_head_, id_nose_, id_target_, id_target_light_;
+#ifdef ENVPOOL_TEST
+  // Target (x, y) chosen by the latest TaskInitializeEpisode; test builds only.
+  std::array target0_;
+#endif
+
+ public:
+  // spec supplies base_path, task_name, frame_skip and max_episode_steps.
+  SwimmerEnv(const Spec& spec, int env_id)
+      : Env(spec, env_id),
+        MujocoEnv(
+            spec.config["base_path"_],
+            GetSwimmerXML(spec.config["base_path"_], spec.config["task_name"_]),
+            spec.config["frame_skip"_], spec.config["max_episode_steps"_]),
+        id_head_(mj_name2id(model_, mjOBJ_GEOM, "head")),
+        id_nose_(mj_name2id(model_, mjOBJ_GEOM, "nose")),
+        id_target_(mj_name2id(model_, mjOBJ_GEOM, "target")),
+        id_target_light_(mj_name2id(model_, mjOBJ_LIGHT, "target_light")) {}
+
+  // Randomizes the joints, then draws a target (x, y) from
+  // [-target_box, target_box]^2, where target_box is 0.3 when a
+  // RandUniform(0, 1) draw is below 0.2 and 2.0 otherwise. The target geom
+  // and the target light are both moved to that position.
+  void TaskInitializeEpisode() override {
+    RandomizeLimitedAndRotationalJoints(&gen_);
+    mjtNum target_box = RandUniform(0, 1)(gen_) < 0.2 ? 0.3 : 2.0;
+    mjtNum xpos = RandUniform(-target_box, target_box)(gen_);
+    mjtNum ypos = RandUniform(-target_box, target_box)(gen_);
+    // physics.named.model.geom_pos['target', 'x'] = xpos
+    // physics.named.model.geom_pos['target', 'y'] = ypos
+    model_->geom_pos[id_target_ * 3 + 0] = xpos;
+    model_->geom_pos[id_target_ * 3 + 1] = ypos;
+    // physics.named.model.light_pos['target_light', 'x'] = xpos
+    // physics.named.model.light_pos['target_light', 'y'] = ypos
+    model_->light_pos[id_target_light_ * 3 + 0] = xpos;
+    model_->light_pos[id_target_light_ * 3 + 1] = ypos;
+#ifdef ENVPOOL_TEST
+    // Snapshot the initial qpos and target so tests can read them back
+    // through the info:qpos0 / info:target0 fields.
+    std::memcpy(qpos0_.get(), data_->qpos, sizeof(mjtNum) * model_->nq);
+    target0_[0] = xpos;
+    target0_[1] = ypos;
+#endif
+  }
+
+  bool IsDone() override { return done_; }
+
+  // Resets the simulation and writes the first state.
+  void Reset() override {
+    ControlReset();
+    WriteState();
+  }
+
+  // Applies the "action" array through ControlStep and writes the new state.
+  void Step(const Action& action) override {
+    mjtNum* act = static_cast(action["action"_].Data());
+    ControlStep(act);
+    WriteState();
+  }
+
+  // Reward from RewardTolerance on the nose-to-target distance, called with
+  // (0.0, target_size, 5 * target_size, 0.1, kLongTail), where target_size is
+  // the first size component of the target geom.
+  float TaskGetReward() override {
+    mjtNum target_size = model_->geom_size[id_target_ * 3];
+    return static_cast(RewardTolerance(NoseToTargetDist(), 0.0,
+                                              target_size, 5 * target_size, 0.1,
+                                              SigmoidType::kLongTail));
+  }
+
+  // This task never ends an episode early.
+  bool TaskShouldTerminateEpisode() override { return false; }
+
+ private:
+  // Allocates a state slot and fills reward, discount, the three observation
+  // arrays and, in test builds, the info fields.
+  void WriteState() {
+    const auto& joints = Joints();
+    const auto& to_target = NoseToTarget();
+    const auto& body_velocities = BodyVelocities();
+
+    State state = Allocate();
+    state["reward"_] = reward_;
+    state["discount"_] = discount_;
+    // obs
+    state["obs:joints"_].Assign(joints.data(), joints.size());
+    state["obs:to_target"_].Assign(to_target.begin(), to_target.size());
+    state["obs:body_velocities"_].Assign(body_velocities.data(),
+                                         body_velocities.size());
+    // info
+#ifdef ENVPOOL_TEST
+    state["info:qpos0"_].Assign(qpos0_.get(), model_->nq);
+    state["info:target0"_].Assign(target0_.begin(), target0_.size());
+#endif
+  }
+
+  // Vector from the nose geom to the target geom, multiplied on the right by
+  // the 3x3 head matrix (row-major, 9 entries); only the first two
+  // components are returned.
+  // NOTE(review): the dm_control line quoted below indexes the *body* matrix
+  // xmat['head'], while this code reads geom_xmat of the geom named "head" --
+  // confirm that the two frames coincide in swimmer.xml.
+  std::array NoseToTarget() {
+    // nose_to_target = (self.named.data.geom_xpos['target'] -
+    //                   self.named.data.geom_xpos['nose'])
+    std::array nose_to_target_global;
+    for (int i = 0; i < 3; i++) {
+      nose_to_target_global[i] = (data_->geom_xpos[id_target_ * 3 + i] -
+                                  data_->geom_xpos[id_nose_ * 3 + i]);
+    }
+    // head_orientation = self.named.data.xmat['head'].reshape(3, 3)
+    // return nose_to_target.dot(head_orientation)[:2]
+    std::array nose_to_target;
+    for (int i = 0; i < 2; i++) {
+      // Column i of the matrix: entries i, i + 3 and i + 6.
+      nose_to_target[i] =
+          nose_to_target_global[0] * data_->geom_xmat[id_head_ * 9 + i + 0] +
+          nose_to_target_global[1] * data_->geom_xmat[id_head_ * 9 + i + 3] +
+          nose_to_target_global[2] * data_->geom_xmat[id_head_ * 9 + i + 6];
+    }
+    return {nose_to_target[0], nose_to_target[1]};
+  }
+  // Euclidean length of the 2-component NoseToTarget() vector.
+  mjtNum NoseToTargetDist() {
+    // return np.linalg.norm(self.nose_to_target())
+    const auto& nose_to_target = NoseToTarget();
+    return std::sqrt(nose_to_target[0] * nose_to_target[0] +
+                     nose_to_target[1] * nose_to_target[1]);
+  }
+  // Reads sensordata in groups of 6 starting at group 2 (i.e. skipping the
+  // first 12 entries) up to and including group nbody, keeping entries
+  // 0, 1 and 5 of each group.
+  std::vector BodyVelocities() {
+    // returns local body velocities: x,y linear, z rotational.
+    std::vector result;
+    for (int i = 2; i < model_->nbody + 1; ++i) {
+      result.emplace_back(data_->sensordata[i * 6 + 0]);
+      result.emplace_back(data_->sensordata[i * 6 + 1]);
+      result.emplace_back(data_->sensordata[i * 6 + 5]);
+    }
+    return result;
+  }
+
+  // Copy of qpos without its first three entries.
+  std::vector Joints() {
+    // return self.data.qpos[3:].copy()
+    std::vector result;
+    for (int i = 3; i < model_->nq; ++i) {
+      result.emplace_back(data_->qpos[i]);
+    }
+    return result;
+  }
+};
+
+using SwimmerEnvPool = AsyncEnvPool;
+
+}  // namespace mujoco_dmc
+
+#endif  // ENVPOOL_MUJOCO_DMC_SWIMMER_H_
diff --git a/envpool/mujoco/dmc/utils.cc b/envpool/mujoco/dmc/utils.cc
index 383fb23c..074f7015 100644
--- a/envpool/mujoco/dmc/utils.cc
+++ b/envpool/mujoco/dmc/utils.cc
@@ -91,6 +91,74 @@ std::string XMLAddPoles(const std::string& content, int n_poles) {
   return writer.result;
 }
 
+// Expands the base swimmer XML to a chain of n_bodies bodies and returns the
+// serialized document.
+// Appends an <actuator> and a <sensor> element under /mujoco, then for each
+// i in [0, n_bodies - 1) nests a new "segment_i" body (two geoms, a site and
+// a joint limited to +/- 360 / n_bodies) inside the previous body and adds a
+// matching motor, velocimeter and gyro. Finally multiplies the "pos" of
+// every "trackcom" camera under //worldbody/body by n_bodies / 6.
+std::string XMLMakeSwimmer(const std::string& content, int n_bodies) {
+  pugi::xml_document doc;
+  doc.load_string(content.c_str());
+
+  pugi::xml_node mjc = doc.select_node("/mujoco").node();
+  pugi::xml_node actuator = mjc.append_child("actuator");
+  pugi::xml_node sensor = mjc.append_child("sensor");
+  pugi::xml_node body = doc.select_node("//worldbody/body").node();
+  std::string joint_range = std::to_string(360.0 / n_bodies);
+  joint_range = "-" + joint_range + " " + joint_range;
+
+  for (int i = 0; i < n_bodies - 1; ++i) {
+    std::string id = std::to_string(i);
+    // motor
+    pugi::xml_node motor = actuator.append_child("motor");
+    motor.append_attribute("joint") = ("joint_" + id).c_str();
+    motor.append_attribute("name") = ("motor_" + id).c_str();
+    // velocimeter
+    pugi::xml_node velocimeter = sensor.append_child("velocimeter");
+    velocimeter.append_attribute("name") = ("velocimeter_" + id).c_str();
+    velocimeter.append_attribute("site") = ("site_" + id).c_str();
+    // gyro
+    pugi::xml_node gyro = sensor.append_child("gyro");
+    gyro.append_attribute("name") = ("gyro_" + id).c_str();
+    gyro.append_attribute("site") = ("site_" + id).c_str();
+    // body
+    pugi::xml_node child = body.append_child("body");
+    child.append_attribute("name") = ("segment_" + id).c_str();
+    child.append_attribute("pos") = "0 .1 0";
+    body = child;  // the next segment is nested inside this one
+    pugi::xml_node geom = body.append_child("geom");
+    geom.append_attribute("class") = "visual";
+    geom.append_attribute("name") = ("visual_" + id).c_str();
+    geom = body.append_child("geom");
+    geom.append_attribute("class") = "inertial";
+    geom.append_attribute("name") = ("inertial_" + id).c_str();
+    pugi::xml_node site = body.append_child("site");
+    site.append_attribute("name") = ("site_" + id).c_str();
+    pugi::xml_node joint = body.append_child("joint");
+    joint.append_attribute("name") = ("joint_" + id).c_str();
+    joint.append_attribute("range") = joint_range.c_str();
+  }
+
+  double scale = n_bodies / 6.0;
+  pugi::xpath_node_set cameras = doc.select_nodes("//worldbody/body/camera");
+  for (const pugi::xpath_node& c : cameras) {
+    std::string mode = c.node().attribute("mode").value();
+    if (mode != "trackcom") {
+      continue;
+    }
+    std::istringstream in(c.node().attribute("pos").value());
+    std::ostringstream out;
+    for (int i = 0; i < 3; ++i) {
+      // Initialised so that a "pos" with fewer than three components cannot
+      // make the multiplication below read an indeterminate value.
+      double x = 0.0;
+      in >> x;
+      if (i > 0) {
+        out << " ";
+      }
+      out << x * scale;
+    }
+    c.node().attribute("pos").set_value(out.str().c_str());
+  }
+
+  XMLStringWriter writer;
+  doc.print(writer);
+  return writer.result;
+}
+
 int GetQposId(mjModel* model, const std::string& name) {
   return model->jnt_qposadr[mj_name2id(model, mjOBJ_JOINT, name.c_str())];
 }
diff --git a/envpool/mujoco/dmc/utils.h b/envpool/mujoco/dmc/utils.h
index d2329bee..97dede1b 100644
--- a/envpool/mujoco/dmc/utils.h
+++ b/envpool/mujoco/dmc/utils.h
@@ -36,6 +36,7 @@ std::string GetFileContent(const std::string& base_path,
 std::string XMLRemoveByBodyName(const std::string& content,
                                 const std::vector& body_names);
 std::string XMLAddPoles(const std::string& content, int n_poles);
+std::string XMLMakeSwimmer(const std::string& content, int n_bodies);
 
 // the following
id is not 1 on 1 mapping int GetQposId(mjModel* model, const std::string& name);