dmc swimmer (#140)

* feat(dmc swimmer): add doc * feat(dmc swimmer): miss make swimmer xml * fix(dmc swimmer): fix make swimmer util * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): fix swimmer.h * fix(dmc swimmer): finish swimmer without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): without test * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): fix lint * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): pass lint, fix test * fix(dmc swimmer): fix bazel test * fix(dmc swimmer): fix bazel test * polish * fix a bug
sail-sg · May 24, 2022 · a6b0bb5 · a6b0bb5
1 parent d69d82d
commit a6b0bb5
Show file tree

Hide file tree

Showing 12 changed files with 348 additions and 1 deletion.
diff --git a/docs/env/dm_control.rst b/docs/env/dm_control.rst
@@ -199,6 +199,20 @@ ReacherEasy-v1, ReacherHard-v1
 - ``max_episode_steps``: 1000;
 
 
+SwimmerSwimmer6-v1, SwimmerSwimmer15-v1
+---------------------------------------
+
+`dm_control suite swimmer source code
+<https://github.com/deepmind/dm_control/blob/1.0.2/dm_control/suite/swimmer.py>`_
+
+- Observation spec: a namedtuple with three keys: ``joints (5 for swimmer6,
+  14 for swimmer15)``, ``to_target (2)``, and ``body_velocities (18 for
+  swimmer6, 45 for swimmer15)``;
+- Action spec: ``(5 for swimmer6, 14 for swimmer15)``, with range ``[-1, 1]``;
+- ``frame_skip``: 15;
+- ``max_episode_steps``: 1000;
+
+
 WalkerRun-v1, WalkerStand-v1, WalkerWalk-v1
 -------------------------------------------
 

diff --git a/envpool/make_test.py b/envpool/make_test.py
@@ -157,6 +157,8 @@ def test_make_mujoco_dmc(self) -> None:
         "PointMassHard-v1",
         "ReacherEasy-v1",
         "ReacherHard-v1",
+        "SwimmerSwimmer6-v1",
+        "SwimmerSwimmer15-v1",
         "WalkerRun-v1",
         "WalkerStand-v1",
         "WalkerWalk-v1",

diff --git a/envpool/mujoco/BUILD b/envpool/mujoco/BUILD
@@ -80,6 +80,7 @@ cc_library(
         "dmc/pendulum.h",
         "dmc/point_mass.h",
         "dmc/reacher.h",
+        "dmc/swimmer.h",
         "dmc/utils.h",
         "dmc/walker.h",
     ],

diff --git a/envpool/mujoco/dmc/__init__.py b/envpool/mujoco/dmc/__init__.py
@@ -38,6 +38,8 @@
   _DmcPointMassEnvSpec,
   _DmcReacherEnvPool,
   _DmcReacherEnvSpec,
+  _DmcSwimmerEnvPool,
+  _DmcSwimmerEnvSpec,
   _DmcWalkerEnvPool,
   _DmcWalkerEnvSpec,
 )
@@ -81,6 +83,9 @@
 DmcReacherEnvSpec, DmcReacherDMEnvPool, DmcReacherGymEnvPool = py_env(
   _DmcReacherEnvSpec, _DmcReacherEnvPool
 )
+DmcSwimmerEnvSpec, DmcSwimmerDMEnvPool, DmcSwimmerGymEnvPool = py_env(
+  _DmcSwimmerEnvSpec, _DmcSwimmerEnvPool
+)
 DmcWalkerEnvSpec, DmcWalkerDMEnvPool, DmcWalkerGymEnvPool = py_env(
   _DmcWalkerEnvSpec, _DmcWalkerEnvPool
 )
@@ -122,6 +127,9 @@
   "DmcReacherEnvSpec",
   "DmcReacherDMEnvPool",
   "DmcReacherGymEnvPool",
+  "DmcSwimmerEnvSpec",
+  "DmcSwimmerDMEnvPool",
+  "DmcSwimmerGymEnvPool",
   "DmcWalkerEnvSpec",
   "DmcWalkerDMEnvPool",
   "DmcWalkerGymEnvPool",

diff --git a/envpool/mujoco/dmc/mujoco_dmc_align_test.py b/envpool/mujoco/dmc/mujoco_dmc_align_test.py
@@ -46,6 +46,8 @@
   DmcPointMassEnvSpec,
   DmcReacherDMEnvPool,
   DmcReacherEnvSpec,
+  DmcSwimmerDMEnvPool,
+  DmcSwimmerEnvSpec,
   DmcWalkerDMEnvPool,
   DmcWalkerEnvSpec,
 )
@@ -82,6 +84,12 @@ def reset_state(
         target = ts.observation.target[0]
         env.physics.named.model.geom_pos["target", "x"] = target[0]
         env.physics.named.model.geom_pos["target", "y"] = target[1]
+      elif domain == "swimmer":
+        xpos, ypos = ts.observation.target0[0]
+        env.physics.named.model.geom_pos["target", "x"] = xpos
+        env.physics.named.model.geom_pos["target", "y"] = ypos
+        env.physics.named.model.light_pos['target_light', 'x'] = xpos
+        env.physics.named.model.light_pos['target_light', 'y'] = ypos
       elif domain == "fish" and task == "swim":
         target = ts.observation.target0[0]
         env.physics.named.model.geom_pos["target", "x"] = target[0]
@@ -254,6 +262,12 @@ def test_reacher(self) -> None:
       "reacher", ["easy", "hard"], DmcReacherEnvSpec, DmcReacherDMEnvPool
     )
 
+  def test_swimmer(self) -> None:
+    self.run_align_check_entry(
+      "swimmer", ["swimmer6", "swimmer15"], DmcSwimmerEnvSpec,
+      DmcSwimmerDMEnvPool
+    )
+
   def test_walker(self) -> None:
     self.run_align_check_entry(
       "walker", ["run", "stand", "walk"], DmcWalkerEnvSpec, DmcWalkerDMEnvPool

diff --git a/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py b/envpool/mujoco/dmc/mujoco_dmc_deterministic_test.py
@@ -44,6 +44,8 @@
   DmcPointMassEnvSpec,
   DmcReacherDMEnvPool,
   DmcReacherEnvSpec,
+  DmcSwimmerDMEnvPool,
+  DmcSwimmerEnvSpec,
   DmcWalkerDMEnvPool,
   DmcWalkerEnvSpec,
 )
@@ -189,6 +191,11 @@ def test_reacher(self) -> None:
     for task in ["easy", "hard"]:
       self.check(DmcReacherEnvSpec, DmcReacherDMEnvPool, task, obs_keys)
 
+  def test_swimmer(self) -> None:
+    obs_keys = ["joints", "to_target", "body_velocities"]
+    for task in ["swimmer6", "swimmer15"]:
+      self.check(DmcSwimmerEnvSpec, DmcSwimmerDMEnvPool, task, obs_keys)
+
   def test_walker(self) -> None:
     obs_keys = ["orientations", "height", "velocity"]
     for task in ["run", "stand", "walk"]:

diff --git a/envpool/mujoco/dmc/mujoco_envpool.cc b/envpool/mujoco/dmc/mujoco_envpool.cc
@@ -25,6 +25,7 @@
 #include "envpool/mujoco/dmc/pendulum.h"
 #include "envpool/mujoco/dmc/point_mass.h"
 #include "envpool/mujoco/dmc/reacher.h"
+#include "envpool/mujoco/dmc/swimmer.h"
 #include "envpool/mujoco/dmc/walker.h"
 
 using DmcAcrobotEnvSpec = PyEnvSpec<mujoco_dmc::AcrobotEnvSpec>;
@@ -63,6 +64,9 @@ using DmcPointMassEnvPool = PyEnvPool<mujoco_dmc::PointMassEnvPool>;
 using DmcReacherEnvSpec = PyEnvSpec<mujoco_dmc::ReacherEnvSpec>;
 using DmcReacherEnvPool = PyEnvPool<mujoco_dmc::ReacherEnvPool>;
 
+using DmcSwimmerEnvSpec = PyEnvSpec<mujoco_dmc::SwimmerEnvSpec>;
+using DmcSwimmerEnvPool = PyEnvPool<mujoco_dmc::SwimmerEnvPool>;
+
 using DmcWalkerEnvSpec = PyEnvSpec<mujoco_dmc::WalkerEnvSpec>;
 using DmcWalkerEnvPool = PyEnvPool<mujoco_dmc::WalkerEnvPool>;
 
@@ -79,5 +83,6 @@ PYBIND11_MODULE(mujoco_dmc_envpool, m) {
   REGISTER(m, DmcPendulumEnvSpec, DmcPendulumEnvPool)
   REGISTER(m, DmcPointMassEnvSpec, DmcPointMassEnvPool)
   REGISTER(m, DmcReacherEnvSpec, DmcReacherEnvPool)
+  REGISTER(m, DmcSwimmerEnvSpec, DmcSwimmerEnvPool)
   REGISTER(m, DmcWalkerEnvSpec, DmcWalkerEnvPool)
 }
diff --git a/envpool/mujoco/dmc/reacher.h b/envpool/mujoco/dmc/reacher.h
@@ -50,7 +50,7 @@ class ReacherEnvFns {
                     "obs:velocity"_.Bind(Spec<mjtNum>({2})),
 #ifdef ENVPOOL_TEST
                     "info:qpos0"_.Bind(Spec<mjtNum>({2})),
-                    "info:target"_.Bind(Spec<mjtNum>({30})),
+                    "info:target"_.Bind(Spec<mjtNum>({2})),
 #endif
                     "discount"_.Bind(Spec<float>({-1}, {0.0, 1.0})));
   }

diff --git a/envpool/mujoco/dmc/registration.py b/envpool/mujoco/dmc/registration.py
@@ -53,6 +53,8 @@
   ("point_mass", "hard"),
   ("reacher", "easy"),
   ("reacher", "hard"),
+  ("swimmer", "swimmer6"),
+  ("swimmer", "swimmer15"),
   ("walker", "run"),
   ("walker", "stand"),
   ("walker", "walk"),

diff --git a/envpool/mujoco/dmc/swimmer.h b/envpool/mujoco/dmc/swimmer.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2022 Garena Online Private Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// https://github.com/deepmind/dm_control/blob/1.0.2/dm_control/suite/swimmer.py
+
+#ifndef ENVPOOL_MUJOCO_DMC_SWIMMER_H_
+#define ENVPOOL_MUJOCO_DMC_SWIMMER_H_
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <random>
+#include <regex>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "envpool/core/async_envpool.h"
+#include "envpool/core/env.h"
+#include "envpool/mujoco/dmc/mujoco_env.h"
+#include "envpool/mujoco/dmc/utils.h"
+
+namespace mujoco_dmc {
+
+// Builds the model XML string for a swimmer task.
+//
+// Reads "swimmer.xml" under `base_path` through GetFileContent. For the
+// "swimmer6" and "swimmer15" tasks the content is passed through
+// XMLMakeSwimmer with the matching body count (6 or 15); for any other
+// `task_name` the file content is returned unchanged.
+//
+// Declared `inline` because the definition lives in a header: without it,
+// including this header from more than one translation unit would produce
+// multiple definitions of the same function.
+inline std::string GetSwimmerXML(const std::string& base_path,
+                                 const std::string& task_name) {
+  auto content = GetFileContent(base_path, "swimmer.xml");
+  if (task_name == "swimmer6") {
+    return XMLMakeSwimmer(content, 6);
+  }
+  if (task_name == "swimmer15") {
+    return XMLMakeSwimmer(content, 15);
+  }
+  return content;
+}
+
+// Spec factory for the dm_control swimmer domain: provides the default
+// configuration and the state/action specs derived from the configured task.
+class SwimmerEnvFns {
+ public:
+  // Default configuration: 1000 steps per episode, frame skip of 15, and the
+  // "swimmer6" task.
+  static decltype(auto) DefaultConfig() {
+    return MakeDict("max_episode_steps"_.Bind(1000), "frame_skip"_.Bind(15),
+                    "task_name"_.Bind(std::string("swimmer6")));
+  }
+  // State spec for the configured task. With n = number of bodies:
+  //   obs:joints           -> n - 1 values
+  //   obs:to_target        -> 2 values
+  //   obs:body_velocities  -> 3 * n values
+  // Test builds (ENVPOOL_TEST) additionally expose info:qpos0 (n + 2 values)
+  // and info:target0 (2 values).
+  // Throws std::runtime_error when the task name is not recognised.
+  template <typename Config>
+  static decltype(auto) StateSpec(const Config& conf) {
+    const std::string task_name = conf["task_name"_];
+    const int n_bodies = NumBodies(task_name);
+    return MakeDict("obs:joints"_.Bind(Spec<mjtNum>({n_bodies - 1})),
+                    "obs:to_target"_.Bind(Spec<mjtNum>({2})),
+                    "obs:body_velocities"_.Bind(Spec<mjtNum>({3 * n_bodies})),
+#ifdef ENVPOOL_TEST
+                    "info:qpos0"_.Bind(Spec<mjtNum>({n_bodies + 2})),
+                    "info:target0"_.Bind(Spec<mjtNum>({2})),
+#endif
+                    "discount"_.Bind(Spec<float>({-1}, {0.0, 1.0})));
+  }
+  // Action spec for the configured task: n - 1 controls bounded to [-1, 1].
+  // Throws std::runtime_error when the task name is not recognised, matching
+  // StateSpec instead of silently assuming a 3-body swimmer.
+  template <typename Config>
+  static decltype(auto) ActionSpec(const Config& conf) {
+    const std::string task_name = conf["task_name"_];
+    const int n_bodies = NumBodies(task_name);
+    return MakeDict(
+        "action"_.Bind(Spec<mjtNum>({-1, n_bodies - 1}, {-1.0, 1.0})));
+  }
+
+ private:
+  // Maps a task name to its body count ("swimmer6" -> 6, "swimmer15" -> 15).
+  // Single source of truth shared by StateSpec and ActionSpec so both specs
+  // always agree on the dimensions.
+  static int NumBodies(const std::string& task_name) {
+    if (task_name == "swimmer6") {
+      return 6;
+    }
+    if (task_name == "swimmer15") {
+      return 15;
+    }
+    throw std::runtime_error("Unknown task_name " + task_name +
+                             " for dmc swimmer.");
+  }
+};
+
+using SwimmerEnvSpec = EnvSpec<SwimmerEnvFns>;
+
+// EnvPool environment for the dm_control swimmer domain. Combines the generic
+// Env<SwimmerEnvSpec> interface with the MujocoEnv base and implements the
+// task hooks (episode initialisation, reward, termination) plus the
+// observation export.
+class SwimmerEnv : public Env<SwimmerEnvSpec>, public MujocoEnv {
+ protected:
+  // MuJoCo ids looked up by name once at construction: the "head", "nose" and
+  // "target" geoms and the "target_light" light.
+  int id_head_, id_nose_, id_target_, id_target_light_;
+#ifdef ENVPOOL_TEST
+  // Target (x, y) drawn by the most recent TaskInitializeEpisode; exported as
+  // "info:target0" in test builds.
+  std::array<mjtNum, 2> target0_;
+#endif
+
+ public:
+  // Builds the model from the task-specific XML returned by GetSwimmerXML and
+  // resolves the named geoms/light used by the task.
+  SwimmerEnv(const Spec& spec, int env_id)
+      : Env<SwimmerEnvSpec>(spec, env_id),
+        MujocoEnv(
+            spec.config["base_path"_],
+            GetSwimmerXML(spec.config["base_path"_], spec.config["task_name"_]),
+            spec.config["frame_skip"_], spec.config["max_episode_steps"_]),
+        id_head_(mj_name2id(model_, mjOBJ_GEOM, "head")),
+        id_nose_(mj_name2id(model_, mjOBJ_GEOM, "nose")),
+        id_target_(mj_name2id(model_, mjOBJ_GEOM, "target")),
+        id_target_light_(mj_name2id(model_, mjOBJ_LIGHT, "target_light")) {}
+
+  // Randomises the joints, then places the target (and its light) at a
+  // random (x, y) position.
+  void TaskInitializeEpisode() override {
+    RandomizeLimitedAndRotationalJoints(&gen_);
+    // A draw below 0.2 selects the small sampling box (half-width 0.3),
+    // otherwise the large one (half-width 2.0). The order of the three random
+    // draws below must not change, or the sampled sequence would differ.
+    mjtNum target_box = RandUniform(0, 1)(gen_) < 0.2 ? 0.3 : 2.0;
+    mjtNum xpos = RandUniform(-target_box, target_box)(gen_);
+    mjtNum ypos = RandUniform(-target_box, target_box)(gen_);
+    // physics.named.model.geom_pos['target', 'x'] = xpos
+    // physics.named.model.geom_pos['target', 'y'] = ypos
+    model_->geom_pos[id_target_ * 3 + 0] = xpos;
+    model_->geom_pos[id_target_ * 3 + 1] = ypos;
+    // physics.named.model.light_pos['target_light', 'x'] = xpos
+    // physics.named.model.light_pos['target_light', 'y'] = ypos
+    model_->light_pos[id_target_light_ * 3 + 0] = xpos;
+    model_->light_pos[id_target_light_ * 3 + 1] = ypos;
+#ifdef ENVPOOL_TEST
+    // Snapshot the initial qpos and target for export through the info keys.
+    std::memcpy(qpos0_.get(), data_->qpos, sizeof(mjtNum) * model_->nq);
+    target0_[0] = xpos;
+    target0_[1] = ypos;
+#endif
+  }
+
+  bool IsDone() override { return done_; }
+
+  // Resets the simulation through ControlReset and publishes the new state.
+  void Reset() override {
+    ControlReset();
+    WriteState();
+  }
+
+  // Forwards the "action" buffer to ControlStep and publishes the new state.
+  void Step(const Action& action) override {
+    auto* act = static_cast<mjtNum*>(action["action"_].Data());
+    ControlStep(act);
+    WriteState();
+  }
+
+  // Smooth reward from RewardTolerance on the nose-to-target distance, with
+  // bounds [0, target_size], margin 5 * target_size, value 0.1 at the margin
+  // and the long-tail sigmoid. target_size is the first size component of the
+  // target geom.
+  float TaskGetReward() override {
+    mjtNum target_size = model_->geom_size[id_target_ * 3];
+    return static_cast<float>(RewardTolerance(NoseToTargetDist(), 0.0,
+                                              target_size, 5 * target_size, 0.1,
+                                              SigmoidType::kLongTail));
+  }
+
+  // The task never terminates an episode on its own.
+  bool TaskShouldTerminateEpisode() override { return false; }
+
+ private:
+  // Computes the observations, allocates a state slot and fills in reward,
+  // discount, observations and (in test builds) the info fields.
+  void WriteState() {
+    const auto joints = Joints();
+    const auto to_target = NoseToTarget();
+    const auto body_velocities = BodyVelocities();
+
+    State state = Allocate();
+    state["reward"_] = reward_;
+    state["discount"_] = discount_;
+    // obs
+    state["obs:joints"_].Assign(joints.data(), joints.size());
+    state["obs:to_target"_].Assign(to_target.begin(), to_target.size());
+    state["obs:body_velocities"_].Assign(body_velocities.data(),
+                                         body_velocities.size());
+    // info
+#ifdef ENVPOOL_TEST
+    state["info:qpos0"_].Assign(qpos0_.get(), model_->nq);
+    state["info:target0"_].Assign(target0_.begin(), target0_.size());
+#endif
+  }
+
+  // Returns the first two components of the nose-to-target vector after
+  // multiplying it (as a row vector) with the 3x3 head orientation matrix.
+  std::array<mjtNum, 2> NoseToTarget() {
+    // nose_to_target = (self.named.data.geom_xpos['target'] -
+    //                   self.named.data.geom_xpos['nose'])
+    std::array<mjtNum, 3> nose_to_target_global;
+    for (int i = 0; i < 3; i++) {
+      nose_to_target_global[i] = (data_->geom_xpos[id_target_ * 3 + i] -
+                                  data_->geom_xpos[id_nose_ * 3 + i]);
+    }
+    // head_orientation = self.named.data.xmat['head'].reshape(3, 3)
+    // return nose_to_target.dot(head_orientation)[:2]
+    // NOTE(review): the reference above reads `xmat['head']`, while this code
+    // reads geom_xmat of the geom named "head" -- presumably equivalent for
+    // this model; confirm against swimmer.xml.
+    std::array<mjtNum, 2> nose_to_target;
+    for (int i = 0; i < 2; i++) {
+      // Column i of the row-major 3x3 matrix: entries i, i + 3 and i + 6.
+      nose_to_target[i] =
+          nose_to_target_global[0] * data_->geom_xmat[id_head_ * 9 + i + 0] +
+          nose_to_target_global[1] * data_->geom_xmat[id_head_ * 9 + i + 3] +
+          nose_to_target_global[2] * data_->geom_xmat[id_head_ * 9 + i + 6];
+    }
+    return nose_to_target;
+  }
+  // Euclidean norm of NoseToTarget().
+  mjtNum NoseToTargetDist() {
+    // return np.linalg.norm(self.nose_to_target())
+    const auto nose_to_target = NoseToTarget();
+    return std::sqrt(nose_to_target[0] * nose_to_target[0] +
+                     nose_to_target[1] * nose_to_target[1]);
+  }
+  // Returns local body velocities: x,y linear, z rotational.
+  // Treats sensordata as rows of 6 values and, for rows 2..nbody, keeps
+  // entries 0, 1 and 5 of each row.
+  std::vector<mjtNum> BodyVelocities() {
+    std::vector<mjtNum> result;
+    // The loop below runs nbody - 1 times and emits 3 values per iteration;
+    // reserve once to avoid reallocations on every step.
+    result.reserve(static_cast<std::size_t>(std::max(model_->nbody - 1, 0)) *
+                   3);
+    for (int i = 2; i < model_->nbody + 1; ++i) {
+      result.emplace_back(data_->sensordata[i * 6 + 0]);
+      result.emplace_back(data_->sensordata[i * 6 + 1]);
+      result.emplace_back(data_->sensordata[i * 6 + 5]);
+    }
+    return result;
+  }
+
+  // Returns a copy of qpos from index 3 onwards.
+  std::vector<mjtNum> Joints() {
+    // return self.data.qpos[3:].copy()
+    std::vector<mjtNum> result;
+    result.reserve(static_cast<std::size_t>(std::max(model_->nq - 3, 0)));
+    for (int i = 3; i < model_->nq; ++i) {
+      result.emplace_back(data_->qpos[i]);
+    }
+    return result;
+  }
+};
+
+using SwimmerEnvPool = AsyncEnvPool<SwimmerEnv>;
+
+}  // namespace mujoco_dmc
+
+#endif  // ENVPOOL_MUJOCO_DMC_SWIMMER_H_