RL-12: Templatized TileCode
Massive refactoring. This should eliminate the possibility
of bugs caused by wrong dimensions.
Joey Andres committed Dec 9, 2016
1 parent 18649fa commit a76fef2
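
A minimal sketch of the dimension-safety idea behind this change (illustrative code, not from the repository): encoding the number of dimensions in the type, as the templated classes below do with floatArray<N> (presumably a fixed-size std::array of FLOAT), turns a size mismatch into a compile-time error instead of a runtime bug.

#include <array>

// Hypothetical 2-dimensional state; the length is part of the type.
using State2D = std::array<double, 2>;

void observe(const State2D& state);  // passing an array of any other length will not compile
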
Showing 75 changed files with 1,366 additions and 1,609 deletions.
2 changes: 0 additions & 2 deletions include/agent/Actuator.h
@@ -50,8 +50,6 @@ class Actuator : public ActionContainer<A> {
explicit Actuator(const spActionSet<A>& actionSet);
};

typedef Actuator<actionCont> ActuatorSL;

/*! \typedef spActuator
*
* Wraps Actuator with shared_ptr. @see Actuator
27 changes: 16 additions & 11 deletions include/agent/Agent.h
@@ -80,6 +80,15 @@ class AgentSupervised {
spLearningAlgorithm<S, A> _learningAlgorithm;
};

/*! \class AgentSupervisedGD
* \brief AgentSupervised for Gradient Descent.
* \tparam D Total number of dimensions.
* \tparam STATE_DIM Number of state dimensions.
*/
template<size_t D, size_t STATE_DIM = D-1>
using AgentSupervisedGD =
AgentSupervised<floatArray<STATE_DIM>, floatArray<D - STATE_DIM>>;
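
A minimal usage sketch of the new alias (assuming floatArray<N> is a fixed-size array of FLOAT; the name SupervisedAgent2D is purely illustrative):

// Two total dimensions with the default STATE_DIM = D - 1 = 1:
// one state dimension and one action dimension.
using SupervisedAgent2D = AgentSupervisedGD<2>;
// ...which expands to AgentSupervised<floatArray<1>, floatArray<1>>.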

/*! \class Agent
* \brief A class that represents an rl agent.
*
@@ -178,17 +187,13 @@ class Agent {
//!< postExecute.
};

/*! \typedef AgentSL
* \brief Agent for Supervised Learning.
* \tparam D data type of Supervised Learning agent.
*
* Supervised Learning usually deals with multi-dimension states and action,
* hence the specific typedef of Agent.
*
* TODO: Made by young me, probably under a lot of stress so above statement doesn't makes sense. Remove this crap.
/*! \class AgentGD
* \brief Agent for Gradient Descent.
* \tparam D Total number of dimensions.
* \tparam STATE_DIM Number of state dimensions.
*/
template<class D = FLOAT>
using AgentSL = Agent<vector<D>, vector<D>>;
template<size_t D, size_t STATE_DIM = D-1>
using AgentGD = Agent<floatArray<STATE_DIM>, floatArray<D - STATE_DIM>>;
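
Where the removed AgentSL relied on runtime-sized vector<D> for both state and action, AgentGD fixes both lengths at compile time. A minimal sketch (the problem split below is hypothetical):

// Hypothetical 3-dimensional problem: two state dimensions, one action dimension.
using CartAgent = AgentGD<3, 2>;
// ...which expands to Agent<floatArray<2>, floatArray<1>>, so a state or action
// of the wrong length is rejected by the compiler.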

template<class S, class A>
Agent<S, A>::Agent(const spEnvironment<S, A>& environment,
@@ -230,7 +235,7 @@ void Agent<S, A>::preExecute() {

template<class S, class A>
void Agent<S, A>::execute() {
// todo: Acquire last state and reward here.
// todo(jandres): Acquire last state and reward here.
this->applyAction(_currentAction);
spState<S> nextState = std::move(getLastObservedState());
FLOAT reward = this->_environment->getSensor()->getLastObservedReward();
76 changes: 68 additions & 8 deletions include/algorithm/gradient-descent/GradientDescent.h
@@ -18,27 +18,87 @@

#pragma once

#include <algorithm>
#include <vector>
#include <array>

#include "../../declares.h"
#include "../../coding/TileCode.h"
#include "GradientDescentAbstract.h"

using std::vector;
using std::array;

using rl::coding::spTileCode;

namespace rl {
using coding::spTileCode;
namespace algorithm {

/*! \class GradientDescent
* \brief Gradient Descent implementation.
* \tparam D Total number of dimensions.
* \tparam NUM_TILINGS Number of tilings.
* \tparam STATE_DIM Number of state dimensions.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
class GradientDescent : public GradientDescentAbstract {
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
class GradientDescent :
public GradientDescentAbstract<D, NUM_TILINGS, STATE_DIM> {
public:
using GradientDescentAbstract::GradientDescentAbstract;
using GradientDescentAbstract<
D, NUM_TILINGS, STATE_DIM>::GradientDescentAbstract;

void updateWeights(const spStateCont& currentStateVector,
const spActionCont& currentActionVector,
const spStateCont& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) override;
void updateWeights(
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& currentStateVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spActionParam& currentActionVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) override;
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
void GradientDescent<D, NUM_TILINGS, STATE_DIM>::updateWeights(
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& currentStateVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spActionParam& currentActionVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) {
floatArray<D> currentParams;
std::copy(currentStateVector->begin(),
currentStateVector->end(),
currentParams.begin());
std::copy(currentActionVector->begin(),
currentActionVector->end(),
currentParams.begin() + currentStateVector->size());

FEATURE_VECTOR currentStateFv =
std::move(this->getFeatureVector(currentParams));

FLOAT tdError = reward + this->_discountRate * nextActionValue
- this->getValueFromFeatureVector(currentStateFv);

for (auto f : currentStateFv) {
this->_w[f] += tdError * this->_stepSize;
}
}
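
For reference, updateWeights() above performs a semi-gradient TD update over the active tile-coding features: the value estimate presumably comes from summing the weights of the active tiles (via getValueFromFeatureVector), and every active weight is nudged by stepSize * tdError. A standalone sketch of the same arithmetic, with purely illustrative names and values:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Illustrative setup: 8 weights, 3 tiles active for the current (state, action).
  std::vector<double> weights(8, 0.0);
  std::vector<std::size_t> activeTiles{1, 4, 6};

  const double stepSize = 0.1;
  const double discountRate = 0.9;
  const double reward = 1.0;
  const double nextActionValue = 0.5;

  // Value of the current (state, action): sum of the active-tile weights.
  double q = 0.0;
  for (auto f : activeTiles) { q += weights[f]; }

  // Same TD error and per-feature update as updateWeights() above.
  double tdError = reward + discountRate * nextActionValue - q;
  for (auto f : activeTiles) { weights[f] += tdError * stepSize; }

  std::cout << "weights[1] = " << weights[1] << std::endl;  // prints 0.145
  return 0;
}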

} // namespace algorithm
} // namespace rl