RL-19: Added SarsaGD and QLearningGD
Added non-eligibility-traces versions of SarsaGDET and QLearningGDET.
Joey Andres committed Nov 29, 2016
1 parent 156113b commit 593342b
Showing 30 changed files with 1,157 additions and 497 deletions.
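Only six of those 30 files are reproduced below: the build tweaks and the new gradient-descent class hierarchy. As a rough orientation sketch, inferred from the headers that follow rather than taken from the commit itself:

// Hierarchy introduced by this commit; members elided, see the diffs below.
class GradientDescentAbstract { /* weights, tile coding, pure virtual updateWeights() */ };
class GradientDescent : public GradientDescentAbstract { /* one-step TD update, no traces */ };
class GradientDescentET : public GradientDescentAbstract { /* adds eligibility traces */ };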
1 change: 1 addition & 0 deletions .gitignore
@@ -17,3 +17,4 @@ windows/
 
 # Node modules.
 /node_modules/
+/cmake-build-debug/
2 changes: 1 addition & 1 deletion CMakeFiles/license.h
@@ -1,5 +1,5 @@
 /**
- * rl - Reinforcment Learning
+ * rl - Reinforcement Learning
  * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
  *
  * This program is free software: you can redistribute it and/or modify
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -7,7 +7,7 @@ include(CMakeFiles/CMakeGenerateMainHeader)
 
 set(intrinsic "none" CACHE STRING "The possible intrinsics are the following: none, mmx, sse, sse2, sse3, sse4, sse4.1, sse4.2, sse4a, avx.")
 
-set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS} -g")
+set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS} -O3 -g")
 
 add_subdirectory(src)
 add_subdirectory(lib)
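As a configure-time aside (not part of this commit): the intrinsic cache variable declared above and the new -O3 flag both take effect when CMake is run, e.g. cmake -Dintrinsic=sse4.2 .. from a build directory.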
135 changes: 23 additions & 112 deletions include/algorithm/gradient-descent/GradientDescent.h
@@ -1,9 +1,21 @@
-/*
- * GradientDescent.h
- *
- *  Created on: Jun 11, 2014
- *      Author: jandres
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _GRADIENT_DESCENT_H_
 #define _GRADIENT_DESCENT_H_
@@ -15,127 +27,26 @@
 
 #include "../../declares.h"
 #include "../../coding/TileCode.h"
+#include "GradientDescentAbstract.h"
 
-using namespace std;
-
 namespace rl {
-
-using namespace coding;
-
+using coding::TileCode;
 namespace algorithm {
 
 /*! \class GradientDescent
- *  \brief Early implementation of Gradient Descent specialized for Tile Coding.
+ *  \brief Gradient Descent implementation.
  */
-class GradientDescent {
+class GradientDescent : public GradientDescentAbstract {
  public:
-  /**
-   * @param tileCode Type of tile coding.
-   * @param stepSize Step size for gradient descent.
-   * @param discountRate discount rate for gradient descent.
-   * @param lambda How influential is current state-action to ther state-action.
-   */
-  GradientDescent(TileCode& tileCode, rl::FLOAT stepSize,
-                  rl::FLOAT discountRate, rl::FLOAT lambda);
-
-  virtual ~GradientDescent();
-
-  /**
-   * @return Size of both weight vector and traces vector.
-   */
-  size_t getSize() const;
-
-  /**
-   * Get the value of the parameters in the real space.
-   * @param parameters
-   * @return corresponding value.
-   */
-  FLOAT getValueFromParameters(const floatVector& parameters) const;
-
-  /**
-   * Get the value of the parameters in the real space.
-   * @param featureVector
-   * @return corresponding value.
-   */
-  FLOAT getValueFromFeatureVector(const FEATURE_VECTOR& fv) const;
-
-  /**
-   * @param parameters parameters.
-   * @param fv feature vector output. Feature vector are samples taken around
-   *           the parameters in the n-dimension tilecde.
-   */
-  FEATURE_VECTOR getFeatureVector(const floatVector& parameters) const;
+  using GradientDescentAbstract::GradientDescentAbstract;
 
-  /**
-   * Increase the eligibility traces of a given feature vector.
-   * Note: This is loads faster than replace elibility traces.
-   * @param fv feature vector.
-   */
-  void incrementEligibilityTraces(const FEATURE_VECTOR& fv);
-
-  /**
-   * Replace the eligibility traces for each feature vector by 1.0F.
-   * @param fv feature vector.
-   */
-  void replaceEligibilityTraces(const FEATURE_VECTOR& fv);
-
-  /**
-   * Decrease each eligibility traces by eligibility traces and discount rate $(\lambda)$
-   */
-  void decreaseEligibilityTraces();
-
-  /**
-   * Make all eligibility trace to 0.0F.
-   */
-  void resetEligibilityTraces();
-
   /**
    * @param currentStateVector array of current states.
    * @param actionVector action taken to get to nextStateVector.
    * @param nextStateVector array of next states.
    * @param reward reward for taking nextAction.
    */
   void updateWeights(const spStateCont& currentStateVector,
                      const spActionCont& currentActionVector,
                      const spStateCont& nextStateVector,
-                     const FLOAT nextActionValue, const FLOAT reward);
-
-  /**
-   * @param actionSet set of actions.
-   * @param param array of current state.
-   * @param actionVectorValueMap state-action to value mapping to be returned.
-   * @param maxAction max action calculated while building action value map.
-   */
-  void buildActionValues(
-      const spActionSet<actionCont>& actionSet, const spStateCont& param,
-      spActionValueMap<actionCont>& actionVectorValueMap,
-      spActionCont& maxAction) const;
-
-  /**
-   * @param actionValueMap state-action to value mapping.
-   * @return value.
-   */
-  FLOAT getMaxValue(
-      const spActionValueMap<actionCont>& actionValueMap) const;
-
-  /**
-   * Update weights with tderror.
-   * @param tdError
-   */
-  virtual void backUpWeights(FLOAT tdError);
-
- protected:
-  TileCode& _tileCode;  //!< Tile Code.
-  rl::FLOAT* _w;  //!< Vector of weights.
-  rl::FLOAT* _e;  //!< Vector of eligibility traces.
-  rl::FLOAT _stepSize;  //!< Step Size of the weight update.
-  rl::FLOAT _discountRate;  //!< Discount rate, mix with _lambda on how past states
-                            //!< influence current.
-  rl::FLOAT _lambda;  //!< lambda, mix with _lambda on how past states influence current.
-
-  // Optimization.
-  rl::FLOAT _discountRateTimesLambda;
-  rl::FLOAT* _discountRateTimesLambdaArray;
+                     const FLOAT nextActionValue,
+                     const FLOAT reward) override;
 };
 
 }  // namespace algorithm
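The new GradientDescent now declares only updateWeights; its implementation file is not shown on this page. As a minimal sketch of the idea behind a trace-free update: with binary tile features, Q(s, a) is just the sum of the active tiles' weights, so the one-step TD correction touches only those tiles. All names below (sketchUpdateWeights, activeFeatures) are hypothetical, not the library's API:

#include <cstddef>
#include <vector>

// Sketch: one-step TD(0) gradient-descent update over binary tile features.
void sketchUpdateWeights(std::vector<double>& w,
                         const std::vector<std::size_t>& activeFeatures,  // tiles of (s, a)
                         double nextActionValue,  // Q(s', a')
                         double reward,
                         double stepSize,
                         double discountRate) {
  // Q(s, a): sum of the weights of the active tiles.
  double currentValue = 0.0;
  for (std::size_t i : activeFeatures) currentValue += w[i];

  // TD error: delta = r + gamma * Q(s', a') - Q(s, a).
  double tdError = reward + discountRate * nextActionValue - currentValue;

  // The gradient of Q w.r.t. w is 1 on active tiles and 0 elsewhere, so only
  // the active tiles move; dividing by the tile count keeps the effective
  // step size independent of the number of tilings.
  double alpha = stepSize / activeFeatures.size();
  for (std::size_t i : activeFeatures) w[i] += alpha * tdError;
}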
120 changes: 120 additions & 0 deletions include/algorithm/gradient-descent/GradientDescentAbstract.h
@@ -0,0 +1,120 @@
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <set>
+#include <map>
+#include <vector>
+#include <memory>
+#include <cstdlib>
+
+#include "../../declares.h"
+#include "../../coding/TileCode.h"
+
+namespace rl {
+using coding::TileCode;
+namespace algorithm {
+
+class GradientDescentAbstract {
+ public:
+  /**
+   * @param tileCode Type of tile coding.
+   * @param stepSize Step size for gradient descent.
+   * @param discountRate Discount rate for gradient descent.
+   * @param lambda How influential the current state-action is to other
+   *               state-actions.
+   */
+  GradientDescentAbstract(TileCode& tileCode,
+                          rl::FLOAT stepSize,
+                          rl::FLOAT discountRate,
+                          rl::FLOAT lambda);
+
+  /**
+   * @return Size of both the weight vector and the traces vector.
+   */
+  size_t getSize() const;
+
+  /**
+   * Get the value of the parameters in the real space.
+   * @param parameters
+   * @return corresponding value.
+   */
+  FLOAT getValueFromParameters(const floatVector& parameters) const;
+
+  /**
+   * Get the value of the parameters in the real space.
+   * @param featureVector
+   * @return corresponding value.
+   */
+  FLOAT getValueFromFeatureVector(const FEATURE_VECTOR& fv) const;
+
+  /**
+   * @param parameters parameters.
+   * @param fv feature vector output. Feature vectors are samples taken around
+   *           the parameters in the n-dimensional tile code.
+   */
+  FEATURE_VECTOR getFeatureVector(const floatVector& parameters) const;
+
+  /**
+   * @param currentStateVector array of current states.
+   * @param currentActionVector action taken to get to nextStateVector.
+   * @param nextStateVector array of next states.
+   * @param nextActionValue value of the next state-action.
+   * @param reward reward for taking the action.
+   */
+  virtual void updateWeights(const spStateCont& currentStateVector,
+                             const spActionCont& currentActionVector,
+                             const spStateCont& nextStateVector,
+                             const FLOAT nextActionValue,
+                             const FLOAT reward) = 0;
+
+  /**
+   * @param actionSet set of actions.
+   * @param param array of current states.
+   * @param actionVectorValueMap state-action to value mapping to be returned.
+   * @param maxAction max action calculated while building the action value map.
+   */
+  void buildActionValues(
+      const spActionSet<actionCont>& actionSet,
+      const spStateCont& param,
+      spActionValueMap<actionCont>& actionVectorValueMap,
+      spActionCont& maxAction) const;
+
+  /**
+   * @param actionValueMap state-action to value mapping.
+   * @return max value.
+   */
+  FLOAT getMaxValue(
+      const spActionValueMap<actionCont>& actionValueMap) const;
+
+ protected:
+  TileCode& _tileCode;  //!< Tile code.
+  std::vector<rl::FLOAT> _w;  //!< Vector of weights.
+  rl::FLOAT _stepSize;  //!< Step size of the weight update.
+  rl::FLOAT _discountRate;  //!< Discount rate; combined with _lambda to control
+                            //!< how much past states influence the current one.
+  rl::FLOAT _lambda;  //!< Lambda; combined with _discountRate to control how
+                      //!< much past states influence the current one.
+
+  // Optimization.
+  rl::FLOAT _discountRateTimesLambda;
+  rl::FLOAT* _discountRateTimesLambdaArray;
+};
+
+using spGradientDescentAbstract = std::shared_ptr<GradientDescentAbstract>;
+
+}  // namespace algorithm
+}  // namespace rl
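Callers would combine buildActionValues and getMaxValue for greedy action selection. A hypothetical caller-side sketch (the Sarsa/Q-learning control classes that actually drive this are among the 24 files not shown on this page):

#include "algorithm/gradient-descent/GradientDescentAbstract.h"

// Hypothetical helper: evaluate every action in the set for a given state and
// return the argmax; the sp* aliases come from declares.h.
rl::spActionCont pickGreedyAction(
    rl::algorithm::GradientDescentAbstract& gd,
    const rl::spActionSet<rl::actionCont>& actions,
    const rl::spStateCont& state) {
  rl::spActionValueMap<rl::actionCont> values;
  rl::spActionCont bestAction;
  gd.buildActionValues(actions, state, values, bestAction);  // fills values, tracks max
  return bestAction;
}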
91 changes: 91 additions & 0 deletions include/algorithm/gradient-descent/GradientDescentET.h
@@ -0,0 +1,91 @@
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "../../declares.h"
+#include "../../coding/TileCode.h"
+#include "GradientDescent.h"
+
+namespace rl {
+using coding::TileCode;
+namespace algorithm {
+
+/*! \class GradientDescentET
+ *  \brief Gradient Descent with eligibility traces.
+ */
+class GradientDescentET : public GradientDescentAbstract {
+ public:
+  /**
+   * @param tileCode Type of tile coding.
+   * @param stepSize Step size for gradient descent.
+   * @param discountRate Discount rate for gradient descent.
+   * @param lambda How influential the current state-action is to other
+   *               state-actions.
+   */
+  GradientDescentET(TileCode& tileCode,
+                    rl::FLOAT stepSize,
+                    rl::FLOAT discountRate,
+                    rl::FLOAT lambda);
+
+  /**
+   * Increase the eligibility traces of a given feature vector.
+   * Note: this is much faster than replacing eligibility traces.
+   * @param fv feature vector.
+   */
+  void incrementEligibilityTraces(const FEATURE_VECTOR& fv);
+
+  /**
+   * Replace the eligibility trace of each feature in the vector with 1.0F.
+   * @param fv feature vector.
+   */
+  void replaceEligibilityTraces(const FEATURE_VECTOR& fv);
+
+  /**
+   * Decay each eligibility trace by the discount rate and lambda.
+   */
+  void decreaseEligibilityTraces();
+
+  /**
+   * Reset all eligibility traces to 0.0F.
+   */
+  void resetEligibilityTraces();
+
+  /**
+   * @param currentStateVector array of current states.
+   * @param currentActionVector action taken to get to nextStateVector.
+   * @param nextStateVector array of next states.
+   * @param nextActionValue value of the next state-action.
+   * @param reward reward for taking the action.
+   */
+  void updateWeights(const spStateCont& currentStateVector,
+                     const spActionCont& currentActionVector,
+                     const spStateCont& nextStateVector,
+                     const FLOAT nextActionValue,
+                     const FLOAT reward) override;
+
+  /**
+   * Update weights with the TD error.
+   * @param tdError
+   */
+  virtual void backUpWeights(FLOAT tdError);
+
+ protected:
+  std::vector<rl::FLOAT> _e;  //!< Vector of eligibility traces.
+};
+
+}  // namespace algorithm
+}  // namespace rl
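The trace vector _e is what distinguishes this class: backUpWeights credits every weight in proportion to its trace, and decreaseEligibilityTraces decays the traces by gamma * lambda (presumably the precomputed _discountRateTimesLambda of the base class). A minimal sketch of that backup, hypothetical since the implementation file is not shown here:

#include <cstddef>
#include <vector>

// Sketch: accumulate TD error into every weight by its eligibility, then decay.
void sketchBackUpWeights(std::vector<double>& w,
                         std::vector<double>& e,
                         double tdError,
                         double stepSize,
                         double discountRate,
                         double lambda) {
  for (std::size_t i = 0; i < w.size(); ++i) {
    w[i] += stepSize * tdError * e[i];  // credit in proportion to the trace
    e[i] *= discountRate * lambda;      // decay: e <- gamma * lambda * e
  }
}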

