RL-19: Added SarsaGD and QLearningGD
Added non-eligibility-traces versions of SarsaGDET and QLearningGDET.
Joey Andres committed Nov 29, 2016
1 parent 156113b commit 593342b
Showing 30 changed files with 1,157 additions and 497 deletions.
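Only six of those 30 files are reproduced below: the build tweaks and the new gradient-descent class hierarchy. As a rough orientation sketch, inferred from the headers that follow rather than taken from the commit itself:

// Hierarchy introduced by this commit; members elided, see the diffs below.
class GradientDescentAbstract { /* weights, tile coding, pure virtual updateWeights() */ };
class GradientDescent : public GradientDescentAbstract { /* one-step TD update, no traces */ };
class GradientDescentET : public GradientDescentAbstract { /* adds eligibility traces */ };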
1 change: 1 addition & 0 deletions .gitignore
@@ -17,3 +17,4 @@ windows/
 
 # Node modules.
 /node_modules/
+/cmake-build-debug/
2 changes: 1 addition & 1 deletion CMakeFiles/license.h
@@ -1,5 +1,5 @@
 /**
- * rl - Reinforcment Learning
+ * rl - Reinforcement Learning
  * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
  *
  * This program is free software: you can redistribute it and/or modify
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -7,7 +7,7 @@ include(CMakeFiles/CMakeGenerateMainHeader)
 
 set(intrinsic "none" CACHE STRING "The possible intrinsics are the following: none, mmx, sse, sse2, sse3, sse4, sse4.1, sse4.2, sse4a, avx.")
 
-set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS} -g")
+set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS} -O3 -g")
 
 add_subdirectory(src)
 add_subdirectory(lib)
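As a configure-time aside (not part of this commit): the intrinsic cache variable declared above and the new -O3 flag both take effect when CMake is run, e.g. cmake -Dintrinsic=sse4.2 .. from a build directory.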
135 changes: 23 additions & 112 deletions include/algorithm/gradient-descent/GradientDescent.h
@@ -1,9 +1,21 @@
-/*
- * GradientDescent.h
- *
- *  Created on: Jun 11, 2014
- *      Author: jandres
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _GRADIENT_DESCENT_H_
 #define _GRADIENT_DESCENT_H_
@@ -15,127 +27,26 @@
 
 #include "../../declares.h"
 #include "../../coding/TileCode.h"
+#include "GradientDescentAbstract.h"
 
-using namespace std;
-
 namespace rl {
-
-using namespace coding;
-
+using coding::TileCode;
 namespace algorithm {
 
 /*! \class GradientDescent
- *  \brief Early implementation of Gradient Descent specialized for Tile Coding.
+ *  \brief Gradient Descent implementation.
  */
-class GradientDescent {
+class GradientDescent : public GradientDescentAbstract {
  public:
-  /**
-   * @param tileCode Type of tile coding.
-   * @param stepSize Step size for gradient descent.
-   * @param discountRate discount rate for gradient descent.
-   * @param lambda How influential is current state-action to ther state-action.
-   */
-  GradientDescent(TileCode& tileCode, rl::FLOAT stepSize,
-                  rl::FLOAT discountRate, rl::FLOAT lambda);
-
-  virtual ~GradientDescent();
-
-  /**
-   * @return Size of both weight vector and traces vector.
-   */
-  size_t getSize() const;
-
-  /**
-   * Get the value of the parameters in the real space.
-   * @param parameters
-   * @return corresponding value.
-   */
-  FLOAT getValueFromParameters(const floatVector& parameters) const;
-
-  /**
-   * Get the value of the parameters in the real space.
-   * @param featureVector
-   * @return corresponding value.
-   */
-  FLOAT getValueFromFeatureVector(const FEATURE_VECTOR& fv) const;
-
-  /**
-   * @param parameters parameters.
-   * @param fv feature vector output. Feature vector are samples taken around
-   *           the parameters in the n-dimension tilecde.
-   */
-  FEATURE_VECTOR getFeatureVector(const floatVector& parameters) const;
+  using GradientDescentAbstract::GradientDescentAbstract;
 
-  /**
-   * Increase the eligibility traces of a given feature vector.
-   * Note: This is loads faster than replace elibility traces.
-   * @param fv feature vector.
-   */
-  void incrementEligibilityTraces(const FEATURE_VECTOR& fv);
-
-  /**
-   * Replace the eligibility traces for each feature vector by 1.0F.
-   * @param fv feature vector.
-   */
-  void replaceEligibilityTraces(const FEATURE_VECTOR& fv);
-
-  /**
-   * Decrease each eligibility traces by eligibility traces and discount rate $(\lambda)$
-   */
-  void decreaseEligibilityTraces();
-
-  /**
-   * Make all eligibility trace to 0.0F.
-   */
-  void resetEligibilityTraces();
-
   /**
    * @param currentStateVector array of current states.
    * @param actionVector action taken to get to nextStateVector.
    * @param nextStateVector array of next states.
    * @param reward reward for taking nextAction.
    */
   void updateWeights(const spStateCont& currentStateVector,
                      const spActionCont& currentActionVector,
                      const spStateCont& nextStateVector,
-                     const FLOAT nextActionValue, const FLOAT reward);
-
-  /**
-   * @param actionSet set of actions.
-   * @param param array of current state.
-   * @param actionVectorValueMap state-action to value mapping to be returned.
-   * @param maxAction max action calculated while building action value map.
-   */
-  void buildActionValues(
-      const spActionSet<actionCont>& actionSet, const spStateCont& param,
-      spActionValueMap<actionCont>& actionVectorValueMap,
-      spActionCont& maxAction) const;
-
-  /**
-   * @param actionValueMap state-action to value mapping.
-   * @return value.
-   */
-  FLOAT getMaxValue(
-      const spActionValueMap<actionCont>& actionValueMap) const;
-
-  /**
-   * Update weights with tderror.
-   * @param tdError
-   */
-  virtual void backUpWeights(FLOAT tdError);
-
- protected:
-  TileCode& _tileCode;  //!< Tile Code.
-  rl::FLOAT* _w;  //!< Vector of weights.
-  rl::FLOAT* _e;  //!< Vector of eligibility traces.
-  rl::FLOAT _stepSize;  //!< Step Size of the weight update.
-  rl::FLOAT _discountRate;  //!< Discount rate, mix with _lambda on how past states
-                            //!< influence current.
-  rl::FLOAT _lambda;  //!< lambda, mix with _lambda on how past states influence current.
-
-  // Optimization.
-  rl::FLOAT _discountRateTimesLambda;
-  rl::FLOAT* _discountRateTimesLambdaArray;
+                     const FLOAT nextActionValue,
+                     const FLOAT reward) override;
 };
 
 }  // namespace algorithm
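The new GradientDescent now declares only updateWeights; its implementation file is not shown on this page. As a minimal sketch of the idea behind a trace-free update: with binary tile features, Q(s, a) is just the sum of the active tiles' weights, so the one-step TD correction touches only those tiles. All names below (sketchUpdateWeights, activeFeatures) are hypothetical, not the library's API:

#include <cstddef>
#include <vector>

// Sketch: one-step TD(0) gradient-descent update over binary tile features.
void sketchUpdateWeights(std::vector<double>& w,
                         const std::vector<std::size_t>& activeFeatures,  // tiles of (s, a)
                         double nextActionValue,  // Q(s', a')
                         double reward,
                         double stepSize,
                         double discountRate) {
  // Q(s, a): sum of the weights of the active tiles.
  double currentValue = 0.0;
  for (std::size_t i : activeFeatures) currentValue += w[i];

  // TD error: delta = r + gamma * Q(s', a') - Q(s, a).
  double tdError = reward + discountRate * nextActionValue - currentValue;

  // The gradient of Q w.r.t. w is 1 on active tiles and 0 elsewhere, so only
  // the active tiles move; dividing by the tile count keeps the effective
  // step size independent of the number of tilings.
  double alpha = stepSize / activeFeatures.size();
  for (std::size_t i : activeFeatures) w[i] += alpha * tdError;
}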
120 changes: 120 additions & 0 deletions include/algorithm/gradient-descent/GradientDescentAbstract.h
@@ -0,0 +1,120 @@
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <set>
+#include <map>
+#include <vector>
+#include <memory>
+#include <cstdlib>
+
+#include "../../declares.h"
+#include "../../coding/TileCode.h"
+
+namespace rl {
+using coding::TileCode;
+namespace algorithm {
+
+class GradientDescentAbstract {
+ public:
+  /**
+   * @param tileCode Type of tile coding.
+   * @param stepSize Step size for gradient descent.
+   * @param discountRate Discount rate for gradient descent.
+   * @param lambda How influential the current state-action is to other
+   *               state-actions.
+   */
+  GradientDescentAbstract(TileCode& tileCode,
+                          rl::FLOAT stepSize,
+                          rl::FLOAT discountRate,
+                          rl::FLOAT lambda);
+
+  /**
+   * @return Size of both the weight vector and the traces vector.
+   */
+  size_t getSize() const;
+
+  /**
+   * Get the value of the parameters in the real space.
+   * @param parameters
+   * @return corresponding value.
+   */
+  FLOAT getValueFromParameters(const floatVector& parameters) const;
+
+  /**
+   * Get the value of the parameters in the real space.
+   * @param featureVector
+   * @return corresponding value.
+   */
+  FLOAT getValueFromFeatureVector(const FEATURE_VECTOR& fv) const;
+
+  /**
+   * @param parameters parameters.
+   * @param fv feature vector output. Feature vectors are samples taken around
+   *           the parameters in the n-dimensional tile code.
+   */
+  FEATURE_VECTOR getFeatureVector(const floatVector& parameters) const;
+
+  /**
+   * @param currentStateVector array of current states.
+   * @param currentActionVector action taken to get to nextStateVector.
+   * @param nextStateVector array of next states.
+   * @param nextActionValue value of the next state-action.
+   * @param reward reward for taking the action.
+   */
+  virtual void updateWeights(const spStateCont& currentStateVector,
+                             const spActionCont& currentActionVector,
+                             const spStateCont& nextStateVector,
+                             const FLOAT nextActionValue,
+                             const FLOAT reward) = 0;
+
+  /**
+   * @param actionSet set of actions.
+   * @param param array of current states.
+   * @param actionVectorValueMap state-action to value mapping to be returned.
+   * @param maxAction max action calculated while building the action value map.
+   */
+  void buildActionValues(
+      const spActionSet<actionCont>& actionSet,
+      const spStateCont& param,
+      spActionValueMap<actionCont>& actionVectorValueMap,
+      spActionCont& maxAction) const;
+
+  /**
+   * @param actionValueMap state-action to value mapping.
+   * @return max value.
+   */
+  FLOAT getMaxValue(
+      const spActionValueMap<actionCont>& actionValueMap) const;
+
+ protected:
+  TileCode& _tileCode;  //!< Tile code.
+  std::vector<rl::FLOAT> _w;  //!< Vector of weights.
+  rl::FLOAT _stepSize;  //!< Step size of the weight update.
+  rl::FLOAT _discountRate;  //!< Discount rate; combined with _lambda to control
+                            //!< how much past states influence the current one.
+  rl::FLOAT _lambda;  //!< Lambda; combined with _discountRate to control how
+                      //!< much past states influence the current one.
+
+  // Optimization.
+  rl::FLOAT _discountRateTimesLambda;
+  rl::FLOAT* _discountRateTimesLambdaArray;
+};
+
+using spGradientDescentAbstract = std::shared_ptr<GradientDescentAbstract>;
+
+}  // namespace algorithm
+}  // namespace rl
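Callers would combine buildActionValues and getMaxValue for greedy action selection. A hypothetical caller-side sketch (the Sarsa/Q-learning control classes that actually drive this are among the 24 files not shown on this page):

#include "algorithm/gradient-descent/GradientDescentAbstract.h"

// Hypothetical helper: evaluate every action in the set for a given state and
// return the argmax; the sp* aliases come from declares.h.
rl::spActionCont pickGreedyAction(
    rl::algorithm::GradientDescentAbstract& gd,
    const rl::spActionSet<rl::actionCont>& actions,
    const rl::spStateCont& state) {
  rl::spActionValueMap<rl::actionCont> values;
  rl::spActionCont bestAction;
  gd.buildActionValues(actions, state, values, bestAction);  // fills values, tracks max
  return bestAction;
}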
91 changes: 91 additions & 0 deletions include/algorithm/gradient-descent/GradientDescentET.h
@@ -0,0 +1,91 @@
+/**
+ * rl - Reinforcement Learning
+ * Copyright (C) 2016 Joey Andres<yeojserdna@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "../../declares.h"
+#include "../../coding/TileCode.h"
+#include "GradientDescent.h"
+
+namespace rl {
+using coding::TileCode;
+namespace algorithm {
+
+/*! \class GradientDescentET
+ *  \brief Gradient Descent with eligibility traces.
+ */
+class GradientDescentET : public GradientDescentAbstract {
+ public:
+  /**
+   * @param tileCode Type of tile coding.
+   * @param stepSize Step size for gradient descent.
+   * @param discountRate Discount rate for gradient descent.
+   * @param lambda How influential the current state-action is to other
+   *               state-actions.
+   */
+  GradientDescentET(TileCode& tileCode,
+                    rl::FLOAT stepSize,
+                    rl::FLOAT discountRate,
+                    rl::FLOAT lambda);
+
+  /**
+   * Increase the eligibility traces of a given feature vector.
+   * Note: this is much faster than replacing eligibility traces.
+   * @param fv feature vector.
+   */
+  void incrementEligibilityTraces(const FEATURE_VECTOR& fv);
+
+  /**
+   * Replace the eligibility trace of each feature in the vector with 1.0F.
+   * @param fv feature vector.
+   */
+  void replaceEligibilityTraces(const FEATURE_VECTOR& fv);
+
+  /**
+   * Decay each eligibility trace by the discount rate and lambda.
+   */
+  void decreaseEligibilityTraces();
+
+  /**
+   * Reset all eligibility traces to 0.0F.
+   */
+  void resetEligibilityTraces();
+
+  /**
+   * @param currentStateVector array of current states.
+   * @param currentActionVector action taken to get to nextStateVector.
+   * @param nextStateVector array of next states.
+   * @param nextActionValue value of the next state-action.
+   * @param reward reward for taking the action.
+   */
+  void updateWeights(const spStateCont& currentStateVector,
+                     const spActionCont& currentActionVector,
+                     const spStateCont& nextStateVector,
+                     const FLOAT nextActionValue,
+                     const FLOAT reward) override;
+
+  /**
+   * Update weights with the TD error.
+   * @param tdError
+   */
+  virtual void backUpWeights(FLOAT tdError);
+
+ protected:
+  std::vector<rl::FLOAT> _e;  //!< Vector of eligibility traces.
+};
+
+}  // namespace algorithm
+}  // namespace rl
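The trace vector _e is what distinguishes this class: backUpWeights credits every weight in proportion to its trace, and decreaseEligibilityTraces decays the traces by gamma * lambda (presumably the precomputed _discountRateTimesLambda of the base class). A minimal sketch of that backup, hypothetical since the implementation file is not shown here:

#include <cstddef>
#include <vector>

// Sketch: accumulate TD error into every weight by its eligibility, then decay.
void sketchBackUpWeights(std::vector<double>& w,
                         std::vector<double>& e,
                         double tdError,
                         double stepSize,
                         double discountRate,
                         double lambda) {
  for (std::size_t i = 0; i < w.size(); ++i) {
    w[i] += stepSize * tdError * e[i];  // credit in proportion to the trace
    e[i] *= discountRate * lambda;      // decay: e <- gamma * lambda * e
  }
}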

