RL-12: Templatized TileCode
Massive refactoring. This should eliminate the possibility
of bugs caused by wrong dimensions.
Joey Andres committed Dec 9, 2016
1 parent 18649fa commit a76fef2
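
A minimal sketch of the dimension-safety idea behind this change (illustrative code, not from the repository): encoding the number of dimensions in the type, as the templated classes below do with floatArray<N> (presumably a fixed-size std::array of FLOAT), turns a size mismatch into a compile-time error instead of a runtime bug.

#include <array>

// Hypothetical 2-dimensional state; the length is part of the type.
using State2D = std::array<double, 2>;

void observe(const State2D& state);  // passing an array of any other length will not compile
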
Showing 75 changed files with 1,366 additions and 1,609 deletions.
2 changes: 0 additions & 2 deletions include/agent/Actuator.h
@@ -50,8 +50,6 @@ class Actuator : public ActionContainer<A> {
explicit Actuator(const spActionSet<A>& actionSet);
};

typedef Actuator<actionCont> ActuatorSL;

/*! \typedef spActuator
*
* Wraps Actuator with shared_ptr. @see Actuator
27 changes: 16 additions & 11 deletions include/agent/Agent.h
@@ -80,6 +80,15 @@ class AgentSupervised {
spLearningAlgorithm<S, A> _learningAlgorithm;
};

/*! \class AgentSupervisedGD
* \brief AgentSupervised for Gradient Descent.
* \tparam D Total number of dimensions.
* \tparam STATE_DIM Number of state dimensions.
*/
template<size_t D, size_t STATE_DIM = D-1>
using AgentSupervisedGD =
AgentSupervised<floatArray<STATE_DIM>, floatArray<D - STATE_DIM>>;
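
A minimal usage sketch of the new alias (assuming floatArray<N> is a fixed-size array of FLOAT; the name SupervisedAgent2D is purely illustrative):

// Two total dimensions with the default STATE_DIM = D - 1 = 1:
// one state dimension and one action dimension.
using SupervisedAgent2D = AgentSupervisedGD<2>;
// ...which expands to AgentSupervised<floatArray<1>, floatArray<1>>.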

/*! \class Agent
* \brief A class that represents an rl agent.
*
@@ -178,17 +187,13 @@ class Agent {
//!< postExecute.
};

/*! \typedef AgentSL
* \brief Agent for Supervised Learning.
* \tparam D data type of Supervised Learning agent.
*
* Supervised Learning usually deals with multi-dimension states and action,
* hence the specific typedef of Agent.
*
* TODO: Made by young me, probably under a lot of stress so above statement doesn't makes sense. Remove this crap.
/*! \class AgentGD
* \brief Agent for Gradient Descent.
* \tparam D Total number of dimensions.
* \tparam STATE_DIM Number of state dimensions.
*/
template<class D = FLOAT>
using AgentSL = Agent<vector<D>, vector<D>>;
template<size_t D, size_t STATE_DIM = D-1>
using AgentGD = Agent<floatArray<STATE_DIM>, floatArray<D - STATE_DIM>>;
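
Where the removed AgentSL relied on runtime-sized vector<D> for both state and action, AgentGD fixes both lengths at compile time. A minimal sketch (the problem split below is hypothetical):

// Hypothetical 3-dimensional problem: two state dimensions, one action dimension.
using CartAgent = AgentGD<3, 2>;
// ...which expands to Agent<floatArray<2>, floatArray<1>>, so a state or action
// of the wrong length is rejected by the compiler.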

template<class S, class A>
Agent<S, A>::Agent(const spEnvironment<S, A>& environment,
@@ -230,7 +235,7 @@ void Agent<S, A>::preExecute() {

template<class S, class A>
void Agent<S, A>::execute() {
// todo: Acquire last state and reward here.
// todo(jandres): Acquire last state and reward here.
this->applyAction(_currentAction);
spState<S> nextState = std::move(getLastObservedState());
FLOAT reward = this->_environment->getSensor()->getLastObservedReward();
76 changes: 68 additions & 8 deletions include/algorithm/gradient-descent/GradientDescent.h
@@ -18,27 +18,87 @@

#pragma once

#include <algorithm>
#include <vector>
#include <array>

#include "../../declares.h"
#include "../../coding/TileCode.h"
#include "GradientDescentAbstract.h"

using std::vector;
using std::array;

using rl::coding::spTileCode;

namespace rl {
using coding::spTileCode;
namespace algorithm {

/*! \class GradientDescent
* \brief Gradient Descent implementation.
* \tparam D Total number of dimensions.
* \tparam NUM_TILINGS Number of tilings.
* \tparam STATE_DIM Number of state dimensions.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
class GradientDescent : public GradientDescentAbstract {
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
class GradientDescent :
public GradientDescentAbstract<D, NUM_TILINGS, STATE_DIM> {
public:
using GradientDescentAbstract::GradientDescentAbstract;
using GradientDescentAbstract<
D, NUM_TILINGS, STATE_DIM>::GradientDescentAbstract;

void updateWeights(const spStateCont& currentStateVector,
const spActionCont& currentActionVector,
const spStateCont& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) override;
void updateWeights(
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& currentStateVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spActionParam& currentActionVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) override;
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
void GradientDescent<D, NUM_TILINGS, STATE_DIM>::updateWeights(
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& currentStateVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spActionParam& currentActionVector,
const typename GradientDescentAbstract<
D,
NUM_TILINGS,
STATE_DIM>::spStateParam& nextStateVector,
const FLOAT nextActionValue,
const FLOAT reward) {
floatArray<D> currentParams;
std::copy(currentStateVector->begin(),
currentStateVector->end(),
currentParams.begin());
std::copy(currentActionVector->begin(),
currentActionVector->end(),
currentParams.begin() + currentStateVector->size());

FEATURE_VECTOR currentStateFv =
std::move(this->getFeatureVector(currentParams));

FLOAT tdError = reward + this->_discountRate * nextActionValue
- this->getValueFromFeatureVector(currentStateFv);

for (auto f : currentStateFv) {
this->_w[f] += tdError * this->_stepSize;
}
}
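
For reference, updateWeights() above performs a semi-gradient TD update over the active tile-coding features: the value estimate presumably comes from summing the weights of the active tiles (via getValueFromFeatureVector), and every active weight is nudged by stepSize * tdError. A standalone sketch of the same arithmetic, with purely illustrative names and values:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Illustrative setup: 8 weights, 3 tiles active for the current (state, action).
  std::vector<double> weights(8, 0.0);
  std::vector<std::size_t> activeTiles{1, 4, 6};

  const double stepSize = 0.1;
  const double discountRate = 0.9;
  const double reward = 1.0;
  const double nextActionValue = 0.5;

  // Value of the current (state, action): sum of the active-tile weights.
  double q = 0.0;
  for (auto f : activeTiles) { q += weights[f]; }

  // Same TD error and per-feature update as updateWeights() above.
  double tdError = reward + discountRate * nextActionValue - q;
  for (auto f : activeTiles) { weights[f] += tdError * stepSize; }

  std::cout << "weights[1] = " << weights[1] << std::endl;  // prints 0.145
  return 0;
}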

} // namespace algorithm
} // namespace rl