Skip to content

Commit

Permalink
RL-7: Integrated cassandra via TileCodeContainer.
Browse files Browse the repository at this point in the history
- TileCodeContainer is a container data structure for holding
  weight vector in db (cassandra).
- Note: Heavy refactoring is still to be done so it becomes
  an stl like container and also clean up the interface a bit.
  • Loading branch information
JoeyAndres committed Jan 2, 2017
1 parent 7168c0a commit 114b40a
Show file tree
Hide file tree
Showing 54 changed files with 2,190 additions and 254 deletions.
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ set(intrinsic "none" CACHE STRING "The possible intrinsics are the following: no

# Default c++ flags.
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -Wall --pedantic ${CMAKE_CXX_FLAGS_DEBUG}")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -O0 -Wall --pedantic ${CMAKE_CXX_FLAGS_DEBUG}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 ${CMAKE_CXX_FLAGS_RELEASE}")

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_definitions("-DDEBUG")
endif()

if (ENABLE_DB STREQUAL "true")
MESSAGE(STATUS "DB Enabled: ${ENABLE_DB}")
add_definitions("-DENABLE_DB")
set(ENABLE_DB "true")
endif()

add_subdirectory(src)
add_subdirectory(lib)
add_subdirectory(test)
Expand Down
30 changes: 17 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,36 @@ rl

[![Build Status](http://ci.joeyandres.com/job/rl-unit-test-master/badge/icon)](http://ci.joeyandres.com/job/rl-unit-test-master/)

Modularized various Reinforcement Learning Algorithm library.
See test/include and test/src for examples.

Note: This currently builds only on Linux systems.
Some threading libraries are Linux-specific (even in the latest C++ standard, which is supposed to be platform-independent).
Modularized various Reinforcement Learning Algorithm library.

# Compilation and Installation

### Caveat
**rl** has some minor _double precision floating point_ issues with older compilers and OSes.
The mountain car problem in test won't converge to an optimal solution on OS X or with an
old g++ compiler (e.g. g++ 4.2.1). For optimal performance, use Linux and a recent g++ compiler.

### Dependency:
#### Required
* g++-4.9 or greater or clang.
* cmake 3.2.2 or greater.
* boost v1.62 (might work for version < 1.62)
* boost v1.59 or greater.
### Optional: To enable cassandradb
* cassandra v3.9 or greater.
* [datastax-cpp-driver](https://github.com/datastax/cpp-driver) v2.5 or greater.

### Installing dependencies Ubuntu 16.04:
`sudo apt install g++ cmake libboost-all-dev`

### Building
// TODO: Installing dependencies from http://downloads.datastax.com/cpp-driver/ubuntu/16.04/
// TODO: Make a script to do this?

### Building (no cassandradb)
1. `mkdir build`
2. `cd build`
3. `cmake .. -DCMAKE_BUILD_TYPE=Release`
4. `make -j16`
5. `sudo make install`

### Building (with cassandradb)
1. `mkdir build`
2. `cd build`
3. `cmake ..`
3. `cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_DB=true`
4. `make -j16`
5. `sudo make install`

Expand Down
17 changes: 12 additions & 5 deletions include/algorithm/gradient-descent/GradientDescentTileCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,21 @@ namespace algorithm {
* \brief Gradient Descent implementation.
* \tparam D Number of dimension.
* \tparam NUM_TILINGS Number of tilings.
* \tparam WEIGHT_CONT The container object to store the weights.
* \tparam STATE_DIM Number of dimension in State.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <
size_t D,
size_t NUM_TILINGS,
class WEIGHT_CONT,
size_t STATE_DIM>
class GradientDescentTileCode :
public GradientDescentTileCodeAbstract<D, NUM_TILINGS, STATE_DIM> {
public GradientDescentTileCodeAbstract<
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM> {
public:
using GradientDescentTileCodeAbstract<
D, NUM_TILINGS, STATE_DIM>::GradientDescentTileCodeAbstract;
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::GradientDescentTileCodeAbstract;

void updateWeights(
const typename GradientDescentAbstract<
Expand All @@ -63,8 +69,9 @@ class GradientDescentTileCode :
const FLOAT reward) override;
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
void GradientDescentTileCode<D, NUM_TILINGS, STATE_DIM>::updateWeights(
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void GradientDescentTileCode<
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::updateWeights(
const typename GradientDescentAbstract<
D,
STATE_DIM>::spStateParam& currentStateVector,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,15 @@ namespace algorithm {
*
* \tparam D Number of dimension.
* \tparam NUM_TILINGS Number of tilings.
* \tparam WEIGHT_CONT The container object to store the weights.
* \tparam STATE_DIM Number of dimension in State.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <
size_t D,
size_t NUM_TILINGS,
class WEIGHT_CONT,
size_t STATE_DIM>
class GradientDescentTileCodeAbstract :
public GradientDescentAbstract<D, STATE_DIM> {
public:
Expand All @@ -45,10 +50,11 @@ class GradientDescentTileCodeAbstract :
* @param discountRate discount rate for gradient descent.
* @param lambda How influential is current state-action to their state-action.
*/
GradientDescentTileCodeAbstract(const spTileCode<D, NUM_TILINGS>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda);
GradientDescentTileCodeAbstract(
const spTileCode<D, NUM_TILINGS, WEIGHT_CONT>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda);

/**
* Get the value of the parameters in the real space.
Expand All @@ -69,13 +75,13 @@ class GradientDescentTileCodeAbstract :
* \brief Refers to the same object as _courseCode but this one is downcast'd
* to spTileCode allowing access to tile code specific methods.
*/
spTileCode<D, NUM_TILINGS> _tileCode;
spTileCode<D, NUM_TILINGS, WEIGHT_CONT> _tileCode;
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
GradientDescentTileCodeAbstract<
D, NUM_TILINGS, STATE_DIM>::GradientDescentTileCodeAbstract(
const spTileCode <D, NUM_TILINGS> &tileCode,
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::GradientDescentTileCodeAbstract(
const spTileCode <D, NUM_TILINGS, WEIGHT_CONT> &tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda) :
Expand All @@ -88,18 +94,18 @@ GradientDescentTileCodeAbstract<
_tileCode = tileCode;
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
FLOAT
GradientDescentTileCodeAbstract<
D, NUM_TILINGS, STATE_DIM>::getValueFromFeatureVector(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::getValueFromFeatureVector(
const FEATURE_VECTOR& fv) const {
return _tileCode->getValueFromFeatureVector(fv);
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
FEATURE_VECTOR
GradientDescentTileCodeAbstract<
D, NUM_TILINGS, STATE_DIM>::getFeatureVector(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::getFeatureVector(
const floatArray<D>& parameters) const {
return _tileCode->getFeatureVector(parameters);
}
Expand Down
45 changes: 26 additions & 19 deletions include/algorithm/gradient-descent/GradientDescentTileCodeET.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,18 @@ namespace algorithm {
/*! \class GradientDescentTileCodeET
* \brief Gradient Descent eligibility traces.
* \tparam D Number of dimension.
* \tparam WEIGHT_CONT The container object to store the weights.
* \tparam STATE_DIM Number of dimension in State.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <
size_t D,
size_t NUM_TILINGS,
class WEIGHT_CONT,
size_t STATE_DIM>
class GradientDescentTileCodeET :
public GradientDescentTileCodeAbstract<D, NUM_TILINGS, STATE_DIM> {
public GradientDescentTileCodeAbstract<
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM> {
public:
/**
* @param tileCode Type of tile coding.
Expand All @@ -52,7 +58,7 @@ class GradientDescentTileCodeET :
* @param lambda How influential is current state-action to their state-action.
*/
GradientDescentTileCodeET(
const spTileCode<D, NUM_TILINGS>& tileCode,
const spTileCode<D, NUM_TILINGS, WEIGHT_CONT>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda);
Expand Down Expand Up @@ -109,60 +115,61 @@ class GradientDescentTileCodeET :
std::vector<rl::FLOAT> _e; //!< Vector of eligibility traces.
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
GradientDescentTileCodeET<D, NUM_TILINGS, STATE_DIM>::GradientDescentTileCodeET(
const spTileCode<D, NUM_TILINGS>& tileCode,
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
GradientDescentTileCodeET<
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::GradientDescentTileCodeET(
const spTileCode<D, NUM_TILINGS, WEIGHT_CONT>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda) :
GradientDescentTileCodeAbstract<
D, NUM_TILINGS, STATE_DIM>::GradientDescentTileCodeAbstract(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::GradientDescentTileCodeAbstract(
tileCode, stepSize, discountRate, lambda) {
_e = floatVector(this->_courseCode->getSize(), 0);
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::incrementEligibilityTraces(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::incrementEligibilityTraces(
const FEATURE_VECTOR& fv) {
for (rl::INT f : fv) {
++(this->_e)[f];
}
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::replaceEligibilityTraces(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::replaceEligibilityTraces(
const FEATURE_VECTOR& fv) {
for (rl::INT f : fv) {
this->_e[f] = 1;
}
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void
GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::decreaseEligibilityTraces() {
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::decreaseEligibilityTraces() {
size_t n = this->getSize();
for (size_t i = 0; i < n; i++) {
this->_e[i] *= this->_discountRateTimesLambda;
}
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void
GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::backUpWeights(FLOAT tdError) {
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::backUpWeights(FLOAT tdError) {
rl::FLOAT multiplier = (this->_stepSize / NUM_TILINGS) * tdError;
size_t n = this->getSize();
for (size_t i = 0; i < n-1; i++) {
this->_tileCode->at(i) += multiplier*_e[i];
}
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::updateWeights(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::updateWeights(
const typename GradientDescentAbstract<
D,
STATE_DIM>::spStateParam& currentStateVector,
Expand All @@ -188,9 +195,9 @@ void GradientDescentTileCodeET<
decreaseEligibilityTraces();
}

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
void GradientDescentTileCodeET<
D, NUM_TILINGS, STATE_DIM>::resetEligibilityTraces() {
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::resetEligibilityTraces() {
std::fill(&this->_e[0], &this->_e[0] + this->getSize(), 0);
}

Expand Down
19 changes: 12 additions & 7 deletions include/algorithm/gradient-descent/QLearningETGD.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,31 +30,36 @@ namespace algorithm {
* and learning policy).
* \tparam D Number of dimensions.
* \tparam NUM_TILINGS Number of tilings.
* \tparam WEIGHT_CONT The container object to store the weights.
* \tparam STATE_DIM Number of dimension in State. This defaults to D-1.
* This also implies ACTION_DIM = D - STATE_DIM.
*/
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM = D-1>
template <
size_t D,
size_t NUM_TILINGS,
class WEIGHT_CONT = coding::DEFAULT_TILE_CONT,
size_t STATE_DIM = D-1>
class QLearningETGD :
public ReinforcementLearningGDET<D, NUM_TILINGS, STATE_DIM> {
public ReinforcementLearningGDET<D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM> {
public:
QLearningETGD(const spTileCode<D, NUM_TILINGS>& tileCode,
QLearningETGD(const spTileCode<D, NUM_TILINGS, WEIGHT_CONT>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda,
const typename ReinforcementLearningGDAbstract<
D, STATE_DIM>::spPolicy& policy);
};

template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM>
QLearningETGD<D, NUM_TILINGS, STATE_DIM>::QLearningETGD(
const spTileCode<D, NUM_TILINGS>& tileCode,
template <size_t D, size_t NUM_TILINGS, class WEIGHT_CONT, size_t STATE_DIM>
QLearningETGD<D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::QLearningETGD(
const spTileCode<D, NUM_TILINGS, WEIGHT_CONT>& tileCode,
rl::FLOAT stepSize,
rl::FLOAT discountRate,
rl::FLOAT lambda,
const typename ReinforcementLearningGDAbstract<
D, STATE_DIM>::spPolicy& controlPolicy) :
ReinforcementLearningGDET<
D, NUM_TILINGS, STATE_DIM>::ReinforcementLearningGDET(
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM>::ReinforcementLearningGDET(
tileCode, stepSize, discountRate, lambda, controlPolicy) {
}

Expand Down
15 changes: 12 additions & 3 deletions include/algorithm/gradient-descent/QLearningETGDFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,25 @@ namespace algorithm {
* \brief Factory method for QLearningETGD.
* \tparam D Number of dimension.
* \tparam NUM_TILINGS Number of tilings.
* \tparam WEIGHT_CONT The container object to store the weights.
* \tparam STATE_DIM Number of dimension in state.
* Implies that action is D - STATE_DIM.
*/
template <size_t D, size_t NUM_TILINGS, size_t STATE_DIM = D-1>
template <
size_t D,
size_t NUM_TILINGS,
class WEIGHT_CONT = coding::DEFAULT_TILE_CONT,
size_t STATE_DIM = D-1>
class QLearningETGDFactory :
public ReinforcementLearningGDFactory<
D, NUM_TILINGS, STATE_DIM, QLearningETGD> {
D, NUM_TILINGS, WEIGHT_CONT, STATE_DIM, QLearningETGD> {
public:
using ReinforcementLearningGDFactory<
D, NUM_TILINGS, STATE_DIM, QLearningETGD>::ReinforcementLearningGDFactory;
D,
NUM_TILINGS,
WEIGHT_CONT,
STATE_DIM,
QLearningETGD>::ReinforcementLearningGDFactory;
};

} // namespace algorithm
Expand Down
Loading

0 comments on commit 114b40a

Please sign in to comment.