RL-19: std::shared_ptr is now used to reduce memory leaks.
Joey Andres committed Nov 20, 2016
1 parent 6c890b6 commit 156113b
Showing 73 changed files with 740 additions and 546 deletions.
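The smart-pointer aliases used throughout this diff (spState, spAction, spActionSet, spStateSet, spStateAndReward) come from the newly included declares.h, which is not part of this commit. A minimal sketch of what those aliases presumably look like, assuming they are plain std::shared_ptr wrappers over the old value types:

```cpp
// Hypothetical reconstruction of the aliases referenced by this commit.
// The real definitions live in include/declares.h, which this diff does not
// show, so the exact spellings here are assumptions.
#include <memory>
#include <set>
#include <utility>

namespace rl {

using FLOAT = double;  // assumption: FLOAT is a double-precision scalar

template <class S>
using spState = std::shared_ptr<S>;         // state held by shared pointer

template <class A>
using spAction = std::shared_ptr<A>;        // action held by shared pointer

template <class A>
using spActionSet = std::set<spAction<A>>;  // set of shared-pointer actions

template <class S>
using spStateSet = std::set<spState<S>>;    // set of shared-pointer states

template <class S>
using spStateAndReward = std::pair<spState<S>, FLOAT>;  // (next state, reward)

}  // namespace rl
```

Under aliases like these, copying a state or action copies a pointer, and the pointed-to object is released automatically once its last owner goes away, which is the leak reduction the commit title refers to.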
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -27,5 +27,5 @@ MESSAGE(STATUS "Generated global header file")
set(INSTALL_DIRECTORY /usr/local/include)
install(FILES ${PROJECT_BINARY_DIR}/rl
DESTINATION ${INSTALL_DIRECTORY})
install(DIRECTORY ${PROJECT_BINARY_DIR}/${SUB_HEADERS_PREFIX}
install(DIRECTORY ${PROJECT_BINARY_DIR}/${SUB_HEADERS_PREFIX}/
DESTINATION ${INSTALL_DIRECTORY}/${SUB_HEADERS_PREFIX})
21 changes: 11 additions & 10 deletions include/agent/ActionSet.h
@@ -6,6 +6,8 @@

#include <set>

#include "../declares.h"

using namespace std;

namespace rl {
@@ -27,46 +29,45 @@ class ActionSet {
/**
* @param actionSet Initial set of action.
*/
ActionSet(const set<A> &actionSet);
ActionSet(const spActionSet<A> &actionSet);

/**
* @return set of actions.
*/
const set<A> &getActionSet() const;
const spActionSet<A> &getActionSet() const;

/**
* @param data A to be added.
*/
void addAction(const A &data);
void addAction(const rl::spAction<A> &data);

/**
* @param dataSet replace the action set with a new one.
*/
void setActionSet(set<A> dataSet);
void setActionSet(const spActionSet<A>& dataSet);

protected:
set<A> _actionData;

spActionSet<A> _actionData;
};

template<class A>
ActionSet<A>::ActionSet() {}

template<class A>
ActionSet<A>::ActionSet(const set<A> &actionSet) : _actionData(actionSet) {}
ActionSet<A>::ActionSet(const spActionSet<A> &actionSet) : _actionData(actionSet) {}

template<class A>
void ActionSet<A>::addAction(const A &data) {
void ActionSet<A>::addAction(const spAction<A> &data) {
_actionData.insert(data);
}

template<class A>
const set<A> &ActionSet<A>::getActionSet() const {
const spActionSet<A> &ActionSet<A>::getActionSet() const {
return _actionData;
}

template<class A>
void ActionSet<A>::setActionSet(set<A> dataSet) {
void ActionSet<A>::setActionSet(const spActionSet<A>& dataSet) {
_actionData = dataSet;
}

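With ActionSet now storing spAction<A> instead of A by value, callers are expected to create their actions as shared pointers once and reuse those pointers everywhere. A self-contained usage sketch, assuming spActionSet<A> is a std::set<std::shared_ptr<A>> with the default comparator (declares.h may supply a custom comparator instead):

```cpp
// Sketch of the shared_ptr-based action-set usage this commit moves to.
// Assumes spActionSet<A> is std::set<std::shared_ptr<A>> ordered by the
// default comparator, i.e. by pointer value.
#include <iostream>
#include <memory>
#include <set>

template <class A>
using spAction = std::shared_ptr<A>;

template <class A>
using spActionSet = std::set<spAction<A>>;

int main() {
  // Build the action set up front; every consumer shares the same pointers.
  spAction<int> left = std::make_shared<int>(-1);
  spAction<int> right = std::make_shared<int>(+1);
  spActionSet<int> actions{left, right};

  // Re-inserting an existing pointer is a no-op, mirroring ActionSet::addAction.
  actions.insert(left);
  std::cout << "actions: " << actions.size() << '\n';  // prints 2

  // A fresh allocation with an equal value is a distinct element, because the
  // default comparator orders by pointer, not by the pointed-to value.
  actions.insert(std::make_shared<int>(-1));
  std::cout << "actions: " << actions.size() << '\n';  // prints 3
  return 0;
}
```

If value-based membership is needed, declares.h would have to provide a comparator that dereferences the pointers; whether it does is not visible in this commit.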
8 changes: 4 additions & 4 deletions include/agent/Actuator.h
@@ -38,17 +38,17 @@ class Actuator : public ActionSet<A> {
* Constructor for when actions (or some actions) are known.
* @param actionSet Set of actions.
*/
Actuator(const set<A>& actionSet);
Actuator(const spActionSet<A>& actionSet);
};

typedef Actuator<ACTION_CONT> ActuatorSL;
typedef Actuator<actionCont> ActuatorSL;

template<class A>
rl::agent::Actuator<A>::Actuator() {
Actuator<A>::Actuator() {
}

template<class A>
rl::agent::Actuator<A>::Actuator(const set<A>& actionSet) : ActionSet<A>(actionSet) {
Actuator<A>::Actuator(const spActionSet<A>& actionSet) : ActionSet<A>(actionSet) {
}

} // namespace agent
46 changes: 28 additions & 18 deletions include/agent/Agent.h
@@ -53,7 +53,10 @@ class AgentSupervised {
* @param reward
* @param nextState
*/
virtual void train(const S& state, const A& action, FLOAT reward, const S& nextState) {
virtual void train(const spState<S>& state,
const spAction<A>& action,
FLOAT reward,
const spState<S>& nextState) {
this->_learningAlgorithm.update(StateAction<S, A>(state, action),
nextState,
reward,
@@ -92,7 +95,10 @@ class Agent {
* @param reward
* @param nextState
*/
virtual void train(const S& state, const A& action, FLOAT reward, const S& nextState);
virtual void train(const spState<S>& state,
const spAction<A>& action,
FLOAT reward,
const spState<S>& nextState);

/**
* Prepare agent prior to start execution.
@@ -131,12 +137,12 @@ class Agent {
/**
* @param action Agent applies action to the environment.
*/
virtual void applyAction(const A& action);
virtual void applyAction(const spAction<A>& action);

/**
* @return CurrentState of the agent.
*/
virtual S getLastObservedState() const;
virtual spState<S> getLastObservedState() const;

/**
* Calls the Environment<S, A>::reset
@@ -151,8 +157,8 @@

Environment<S, A>& _environment; // !< Aggregate environment obj.

S _currentState; //!< Keeps track of the current state.
A _currentAction; //!< Keeps track of the current action.
spState<S> _currentState; //!< Keeps track of the current state.
spAction<A> _currentAction; //!< Keeps track of the current action.

FLOAT _accumulativeReward; //!< Keeps track of accumulation of reward during
//!< the span of the episode.Specifically, after
@@ -173,7 +179,7 @@ template<class D = FLOAT>
using AgentSL = Agent<vector<D>, vector<D>>;

template<class S, class A>
rl::agent::Agent<S, A>::Agent(Environment<S, A>& environment,
Agent<S, A>::Agent(Environment<S, A>& environment,
algorithm::LearningAlgorithm<S, A>& learningAlgorithm)
: _environment(environment),
_learningAlgorithm(learningAlgorithm),
@@ -184,12 +190,16 @@ rl::agent::Agent<S, A>::Agent(Environment<S, A>& environment,
}

template<class S, class A>
S rl::agent::Agent<S, A>::getLastObservedState() const {
spState<S> Agent<S, A>::getLastObservedState() const {
return _environment.getSensor().getLastObservedState();
}

template<class S, class A>
void rl::agent::Agent<S, A>::train(const S& state, const A& action, FLOAT reward, const S& nextState) {
void Agent<S, A>::train(
const spState<S>& state,
const spAction<A>& action,
FLOAT reward,
const spState<S>& nextState) {
this->_learningAlgorithm.update(
StateAction<S, A>(state, action),
nextState,
@@ -198,7 +208,7 @@ void rl::agent::Agent<S, A>::train(const S& state, const A& action, FLOAT reward
}

template<class S, class A>
void rl::agent::Agent<S, A>::preExecute() {
void Agent<S, A>::preExecute() {
_currentState = std::move(getLastObservedState());
_currentAction = std::move(_learningAlgorithm.getAction(
_currentState, _environment.getActuator().getActionSet()));
@@ -207,10 +217,10 @@ void rl::agent::Agent<S, A>::preExecute() {
}

template<class S, class A>
void rl::agent::Agent<S, A>::execute() {
void Agent<S, A>::execute() {
// todo: Acquire last state and reward here.
this->applyAction(_currentAction);
S nextState = std::move(getLastObservedState());
spState<S> nextState = std::move(getLastObservedState());
FLOAT reward = this->_environment.getSensor().getLastObservedReward();

// Accumulate reward.
@@ -229,7 +239,7 @@ void rl::agent::Agent<S, A>::execute() {
}

template<class S, class A>
size_t rl::agent::Agent<S, A>::executeEpisode(UINT maxIter){
size_t Agent<S, A>::executeEpisode(UINT maxIter){
preExecute();
UINT i = 0;
for(; i < maxIter && episodeDone() == false; i++) {
@@ -240,27 +250,27 @@ size_t rl::agent::Agent<S, A>::executeEpisode(UINT maxIter){
}

template<class S, class A>
bool rl::agent::Agent<S, A>::episodeDone() {
bool Agent<S, A>::episodeDone() {
return _environment.getSensor().isTerminalState(_currentState);
}

template<class S, class A>
rl::FLOAT rl::agent::Agent<S, A>::postExecute() {
rl::FLOAT Agent<S, A>::postExecute() {
return _accumulativeReward;
}

template<class S, class A>
inline rl::FLOAT rl::agent::Agent<S, A>::getAccumulativeReward() const {
inline rl::FLOAT Agent<S, A>::getAccumulativeReward() const {
return _accumulativeReward;
}

template<class S, class A>
void rl::agent::Agent<S, A>::applyAction(const A& action) {
void Agent<S, A>::applyAction(const spAction<A>& action) {
this->_environment.applyAction(action);
}

template<class S, class A>
void rl::agent::Agent<S, A>::reset() {
void Agent<S, A>::reset() {
this->_environment.reset();
};

12 changes: 6 additions & 6 deletions include/agent/Environment.h
@@ -34,13 +34,13 @@ class Environment {
* @param stateAction Given this state-action, gives next state and reward.
* @return Next state and reward.
*/
virtual std::pair<S, FLOAT> getNextStateAndReward(const SA &stateAction) = 0;
virtual spStateAndReward<S> getNextStateAndReward(const SA &stateAction) = 0;

/**
* @see Actuator Documentation for example.
* @param action to be applied to environment.
*/
virtual StateAndReward<S> applyAction(const A &action);
virtual spStateAndReward<S> applyAction(const spAction<A> &action);

Actuator<A> &getActuator();
const Actuator<A> &getActuator() const;
@@ -65,10 +65,10 @@ inline Environment<S, A>::Environment(Actuator<A> &actuator, Sensor<S> &sensor)
}

template<class S, class A>
StateAndReward<S> Environment<S, A>::applyAction(const A &action) {
auto currentState = this->_sensor.getLastObservedState();
auto currentStateAction = StateAction<S, A>(currentState, action);
auto nextStateAndReward = this->getNextStateAndReward(currentStateAction);
spStateAndReward<S> Environment<S, A>::applyAction(const spAction<A> &action) {
spState<S> currentState = this->_sensor.getLastObservedState();
StateAction<S, A> currentStateAction (currentState, action);
spStateAndReward<S> nextStateAndReward = this->getNextStateAndReward(currentStateAction);
this->_sensor.setLastObservedStateAndReward(nextStateAndReward);

return nextStateAndReward;
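Environment::applyAction now reports the transition as a single spStateAndReward<S> value that the Sensor stores via setLastObservedStateAndReward. A toy, self-contained sketch of that contract, assuming the alias is a std::pair of a shared-pointer state and a FLOAT reward (the real alias lives in declares.h, which this commit does not touch):

```cpp
// Toy stand-in for Environment<S, A>::getNextStateAndReward on a 1-D walk,
// showing the (next state, reward) pair shape used above. The aliases below
// are assumptions mirroring what declares.h presumably provides.
#include <iostream>
#include <memory>
#include <utility>

using FLOAT = double;
template <class S> using spState = std::shared_ptr<S>;
template <class A> using spAction = std::shared_ptr<A>;
template <class S> using spStateAndReward = std::pair<spState<S>, FLOAT>;

spStateAndReward<int> getNextStateAndReward(const spState<int>& state,
                                            const spAction<int>& action) {
  spState<int> next = std::make_shared<int>(*state + *action);
  FLOAT reward = (*next == 0) ? 1.0 : -0.1;  // small bonus for reaching the origin
  return {next, reward};
}

int main() {
  spState<int> current = std::make_shared<int>(3);
  spAction<int> stepLeft = std::make_shared<int>(-1);
  auto nextStateAndReward = getNextStateAndReward(current, stepLeft);
  std::cout << "next state: " << *std::get<0>(nextStateAndReward)
            << ", reward: " << std::get<1>(nextStateAndReward) << '\n';
  return 0;
}
```

Accessing the pair through std::get matches how Sensor::setLastObservedStateAndReward unpacks it in the diff below.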
26 changes: 13 additions & 13 deletions include/agent/Sensor.h
@@ -32,12 +32,12 @@ namespace agent {
template<class S>
class Sensor {
public:
Sensor(const S &initialState);
Sensor(const spState<S> &initialState);

/**
* @return current state of agent in environment.
*/
virtual const S &getLastObservedState() const;
virtual const spState<S> &getLastObservedState() const;

/**
* Maps sensorState to its corresponding reward.
@@ -50,13 +50,13 @@ class Sensor {
* Set the last observed state.
* @param s Last observed state.
*/
virtual void setLastObservedState(const S &s);
virtual void setLastObservedState(const spState<S> &s);

/**
* Changes initial state.
* @param s New initial state.
*/
virtual void setInitialState(const S &s);
virtual void setInitialState(const spState<S> &s);

/**
* Set the last observed reward
@@ -69,7 +69,7 @@ class Sensor {
* @param stateAndReward pair of state and reward.
*/
virtual void setLastObservedStateAndReward(
const StateAndReward<S> &stateAndReward);
const spStateAndReward<S> &stateAndReward);

/**
* Resets last observed state to initial state.
@@ -80,22 +80,22 @@ class Sensor {
* @param stateData to determine if it is a terminal state.
* @return true if its a terminal state.
*/
virtual bool isTerminalState(const S &stateData) const = 0;
virtual bool isTerminalState(const spState<S> &stateData) const = 0;

private:
FLOAT _lastObservedReward = NAN;
S _initialState;
S _lastObservedState;
spState<S> _initialState;
spState<S> _lastObservedState;
};

template<class S>
Sensor<S>::Sensor(const S &initialState) :
Sensor<S>::Sensor(const spState<S> &initialState) :
_initialState(initialState),
_lastObservedState(_initialState) {
}

template<class S>
const S &Sensor<S>::getLastObservedState() const {
const spState<S> &Sensor<S>::getLastObservedState() const {
return this->_lastObservedState;
}

@@ -105,12 +105,12 @@ rl::FLOAT Sensor<S>::getLastObservedReward() const {
}

template<class S>
void Sensor<S>::setLastObservedState(const S &s) {
void Sensor<S>::setLastObservedState(const spState<S> &s) {
this->_lastObservedState = s;
}

template<class S>
void Sensor<S>::setInitialState(const S &s) {
void Sensor<S>::setInitialState(const spState<S> &s) {
this->_initialState = s;
}

@@ -121,7 +121,7 @@ void Sensor<S>::setLastObservedReward(FLOAT r) {

template<class S>
void Sensor<S>::setLastObservedStateAndReward(
const StateAndReward<S> &stateAndReward) {
const spStateAndReward<S> &stateAndReward) {
this->setLastObservedState(std::get<0>(stateAndReward));
this->setLastObservedReward(std::get<1>(stateAndReward));
}
10 changes: 5 additions & 5 deletions include/agent/SensorDiscrete.h
@@ -27,26 +27,26 @@ class SensorDiscrete : public Sensor<S> {
public:
using Sensor<S>::Sensor;

virtual bool isTerminalState(const S &stateData) const override;
virtual bool isTerminalState(const spState<S> &stateData) const override;

/**
* @param terminalData new terminal state to be added.
*/
virtual void addTerminalState(const S &terminalData);
virtual void addTerminalState(const spState<S> &terminalData);

private:
set<S> _terminalStates; // Must know when to stop.
spStateSet<S> _terminalStates; // Must know when to stop.
};

template<class S>
bool SensorDiscrete<S>::isTerminalState(
const S &stateData) const {
const spState<S> &stateData) const {
return _terminalStates.find(stateData) != _terminalStates.end();
}

template<class S>
void SensorDiscrete<S>::addTerminalState(
const S &terminalData) {
const spState<S> &terminalData) {
_terminalStates.insert(terminalData);
}

