gym api version update (not run)
opasche committed Nov 4, 2022
1 parent d819734 commit 114fd4e
Showing 14 changed files with 312 additions and 140 deletions.
42 changes: 23 additions & 19 deletions CartPole/CartPoleFeatures/main.py
@@ -33,17 +33,18 @@
#For reproducibility of the report's results
torch.manual_seed(1)

def train(environement='CartPole-v0' ,n_episodes=10000, n_timesteps=500,
def train(environement='CartPole-v1', n_episodes=10000, n_timesteps=500,
exploration_decay_rate = 0.001,
discount_rate = 0.999,
lr = 1e-3,
min_exploration_rate = 0.01,#0.001,
max_exploration_rate = 1):
max_exploration_rate = 1,
**kwarg):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#create environment frozen lake
env = gym.make(environement)#.env
#create environment
env = gym.make(environement, **kwarg)#.env

state_shape = env.observation_space.shape
n_actions = env.action_space.n
@@ -73,26 +74,27 @@ def train(environement='CartPole-v0' ,n_episodes=10000, n_timesteps=500,
#training of agent
start_time = time.time()
for episode in range(n_episodes):
state = env.reset()
state, info = env.reset()
total_reward = 0
done = False
for timestep in range(n_timesteps):

#env.render()
#print(observation)

if done:
#print("Episode finished after {} timesteps".format(timestep + 1))
break

action = agent.eps_greedy_action(state)
#print(agent.greedy_eps)
new_state, reward, done, info = env.step(action)
new_state, reward, done, truncated, info = env.step(action)
agent.store_experience(state, action, reward, new_state, done)
agent.update_policy(episode)#, timestep)
state = new_state

# sum up the number of rewards after n episodes
total_reward += reward

if done:
#print("Episode finished after {} timesteps".format(timestep + 1))
break

agent.update_target(episode)
reward_list.append(total_reward)
@@ -114,12 +116,13 @@



def play(agent, environement='CartPole-v0', n_episodes=5, n_timesteps=1000, plot_rewards=False):
def play(agent, environement='CartPole-v1', n_episodes=5, n_timesteps=1000, plot_rewards=False,
**kwarg):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#create environment frozen lake
env = gym.make(environement).env
#create environment
env = gym.make(environement, **kwarg).env

state_shape = env.observation_space.shape
n_actions = env.action_space.n
@@ -133,23 +136,24 @@ def play(agent, environement='CartPole-v0', n_episodes=5, n_timesteps=1000, plot
#training of agent
start_time = time.time()
for episode in range(n_episodes):
state = env.reset()
state, info = env.reset()
total_reward = 0
done = False
for timestep in range(n_timesteps):

env.render()
#print(observation)

if done:
#print("Episode finished after {} timesteps".format(timestep + 1))
break

action = agent.make_action(state)
new_state, reward, done, info = env.step(action)
new_state, reward, done, truncated, info = env.step(action)
state = new_state

# sum up the number of rewards after n episodes
total_reward += reward

if done:
#print("Episode finished after {} timesteps".format(timestep + 1))
break

reward_list.append(total_reward)
if ((episode+1)%100==0):
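
For reference, a minimal sketch of the episode loop under the newer Gym reset/step API that this diff moves toward (the environment id and episode count are illustrative, not taken from the repository): reset() now returns an (observation, info) pair, and step() returns five values, with the episode ending when either the terminated or the truncated flag is set.

import gym

# Illustrative sketch of the gym >= 0.26 episode loop; not the committed code.
env = gym.make('CartPole-v1')

for episode in range(5):
    state, info = env.reset()                # reset() returns (observation, info)
    total_reward = 0.0
    done = False
    while not done:
        action = env.action_space.sample()   # stand-in for agent.eps_greedy_action(state)
        # step() returns five values; the episode ends when either flag is set
        new_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward += reward
        state = new_state
    print(f"episode {episode}: total reward {total_reward}")

env.close()
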
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
348 changes: 227 additions & 121 deletions FrozenLake/reinforcement-learning-frozen-lake.ipynb

Large diffs are not rendered by default.
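
Since the notebook's contents are not rendered above, the following is an assumed illustration only: a tabular greedy rollout on FrozenLake under the same updated Gym API (the environment id, the is_slippery option, and the loop below are assumptions, not the notebook's code).

import gym
import numpy as np

# Assumed illustration of the post-update API on FrozenLake; not the notebook's code.
env = gym.make('FrozenLake-v1', is_slippery=True)
Q = np.zeros((env.observation_space.n, env.action_space.n))  # tabular action values

state, info = env.reset()
done = False
while not done:
    action = int(np.argmax(Q[state, :]))     # greedy action from the (here untrained) table
    new_state, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    state = new_state
env.close()
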

62 changes: 62 additions & 0 deletions RLFramework_tabular/Tabular_Agents.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import random
import time
from IPython.display import clear_output




class Q_agent(object):
"""Basic Q-learning with epsilon-greedy policy."""

def __init__(self, n_states, n_actions,
greedy_eps = 1,
exploration_decay_rate = 0.001,
discount_rate = 0.99,
lr = 0.1,
min_exploration_rate = 0.001,
max_exploration_rate = 1):


self.greedy_eps0 = greedy_eps
self.greedy_eps = self.greedy_eps0
self.n_states = n_states
self.n_actions = n_actions

self.exploration_decay_rate = exploration_decay_rate
self.discount_rate = discount_rate
self.lr = lr
self.min_exploration_rate = min_exploration_rate
self.max_exploration_rate = max_exploration_rate

        # initialize all Q-table values to 0
self.Q = np.zeros((self.n_states, self.n_actions))


def reset_Q(self, n_states, n_actions):
        self.Q = np.zeros((self.n_states, self.n_actions))


def make_action(self, observation, exploit_only=False):
r = random.uniform(0, 1)

if r > self.greedy_eps or (exploit_only):
# exploit
action = np.argmax(self.Q[observation, :])

else:
# explore (take a random action)
action = np.random.randint(0,self.n_actions)

return action


def update_table(self, old_state, new_state, reward, action, episode, t=0):
self.Q[old_state,action] = (1-self.lr)*self.Q[old_state,action] + self.lr*(reward + self.discount_rate * np.max(self.Q[new_state, :]) )

# update greedy eps
self.greedy_eps = self.min_exploration_rate + (self.max_exploration_rate - self.min_exploration_rate) * np.exp(-self.exploration_decay_rate * episode)

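A short usage sketch for this agent on a tabular environment; the import path, environment id, and episode count are assumptions made for illustration and are not part of the commit.

import gym
from RLFramework_tabular.Tabular_Agents import Q_agent  # assumed import path

# Hypothetical training loop wiring Q_agent to a discrete-state Gym environment.
env = gym.make('FrozenLake-v1')
agent = Q_agent(env.observation_space.n, env.action_space.n)

for episode in range(1000):
    state, info = env.reset()
    done = False
    while not done:
        action = agent.make_action(state)
        new_state, reward, terminated, truncated, info = env.step(action)
        agent.update_table(state, new_state, reward, action, episode)
        done = terminated or truncated
        state = new_state
env.close()
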
Empty file added RLFramework_tabular/__init__.py
