Skip to content

Commit

Permalink
interactive cartpole update
Browse files Browse the repository at this point in the history
  • Loading branch information
opasche committed Nov 21, 2022
1 parent 174b573 commit fc966c5
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 63 deletions.
Binary file not shown.
71 changes: 8 additions & 63 deletions Examples/CartPole/InteractiveCartPole/InteractiveCartPole.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,17 @@
from __future__ import print_function

import sys, gym, time
import pygame
from gym.utils.play import play, PlayPlot

# Environment instance the script queries for its action space.
env = gym.make('CartPole-v0')
# Frame rate handed to play() as its `fps` argument.
fps = 30

# Size of the environment's action space; the key handlers only accept
# action indices strictly between 0 and ACTIONS.
ACTIONS = env.action_space.n
# Number of extra steps for which rollout() repeats the previous control
# decision before sampling a new one; raise it to find out how much
# control skipping is still playable.
SKIP_CONTROL = 0
# Keyboard mapping handed to play() as `keys_to_action`:
# left arrow -> action 0, right arrow -> action 1.
mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}

# Shared human-control state: written by the key handlers, read by rollout().
human_agent_action = 0  # action applied while no action key is held
human_wants_restart = False  # set by key code 0xff0d; ends the current episode
human_sets_pause = False  # toggled by key code 32; freezes stepping while True
def callback(obs_t, obs_tp, action, reward, terminated, truncated, info):
    """Per-step hook: report the step reward as the single plotted value.

    Only ``reward`` is used; the remaining parameters are accepted so the
    function matches the seven-argument signature it is declared with.

    Returns:
        A one-element list containing ``reward``.
    """
    plotted_values = [reward]
    return plotted_values
# Live plot of the value returned by callback(), labelled "reward". The
# second argument is derived from `fps` (five seconds' worth of steps)
# instead of repeating the literal 30, so it follows any change to `fps`.
plotter = PlayPlot(callback, fps * 5, ["reward"])

def key_press(key, mod):
    """Key-down handler: update the shared human-control state.

    Key code 0xff0d (presumably Return/Enter -- confirm against the
    windowing library) requests an episode restart, and key code 32
    (presumably the space bar) toggles pause. For any key, the offset of
    its code from ``ord('0')`` is treated as an action number and becomes
    the current action when it lies strictly between 0 and ACTIONS;
    otherwise the current action is left untouched.

    Args:
        key: Integer key code delivered by the window.
        mod: Modifier state delivered by the window; unused.
    """
    global human_agent_action, human_wants_restart, human_sets_pause
    if key == 0xff0d:
        human_wants_restart = True
    if key == 32:
        human_sets_pause = not human_sets_pause
    selected = int(key - ord('0'))
    if 0 < selected < ACTIONS:
        human_agent_action = selected

def key_release(key, mod):
    """Key-up handler: fall back to action 0 when the active key is released.

    The offset of the key code from ``ord('0')`` is treated as an action
    number. The current action is reset to 0 only if that number lies
    strictly between 0 and ACTIONS and equals the action currently
    selected, so releasing some other key does not cancel the action of
    a key that is still held.

    Args:
        key: Integer key code delivered by the window.
        mod: Modifier state delivered by the window; unused.
    """
    global human_agent_action
    released = int(key - ord('0'))
    if 0 < released < ACTIONS and human_agent_action == released:
        human_agent_action = 0

# Render once before touching the viewer -- presumably the viewer window is
# only created on the first render() call (TODO confirm).
env.render()
# Route the window's key events to the handlers defined in this script.
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release

def rollout(env):
    """Play one episode under keyboard control.

    Resets ``env`` and then steps it in a loop, applying the action most
    recently selected through the key handlers. A new control decision is
    taken every ``SKIP_CONTROL + 1`` steps; in between, the previous
    decision is repeated. Each non-zero step reward is printed, and a
    summary line is printed when the episode ends because the environment
    reported ``done`` or a restart was requested.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` returning
            a 4-tuple, and ``render()``.

    Returns:
        False as soon as ``env.render()`` returns a value equal to False
        (no summary is printed in that case); None otherwise.
    """
    global human_agent_action, human_wants_restart, human_sets_pause
    human_wants_restart = False
    env.reset()
    steps_to_hold = 0
    episode_reward = 0
    decisions_taken = 0
    while True:
        if steps_to_hold:
            # Still repeating the previous control decision.
            steps_to_hold -= 1
        else:
            # print("taking action {}".format(human_agent_action))
            action = human_agent_action
            decisions_taken += 1
            steps_to_hold = SKIP_CONTROL

        _, r, done, _ = env.step(action)
        if r != 0:
            print("reward %0.3f" % r)
        episode_reward += r

        # Explicit comparison on purpose: only a result equal to False
        # aborts; other falsy results (such as None) must not.
        window_still_open = env.render()
        if window_still_open == False:
            return False
        if done or human_wants_restart:
            break

        # While paused, keep rendering without stepping the environment.
        while human_sets_pause:
            env.render()
            time.sleep(0.1)
        time.sleep(0.1)
    print("timesteps %i reward %0.2f" % (decisions_taken, episode_reward))

# Usage banner.
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")

# Keep starting new episodes until rollout() returns False. rollout()
# returns None after a normally finished episode, so the comparison has to
# be an explicit equality test and not a truthiness check.
while True:
    window_still_open = rollout(env)
    if window_still_open == False:
        break
# Hand control to gym's interactive `play` helper on a fresh CartPole-v1
# environment: keys are translated through `mapping`, every step is
# forwarded to `plotter.callback`, and `noop=0` is presumably the action
# used when no mapped key is pressed (confirm against the gym docs).
play(gym.make("CartPole-v1", render_mode="rgb_array"), fps=fps, zoom=1.0, keys_to_action=mapping, callback=plotter.callback, noop=0)


72 changes: 72 additions & 0 deletions Examples/CartPole/InteractiveCartPole/InteractiveCartPole_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from __future__ import print_function

import sys, gym, time

# Environment the human plays; the same instance is passed to every
# rollout() call.
env = gym.make('CartPole-v0')

# Size of the environment's action space; the key handlers only accept
# action indices strictly between 0 and ACTIONS.
ACTIONS = env.action_space.n
# Number of extra steps for which rollout() repeats the previous control
# decision before sampling a new one; raise it to find out how much
# control skipping is still playable.
SKIP_CONTROL = 0

# Shared human-control state: written by the key handlers, read by rollout().
human_agent_action = 0  # action applied while no action key is held
human_wants_restart = False  # set by key code 0xff0d; ends the current episode
human_sets_pause = False  # toggled by key code 32; freezes stepping while True

def key_press(key, mod):
    """Key-down handler: update the shared human-control state.

    Key code 0xff0d (presumably Return/Enter -- confirm against the
    windowing library) requests an episode restart, and key code 32
    (presumably the space bar) toggles pause. For any key, the offset of
    its code from ``ord('0')`` is treated as an action number and becomes
    the current action when it lies strictly between 0 and ACTIONS;
    otherwise the current action is left untouched.

    Args:
        key: Integer key code delivered by the window.
        mod: Modifier state delivered by the window; unused.
    """
    global human_agent_action, human_wants_restart, human_sets_pause
    if key == 0xff0d:
        human_wants_restart = True
    if key == 32:
        human_sets_pause = not human_sets_pause
    selected = int(key - ord('0'))
    if 0 < selected < ACTIONS:
        human_agent_action = selected

def key_release(key, mod):
    """Key-up handler: fall back to action 0 when the active key is released.

    The offset of the key code from ``ord('0')`` is treated as an action
    number. The current action is reset to 0 only if that number lies
    strictly between 0 and ACTIONS and equals the action currently
    selected, so releasing some other key does not cancel the action of
    a key that is still held.

    Args:
        key: Integer key code delivered by the window.
        mod: Modifier state delivered by the window; unused.
    """
    global human_agent_action
    released = int(key - ord('0'))
    if 0 < released < ACTIONS and human_agent_action == released:
        human_agent_action = 0

# Render once before touching the viewer -- presumably the viewer window is
# only created on the first render() call (TODO confirm).
env.render()
# Route the window's key events to the handlers defined above.
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release

def rollout(env):
    """Play one episode under keyboard control.

    Resets ``env`` and then steps it in a loop, applying the action most
    recently selected through the key handlers. A new control decision is
    taken every ``SKIP_CONTROL + 1`` steps; in between, the previous
    decision is repeated. Each non-zero step reward is printed, and a
    summary line is printed when the episode ends because the environment
    reported ``done`` or a restart was requested.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` returning
            a 4-tuple, and ``render()``.

    Returns:
        False as soon as ``env.render()`` returns a value equal to False
        (no summary is printed in that case); None otherwise.
    """
    global human_agent_action, human_wants_restart, human_sets_pause
    human_wants_restart = False
    env.reset()
    steps_to_hold = 0
    episode_reward = 0
    decisions_taken = 0
    while True:
        if steps_to_hold:
            # Still repeating the previous control decision.
            steps_to_hold -= 1
        else:
            # print("taking action {}".format(human_agent_action))
            action = human_agent_action
            decisions_taken += 1
            steps_to_hold = SKIP_CONTROL

        _, r, done, _ = env.step(action)
        if r != 0:
            print("reward %0.3f" % r)
        episode_reward += r

        # Explicit comparison on purpose: only a result equal to False
        # aborts; other falsy results (such as None) must not.
        window_still_open = env.render()
        if window_still_open == False:
            return False
        if done or human_wants_restart:
            break

        # While paused, keep rendering without stepping the environment.
        while human_sets_pause:
            env.render()
            time.sleep(0.1)
        time.sleep(0.1)
    print("timesteps %i reward %0.2f" % (decisions_taken, episode_reward))

# Usage banner.
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")

# Keep starting new episodes until rollout() returns False. rollout()
# returns None after a normally finished episode, so the comparison has to
# be an explicit equality test and not a truthiness check.
while True:
    window_still_open = rollout(env)
    if window_still_open == False:
        break


0 comments on commit fc966c5

Please sign in to comment.