interactive cartpole update

opasche · Nov 21, 2022 · fc966c5
1 parent 174b573
commit fc966c5
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 63 deletions.
diff --git a/Examples/CartPole/CartPoleFeatures/.pylint.d/main1.stats b/Examples/CartPole/CartPoleFeatures/.pylint.d/main1.stats
diff --git a/Examples/CartPole/InteractiveCartPole/InteractiveCartPole.py b/Examples/CartPole/InteractiveCartPole/InteractiveCartPole.py
@@ -1,72 +1,17 @@
 from __future__ import print_function
 
 import sys, gym, time
+import pygame
+from gym.utils.play import play, PlayPlot
 
-env = gym.make('CartPole-v0')
+fps = 30
 
-ACTIONS = env.action_space.n
-SKIP_CONTROL = 0    # Use previous control decision SKIP_CONTROL times, that's how you
-                    # can test what skip is still usable.
+mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
 
-human_agent_action = 0
-human_wants_restart = False
-human_sets_pause = False
+def callback(obs_t, obs_tp, action, reward, terminated, truncated, info):
+    return [reward,]
+plotter = PlayPlot(callback, 30 * 5, ["reward"])
 
-def key_press(key, mod):
-    global human_agent_action, human_wants_restart, human_sets_pause
-    if key==0xff0d: human_wants_restart = True
-    if key==32: human_sets_pause = not human_sets_pause
-    a = int( key - ord('0') )
-    if a <= 0 or a >= ACTIONS: return
-    human_agent_action = a
-
-def key_release(key, mod):
-    global human_agent_action
-    a = int( key - ord('0') )
-    if a <= 0 or a >= ACTIONS: return
-    if human_agent_action == a:
-        human_agent_action = 0
-
-env.render()
-env.unwrapped.viewer.window.on_key_press = key_press
-env.unwrapped.viewer.window.on_key_release = key_release
-
-def rollout(env):
-    global human_agent_action, human_wants_restart, human_sets_pause
-    human_wants_restart = False
-    obser = env.reset()
-    skip = 0
-    total_reward = 0
-    total_timesteps = 0
-    while 1:
-        if not skip:
-            #print("taking action {}".format(human_agent_action))
-            a = human_agent_action
-            total_timesteps += 1
-            skip = SKIP_CONTROL
-        else:
-            skip -= 1
-
-        obser, r, done, info = env.step(a)
-        if r != 0:
-            print("reward %0.3f" % r)
-        total_reward += r
-        window_still_open = env.render()
-        if window_still_open==False: return False
-        if done: break
-        if human_wants_restart: break
-        while human_sets_pause:
-            env.render()
-            time.sleep(0.1)
-        time.sleep(0.1)
-    print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
-
-print("ACTIONS={}".format(ACTIONS))
-print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
-print("No keys pressed is taking action 0")
-
-while 1:
-    window_still_open = rollout(env)
-    if window_still_open==False: break
+play(gym.make("CartPole-v1", render_mode="rgb_array"), fps=fps, zoom=1.0, keys_to_action=mapping, callback=plotter.callback, noop=0)
 
 
diff --git a/Examples/CartPole/InteractiveCartPole/InteractiveCartPole_old.py b/Examples/CartPole/InteractiveCartPole/InteractiveCartPole_old.py
@@ -0,0 +1,72 @@
+from __future__ import print_function
+
+import sys, gym, time
+
+env = gym.make('CartPole-v0')
+
+ACTIONS = env.action_space.n
+SKIP_CONTROL = 0    # Number of extra steps that reuse the previous control decision;
+                    # raise it to test how much control skipping is still usable.
+
+human_agent_action = 0
+human_wants_restart = False
+human_sets_pause = False
+
+def key_press(key, mod):
+    global human_agent_action, human_wants_restart, human_sets_pause
+    if key==0xff0d: human_wants_restart = True
+    if key==32: human_sets_pause = not human_sets_pause
+    a = int( key - ord('0') )
+    if a <= 0 or a >= ACTIONS: return
+    human_agent_action = a
+
+def key_release(key, mod):
+    global human_agent_action
+    a = int( key - ord('0') )
+    if a <= 0 or a >= ACTIONS: return
+    if human_agent_action == a:
+        human_agent_action = 0
+
+env.render()
+env.unwrapped.viewer.window.on_key_press = key_press
+env.unwrapped.viewer.window.on_key_release = key_release
+
+def rollout(env):
+    global human_agent_action, human_wants_restart, human_sets_pause
+    human_wants_restart = False
+    obser = env.reset()
+    skip = 0
+    total_reward = 0
+    total_timesteps = 0
+    while 1:
+        if not skip:
+            #print("taking action {}".format(human_agent_action))
+            a = human_agent_action
+            total_timesteps += 1
+            skip = SKIP_CONTROL
+        else:
+            skip -= 1
+
+        obser, r, done, info = env.step(a)
+        if r != 0:
+            print("reward %0.3f" % r)
+        total_reward += r
+        window_still_open = env.render()
+        if window_still_open==False: return False
+        if done: break
+        if human_wants_restart: break
+        while human_sets_pause:
+            env.render()
+            time.sleep(0.1)
+        time.sleep(0.1)
+    print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
+
+print("ACTIONS={}".format(ACTIONS))
+print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
+print("No keys pressed is taking action 0")
+
+while 1:
+    window_still_open = rollout(env)
+    if window_still_open==False: break
+
+