From 1cec1562d254491e225a8eb71cab4ece1a49024e Mon Sep 17 00:00:00 2001
From: sven1977 <svenmika1977@gmail.com>
Date: Wed, 15 May 2024 21:55:40 +0200
Subject: [PATCH] WIP: drop `_envrunner` suffix from RLlib tuned examples and BUILD test names

Signed-off-by: sven1977 <svenmika1977@gmail.com>
---
 doc/source/rllib/rllib-cli.md                 |  4 +-
 rllib/BUILD                                   | 55 ++++++++++---------
 rllib/tests/test_rllib_train_and_evaluate.py  |  6 +-
 rllib/tuned_examples/create_plots.py          |  5 --
 ...tpole_dqn_envrunner.py => cartpole_dqn.py} |  0
 ...tpole_ppo_envrunner.py => cartpole_ppo.py} |  0
 ...vrunner.py => multi_agent_pendulum_ppo.py} |  0
 ...dulum_ppo_envrunner.py => pendulum_ppo.py} |  0
 ...dulum_sac_envrunner.py => pendulum_sac.py} |  0
 9 files changed, 33 insertions(+), 37 deletions(-)
 delete mode 100644 rllib/tuned_examples/create_plots.py
 rename rllib/tuned_examples/dqn/{cartpole_dqn_envrunner.py => cartpole_dqn.py} (100%)
 rename rllib/tuned_examples/ppo/{cartpole_ppo_envrunner.py => cartpole_ppo.py} (100%)
 rename rllib/tuned_examples/ppo/{multi_agent_pendulum_ppo_envrunner.py => multi_agent_pendulum_ppo.py} (100%)
 rename rllib/tuned_examples/ppo/{pendulum_ppo_envrunner.py => pendulum_ppo.py} (100%)
 rename rllib/tuned_examples/sac/{pendulum_sac_envrunner.py => pendulum_sac.py} (100%)
diff --git a/doc/source/rllib/rllib-cli.md b/doc/source/rllib/rllib-cli.md
index d91f48243c98..a254a35958cd 100644
--- a/doc/source/rllib/rllib-cli.md
+++ b/doc/source/rllib/rllib-cli.md
@@ -190,7 +190,7 @@ Here's an example that uses one of the examples hosted in the Ray GitHub reposit
     </span>
     <span data-ty="input">ray-project/ray/master/rllib/tuned_examples/\
     </span>
-    <span data-ty="input">ppo/cartpole_ppo_envrunner.py -t python
+    <span data-ty="input">ppo/cartpole_ppo.py -t python
     </span>
 </div>
 
@@ -200,7 +200,7 @@ The `-t` or `--type` option is used to specify the type of the configuration fil
 in this case `python`, since we're using a Python file.
 This is what the Python configuration of this example looks like:
 
-```{literalinclude} ../../../rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py
+```{literalinclude} ../../../rllib/tuned_examples/ppo/cartpole_ppo.py
 :language: python
 ```
 
diff --git a/rllib/BUILD b/rllib/BUILD
index 46995ff865fd..06d291eca1ad 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -296,10 +296,10 @@ py_test(
 
 py_test(
     name = "learning_tests_cartpole_dqn",
-    main = "tuned_examples/dqn/cartpole_dqn_envrunner.py",
+    main = "tuned_examples/dqn/cartpole_dqn.py",
     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
     size = "large",
-    srcs = ["tuned_examples/dqn/cartpole_dqn_envrunner.py"],
+    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
 
@@ -375,10 +375,10 @@ py_test(
 # PPO
 py_test(
     name = "learning_tests_cartpole_ppo",
-    main = "tuned_examples/ppo/cartpole_ppo_envrunner.py",
+    main = "tuned_examples/ppo/cartpole_ppo.py",
     tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "no_tf_static_graph"],
     size = "large",
-    srcs = ["tuned_examples/ppo/cartpole_ppo_envrunner.py"],
+    srcs = ["tuned_examples/ppo/cartpole_ppo.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
 
@@ -393,10 +393,10 @@ py_test(
 
 py_test(
     name = "learning_tests_pendulum_ppo",
-    main = "tuned_examples/ppo/pendulum_ppo_envrunner.py",
+    main = "tuned_examples/ppo/pendulum_ppo.py",
     tags = ["torch_only", "team:rllib", "exclusive", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous"],
     size = "large",
-    srcs = ["tuned_examples/ppo/pendulum_ppo_envrunner.py"],
+    srcs = ["tuned_examples/ppo/pendulum_ppo.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
 
@@ -413,10 +413,10 @@ py_test(
 
 py_test(
     name = "learning_tests_multi_agent_pendulum_ppo",
-    main = "tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py",
+    main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py",
     tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous", "torch_only"],
     size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer
-    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py"],
+    srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
 
@@ -445,10 +445,10 @@ py_test(
 # SAC
 py_test(
     name = "learning_tests_pendulum_sac",
-    main = "tuned_examples/sac/pendulum_sac_envrunner.py",
+    main = "tuned_examples/sac/pendulum_sac.py",
     tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_pendulum", "learning_tests_continuous"],
     size = "large",
-    srcs = ["tuned_examples/sac/pendulum_sac_envrunner.py"],
+    srcs = ["tuned_examples/sac/pendulum_sac.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
 
@@ -2337,7 +2337,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2346,7 +2346,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_duration_auto_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_duration_auto_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples", "examples_use_all_core"],
     size = "large",
@@ -2355,7 +2355,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_511_ts_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_511_ts_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2364,7 +2364,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_1001_ts_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_1001_ts_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2373,7 +2373,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2382,7 +2382,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_10_episodes_torch_envrunner",
+    name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_10_episodes_torch",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2392,7 +2392,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_tf",
+    name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_tf_old_api_stack",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2402,7 +2402,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf",
+    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf_old_api_stack",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2412,7 +2412,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch",
+    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch_old_api_stack",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2422,7 +2422,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf2",
+    name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf2_old_api_stack",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2432,7 +2432,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/evaluation/evaluation_parallel_to_training_211_ts_torch",
+    name = "examples/evaluation/evaluation_parallel_to_training_211_ts_torch_old_api_stack",
     main = "examples/evaluation/evaluation_parallel_to_training.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2773,7 +2773,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/self_play_with_open_spiel_connect_4_ppo_torch_envrunner",
+    name = "examples/multi_agent/self_play_with_open_spiel_connect_4_ppo_torch",
     main = "examples/multi_agent/self_play_with_open_spiel.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2782,7 +2782,7 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch_envrunner",
+    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
     main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "large",
@@ -2792,7 +2792,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/multi_agent/two_algorithms_tf",
+    name = "examples/multi_agent/two_algorithms_tf_old_api_stack",
     main = "examples/multi_agent/two_algorithms.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2802,7 +2802,7 @@ py_test(
 
 # @OldAPIStack
 py_test(
-    name = "examples/multi_agent/two_algorithms_torch",
+    name = "examples/multi_agent/two_algorithms_torch_old_api_stack",
     main = "examples/multi_agent/two_algorithms.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "small",
@@ -2834,7 +2834,7 @@ py_test(
 
 #@OldAPIStack
 py_test(
-    name = "examples/offline_rl/offline_rl_torch",
+    name = "examples/offline_rl/offline_rl_torch_old_api_stack",
     main = "examples/offline_rl/offline_rl.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
@@ -2884,12 +2884,13 @@ py_test(
 # ....................................
 #@OldAPIStack @HybridAPIStack
 py_test(
-    name = "examples/rl_modules/classes/mobilenet_rlm",
+    name = "examples/rl_modules/classes/mobilenet_rlm_hybrid_api_stack",
     main = "examples/rl_modules/classes/mobilenet_rlm.py",
     tags = ["team:rllib", "examples", "no_main"],
     size = "small",
     srcs = ["examples/rl_modules/classes/mobilenet_rlm.py"],
 )
+
 py_test(
     name = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm",
     main = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py",
diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py
index 4600d85c2d6f..164866b8ab92 100644
--- a/rllib/tests/test_rllib_train_and_evaluate.py
+++ b/rllib/tests/test_rllib_train_and_evaluate.py
@@ -309,9 +309,9 @@ def test_yaml_run(self):
     def test_python_run(self):
         assert os.popen(
             f"python {rllib_dir}/scripts.py train file tuned_examples/ppo/"
-            f"cartpole_ppo_envrunner.py "
-            f"--stop={{'num_env_steps_sampled_lifetime': 50000, "
-            f"'env_runners/episode_return_mean': 200}}"
+            f"cartpole_ppo.py "
+            f"--stop=\"{{'num_env_steps_sampled_lifetime': 50000, "
+            f"'env_runners/episode_return_mean': 200}}\""
         ).read()
 
     def test_all_example_files_exist(self):
diff --git a/rllib/tuned_examples/create_plots.py b/rllib/tuned_examples/create_plots.py
deleted file mode 100644
index aae042bb5b58..000000000000
--- a/rllib/tuned_examples/create_plots.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# TODO(sven):
-#  Add a simple script that takes n csv input files and generates plot(s)
-#  from these with: x-axis=ts OR wall-time; y-axis=any metric(s) (up to 2).
-#  ability to merge any m csv files (e.g. tf vs torch; or n seeds) together
-#  in one plot.
diff --git a/rllib/tuned_examples/dqn/cartpole_dqn_envrunner.py b/rllib/tuned_examples/dqn/cartpole_dqn.py
similarity index 100%
rename from rllib/tuned_examples/dqn/cartpole_dqn_envrunner.py
rename to rllib/tuned_examples/dqn/cartpole_dqn.py
diff --git a/rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py b/rllib/tuned_examples/ppo/cartpole_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py
rename to rllib/tuned_examples/ppo/cartpole_ppo.py
diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py b/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py
rename to rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py
diff --git a/rllib/tuned_examples/ppo/pendulum_ppo_envrunner.py b/rllib/tuned_examples/ppo/pendulum_ppo.py
similarity index 100%
rename from rllib/tuned_examples/ppo/pendulum_ppo_envrunner.py
rename to rllib/tuned_examples/ppo/pendulum_ppo.py
diff --git a/rllib/tuned_examples/sac/pendulum_sac_envrunner.py b/rllib/tuned_examples/sac/pendulum_sac.py
similarity index 100%
rename from rllib/tuned_examples/sac/pendulum_sac_envrunner.py
rename to rllib/tuned_examples/sac/pendulum_sac.py