From 1cec1562d254491e225a8eb71cab4ece1a49024e Mon Sep 17 00:00:00 2001 From: sven1977 Date: Wed, 15 May 2024 21:55:40 +0200 Subject: [PATCH] wip Signed-off-by: sven1977 --- doc/source/rllib/rllib-cli.md | 4 +- rllib/BUILD | 55 ++++++++++--------- rllib/tests/test_rllib_train_and_evaluate.py | 6 +- rllib/tuned_examples/create_plots.py | 5 -- ...tpole_dqn_envrunner.py => cartpole_dqn.py} | 0 ...tpole_ppo_envrunner.py => cartpole_ppo.py} | 0 ...vrunner.py => multi_agent_pendulum_ppo.py} | 0 ...dulum_ppo_envrunner.py => pendulum_ppo.py} | 0 ...dulum_sac_envrunner.py => pendulum_sac.py} | 0 9 files changed, 33 insertions(+), 37 deletions(-) delete mode 100644 rllib/tuned_examples/create_plots.py rename rllib/tuned_examples/dqn/{cartpole_dqn_envrunner.py => cartpole_dqn.py} (100%) rename rllib/tuned_examples/ppo/{cartpole_ppo_envrunner.py => cartpole_ppo.py} (100%) rename rllib/tuned_examples/ppo/{multi_agent_pendulum_ppo_envrunner.py => multi_agent_pendulum_ppo.py} (100%) rename rllib/tuned_examples/ppo/{pendulum_ppo_envrunner.py => pendulum_ppo.py} (100%) rename rllib/tuned_examples/sac/{pendulum_sac_envrunner.py => pendulum_sac.py} (100%) diff --git a/doc/source/rllib/rllib-cli.md b/doc/source/rllib/rllib-cli.md index d91f48243c98..a254a35958cd 100644 --- a/doc/source/rllib/rllib-cli.md +++ b/doc/source/rllib/rllib-cli.md @@ -190,7 +190,7 @@ Here's an example that uses one of the examples hosted in the Ray GitHub reposit ray-project/ray/master/rllib/tuned_examples/\ - ppo/cartpole_ppo_envrunner.py -t python + ppo/cartpole_ppo.py -t python @@ -200,7 +200,7 @@ The `-t` or `--type` option is used to specify the type of the configuration fil in this case `python`, since we're using a Python file. This is what the Python configuration of this example looks like: -```{literalinclude} ../../../rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py +```{literalinclude} ../../../rllib/tuned_examples/ppo/cartpole_ppo.py :language: python ``` diff --git a/rllib/BUILD b/rllib/BUILD index 46995ff865fd..06d291eca1ad 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -296,10 +296,10 @@ py_test( py_test( name = "learning_tests_cartpole_dqn", - main = "tuned_examples/dqn/cartpole_dqn_envrunner.py", + main = "tuned_examples/dqn/cartpole_dqn.py", tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"], size = "large", - srcs = ["tuned_examples/dqn/cartpole_dqn_envrunner.py"], + srcs = ["tuned_examples/dqn/cartpole_dqn.py"], args = ["--as-test", "--enable-new-api-stack"] ) @@ -375,10 +375,10 @@ py_test( # PPO py_test( name = "learning_tests_cartpole_ppo", - main = "tuned_examples/ppo/cartpole_ppo_envrunner.py", + main = "tuned_examples/ppo/cartpole_ppo.py", tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "no_tf_static_graph"], size = "large", - srcs = ["tuned_examples/ppo/cartpole_ppo_envrunner.py"], + srcs = ["tuned_examples/ppo/cartpole_ppo.py"], args = ["--as-test", "--enable-new-api-stack"] ) @@ -393,10 +393,10 @@ py_test( py_test( name = "learning_tests_pendulum_ppo", - main = "tuned_examples/ppo/pendulum_ppo_envrunner.py", + main = "tuned_examples/ppo/pendulum_ppo.py", tags = ["torch_only", "team:rllib", "exclusive", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous"], size = "large", - srcs = ["tuned_examples/ppo/pendulum_ppo_envrunner.py"], + srcs = ["tuned_examples/ppo/pendulum_ppo.py"], args = ["--as-test", "--enable-new-api-stack"] ) @@ -413,10 +413,10 @@ py_test( py_test( name = "learning_tests_multi_agent_pendulum_ppo", - main = "tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py", + main = "tuned_examples/ppo/multi_agent_pendulum_ppo.py", tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous", "torch_only"], size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer - srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py"], + srcs = ["tuned_examples/ppo/multi_agent_pendulum_ppo.py"], args = ["--as-test", "--enable-new-api-stack"] ) @@ -445,10 +445,10 @@ py_test( # SAC py_test( name = "learning_tests_pendulum_sac", - main = "tuned_examples/sac/pendulum_sac_envrunner.py", + main = "tuned_examples/sac/pendulum_sac.py", tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_pendulum", "learning_tests_continuous"], size = "large", - srcs = ["tuned_examples/sac/pendulum_sac_envrunner.py"], + srcs = ["tuned_examples/sac/pendulum_sac.py"], args = ["--as-test", "--enable-new-api-stack"] ) @@ -2337,7 +2337,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2346,7 +2346,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_duration_auto_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_duration_auto_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples", "examples_use_all_core"], size = "large", @@ -2355,7 +2355,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_511_ts_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_511_ts_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2364,7 +2364,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_1001_ts_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_1001_ts_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2373,7 +2373,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2382,7 +2382,7 @@ py_test( ) py_test( - name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_10_episodes_torch_envrunner", + name = "examples/evaluation/evaluation_parallel_to_training_multi_agent_10_episodes_torch", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2392,7 +2392,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_tf", + name = "examples/evaluation/evaluation_parallel_to_training_13_episodes_tf_old_api_stack", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2402,7 +2402,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf", + name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf_old_api_stack", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2412,7 +2412,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch", + name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_torch_old_api_stack", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2422,7 +2422,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf2", + name = "examples/evaluation/evaluation_parallel_to_training_duration_auto_tf2_old_api_stack", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2432,7 +2432,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/evaluation/evaluation_parallel_to_training_211_ts_torch", + name = "examples/evaluation/evaluation_parallel_to_training_211_ts_torch_old_api_stack", main = "examples/evaluation/evaluation_parallel_to_training.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2773,7 +2773,7 @@ py_test( ) py_test( - name = "examples/multi_agent/self_play_with_open_spiel_connect_4_ppo_torch_envrunner", + name = "examples/multi_agent/self_play_with_open_spiel_connect_4_ppo_torch", main = "examples/multi_agent/self_play_with_open_spiel.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2782,7 +2782,7 @@ py_test( ) py_test( - name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch_envrunner", + name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch", main = "examples/multi_agent/self_play_league_based_with_open_spiel.py", tags = ["team:rllib", "exclusive", "examples"], size = "large", @@ -2792,7 +2792,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/multi_agent/two_algorithms_tf", + name = "examples/multi_agent/two_algorithms_tf_old_api_stack", main = "examples/multi_agent/two_algorithms.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2802,7 +2802,7 @@ py_test( # @OldAPIStack py_test( - name = "examples/multi_agent/two_algorithms_torch", + name = "examples/multi_agent/two_algorithms_torch_old_api_stack", main = "examples/multi_agent/two_algorithms.py", tags = ["team:rllib", "exclusive", "examples"], size = "small", @@ -2834,7 +2834,7 @@ py_test( #@OldAPIStack py_test( - name = "examples/offline_rl/offline_rl_torch", + name = "examples/offline_rl/offline_rl_torch_old_api_stack", main = "examples/offline_rl/offline_rl.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", @@ -2884,12 +2884,13 @@ py_test( # .................................... #@OldAPIStack @HybridAPIStack py_test( - name = "examples/rl_modules/classes/mobilenet_rlm", + name = "examples/rl_modules/classes/mobilenet_rlm_hybrid_api_stack", main = "examples/rl_modules/classes/mobilenet_rlm.py", tags = ["team:rllib", "examples", "no_main"], size = "small", srcs = ["examples/rl_modules/classes/mobilenet_rlm.py"], ) + py_test( name = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm", main = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py", diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py index 4600d85c2d6f..164866b8ab92 100644 --- a/rllib/tests/test_rllib_train_and_evaluate.py +++ b/rllib/tests/test_rllib_train_and_evaluate.py @@ -309,9 +309,9 @@ def test_yaml_run(self): def test_python_run(self): assert os.popen( f"python {rllib_dir}/scripts.py train file tuned_examples/ppo/" - f"cartpole_ppo_envrunner.py " - f"--stop={{'num_env_steps_sampled_lifetime': 50000, " - f"'env_runners/episode_return_mean': 200}}" + f"cartpole_ppo.py " + f"--stop=\"{{'num_env_steps_sampled_lifetime': 50000, " + f"'env_runners/episode_return_mean': 200}}\"" ).read() def test_all_example_files_exist(self): diff --git a/rllib/tuned_examples/create_plots.py b/rllib/tuned_examples/create_plots.py deleted file mode 100644 index aae042bb5b58..000000000000 --- a/rllib/tuned_examples/create_plots.py +++ /dev/null @@ -1,5 +0,0 @@ -# TODO(sven): -# Add a simple script that takes n csv input files and generates plot(s) -# from these with: x-axis=ts OR wall-time; y-axis=any metric(s) (up to 2). -# ability to merge any m csv files (e.g. tf vs torch; or n seeds) together -# in one plot. diff --git a/rllib/tuned_examples/dqn/cartpole_dqn_envrunner.py b/rllib/tuned_examples/dqn/cartpole_dqn.py similarity index 100% rename from rllib/tuned_examples/dqn/cartpole_dqn_envrunner.py rename to rllib/tuned_examples/dqn/cartpole_dqn.py diff --git a/rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py b/rllib/tuned_examples/ppo/cartpole_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/cartpole_ppo_envrunner.py rename to rllib/tuned_examples/ppo/cartpole_ppo.py diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py b/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/multi_agent_pendulum_ppo_envrunner.py rename to rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py diff --git a/rllib/tuned_examples/ppo/pendulum_ppo_envrunner.py b/rllib/tuned_examples/ppo/pendulum_ppo.py similarity index 100% rename from rllib/tuned_examples/ppo/pendulum_ppo_envrunner.py rename to rllib/tuned_examples/ppo/pendulum_ppo.py diff --git a/rllib/tuned_examples/sac/pendulum_sac_envrunner.py b/rllib/tuned_examples/sac/pendulum_sac.py similarity index 100% rename from rllib/tuned_examples/sac/pendulum_sac_envrunner.py rename to rllib/tuned_examples/sac/pendulum_sac.py