diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py
index e270cd2b..77401036 100644
--- a/compiler_opt/es/blackbox_learner.py
+++ b/compiler_opt/es/blackbox_learner.py
@@ -196,6 +196,10 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
     with self._summary_writer.as_default():
       tf.summary.scalar(
           'reward/average_reward_train', np.mean(rewards), step=self._step)
+
+      tf.summary.scalar(
+          'reward/maximum_reward_train', np.max(rewards), step=self._step
+      )
 
       tf.summary.histogram('reward/reward_train', rewards, step=self._step)
 
diff --git a/compiler_opt/es/es_trainer_lib.py b/compiler_opt/es/es_trainer_lib.py
index c1552d18..7ec14657 100644
--- a/compiler_opt/es/es_trainer_lib.py
+++ b/compiler_opt/es/es_trainer_lib.py
@@ -21,6 +21,7 @@
 import gin
 import tensorflow as tf
 import os
+import shutil
 
 from compiler_opt.distributed import worker
 from compiler_opt.distributed.local import local_worker_manager
@@ -79,6 +80,7 @@ def __init__(self, *, all_gin):
     self._clang_path = '/usr/local/google/home/aidengrossman/programming/test_traces/clang'
     self._trace_path = '/usr/local/google/home/aidengrossman/programming/test_traces/execution_trace.pb'
     self._bb_trace_model_path = '/usr/local/google/home/aidengrossman/programming/test_traces/basic_block_trace_model'
+    self._models_for_test_path = '/usr/local/google/home/aidengrossman/programming/output_traces/'
 
   def es_compile(self, params: list[float], baseline_score: float) -> float:
     with tempfile.TemporaryDirectory() as tempdir:
@@ -99,8 +101,13 @@ def es_compile(self, params: list[float], baseline_score: float) -> float:
         self._clang_path, tflitedir)
     score = trace_data_collector.evaluate_compiled_corpus(
         tempdir, self._trace_path, self._bb_trace_model_path)
-    print(score)
+
+    reward = compilation_runner._calculate_reward(score, baseline_score)
+    print(reward)
+    output_path = os.path.join(self._models_for_test_path, "model" + str(reward))
+    if reward > 0.2 and not os.path.exists(output_path):
+      shutil.copytree(tflitedir, output_path)
 
     return compilation_runner._calculate_reward(score, baseline_score)
 
diff --git a/compiler_opt/es/gin_configs/blackbox_learner.gin b/compiler_opt/es/gin_configs/blackbox_learner.gin
index b91915f0..34b97334 100644
--- a/compiler_opt/es/gin_configs/blackbox_learner.gin
+++ b/compiler_opt/es/gin_configs/blackbox_learner.gin
@@ -3,8 +3,8 @@ import compiler_opt.rl.gin_external_configurables
 import compiler_opt.es.blackbox_optimizers
 
 # Blackbox learner config
-BlackboxLearnerConfig.total_steps = 1
-BlackboxLearnerConfig.total_num_perturbations = 5
+BlackboxLearnerConfig.total_steps = 100
+BlackboxLearnerConfig.total_num_perturbations = 100
 BlackboxLearnerConfig.blackbox_optimizer = %blackbox_optimizers.Algorithm.MONTE_CARLO
 BlackboxLearnerConfig.est_type = %blackbox_optimizers.EstimatorType.ANTITHETIC
 # BlackboxLearnerConfig.est_type = %blackbox_optimizers.EstimatorType.FORWARD_FD