diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py
index e270cd2b..77401036 100644
--- a/compiler_opt/es/blackbox_learner.py
+++ b/compiler_opt/es/blackbox_learner.py
@@ -196,6 +196,10 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
     with self._summary_writer.as_default():
       tf.summary.scalar(
           'reward/average_reward_train', np.mean(rewards), step=self._step)
+
+      tf.summary.scalar(
+          'reward/maximum_reward_train', np.max(rewards), step=self._step
+      )
 
       tf.summary.histogram('reward/reward_train', rewards, step=self._step)
 
diff --git a/compiler_opt/es/es_trainer_lib.py b/compiler_opt/es/es_trainer_lib.py
index c1552d18..7ec14657 100644
--- a/compiler_opt/es/es_trainer_lib.py
+++ b/compiler_opt/es/es_trainer_lib.py
@@ -21,6 +21,7 @@
 import gin
 import tensorflow as tf
 import os
+import shutil
 
 from compiler_opt.distributed import worker
 from compiler_opt.distributed.local import local_worker_manager
@@ -79,6 +80,7 @@ def __init__(self, *, all_gin):
     self._clang_path = '/usr/local/google/home/aidengrossman/programming/test_traces/clang'
     self._trace_path = '/usr/local/google/home/aidengrossman/programming/test_traces/execution_trace.pb'
     self._bb_trace_model_path = '/usr/local/google/home/aidengrossman/programming/test_traces/basic_block_trace_model'
+    self._models_for_test_path = '/usr/local/google/home/aidengrossman/programming/output_traces/'
 
   def es_compile(self, params: list[float], baseline_score: float) -> float:
     with tempfile.TemporaryDirectory() as tempdir:
@@ -99,8 +101,13 @@ def es_compile(self, params: list[float], baseline_score: float) -> float:
         self._clang_path, tflitedir)
     score = trace_data_collector.evaluate_compiled_corpus(
         tempdir, self._trace_path, self._bb_trace_model_path)
-    print(score)
+
+    reward = compilation_runner._calculate_reward(score, baseline_score)
+    print(reward)
+    output_path = os.path.join(self._models_for_test_path, "model" + str(reward))
+    if reward > 0.2 and not os.path.exists(output_path):
+      shutil.copytree(tflitedir, output_path)
 
     return compilation_runner._calculate_reward(score, baseline_score)
 
diff --git a/compiler_opt/es/gin_configs/blackbox_learner.gin b/compiler_opt/es/gin_configs/blackbox_learner.gin
index b91915f0..34b97334 100644
--- a/compiler_opt/es/gin_configs/blackbox_learner.gin
+++ b/compiler_opt/es/gin_configs/blackbox_learner.gin
@@ -3,8 +3,8 @@ import compiler_opt.rl.gin_external_configurables
 import compiler_opt.es.blackbox_optimizers
 
 # Blackbox learner config
-BlackboxLearnerConfig.total_steps = 1
-BlackboxLearnerConfig.total_num_perturbations = 5
+BlackboxLearnerConfig.total_steps = 100
+BlackboxLearnerConfig.total_num_perturbations = 100
 BlackboxLearnerConfig.blackbox_optimizer = %blackbox_optimizers.Algorithm.MONTE_CARLO
 BlackboxLearnerConfig.est_type = %blackbox_optimizers.EstimatorType.ANTITHETIC
 # BlackboxLearnerConfig.est_type = %blackbox_optimizers.EstimatorType.FORWARD_FD