# paper_experiment_bigleduc_exploitability.py

from PokerRL.eval.head_to_head.H2HArgs import H2HArgs
from PokerRL.game.games import BigLeduc

from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver

if __name__ == '__main__':
    # Experiment entry point: configures a distributed run on BigLeduc with
    # 11 learner-actor workers, evaluates both the SD-CFR and the Deep CFR
    # strategy with the "br" eval method, then starts the driver.
    #
    # The profile is built first and handed to the Driver afterwards so the
    # (long) hyper-parameter list reads separately from the run setup.
    t_prof = TrainingProfile(
        name="BIGLEDUC_EXPLOITABILITY",

        DISTRIBUTED=True,
        n_learner_actor_workers=11,
        eval_agent_export_freq=9999999,  # Don't export

        nn_type="feedforward",
        max_buffer_size_adv=3.636e5,  # 364k * 11 = ~4M
        max_buffer_size_avrg=3.636e5,  # 364k * 11 = ~4M

        # longer action sequences than FHP -> more samples/iter because external sampling.
        n_traversals_per_iter=800,  # 800 * 11 = 8,800

        n_batches_adv_training=1200,
        n_batches_avrg_training=10000,  # trained far more than necessary
        n_merge_and_table_layer_units_adv=64,
        n_merge_and_table_layer_units_avrg=64,
        n_units_final_adv=64,
        n_units_final_avrg=64,
        n_cards_state_units_adv=64,
        n_cards_state_units_avrg=64,
        mini_batch_size_adv=256,  # 256 * 11 = 2,816
        mini_batch_size_avrg=512,  # 512 * 11 = 5,632
        init_adv_model="last",  # warm start neural weights with init from last iter
        init_avrg_model="random",
        use_pre_layers_adv=True,
        use_pre_layers_avrg=True,

        game_cls=BigLeduc,

        # You can specify one or both modes. Choosing both is useful to compare them.
        eval_modes_of_algo=(
            EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
        ),

        # NOTE(review): only "br" is scheduled in eval_methods below, so these
        # head-to-head settings presumably take effect only if an "h2h" entry
        # is added there -- confirm against TrainingProfile / Driver.
        h2h_args=H2HArgs(
            n_hands=500000,
        ),
        log_verbose=False,
    )

    ctrl = Driver(
        t_prof=t_prof,
        # NOTE(review): the value is presumably the evaluation interval in
        # iterations -- confirm against Driver.
        eval_methods={
            "br": 15,
        },
        # NOTE(review): None presumably means "no iteration limit" -- confirm.
        n_iterations=None,
    )
    ctrl.run()