-
Notifications
You must be signed in to change notification settings - Fork 62
/
paper_experiment_bigleduc_exploitability.py
52 lines (44 loc) · 2.87 KB
/
paper_experiment_bigleduc_exploitability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from PokerRL.eval.head_to_head.H2HArgs import H2HArgs
from PokerRL.game.games import BigLeduc

from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver
# Script entry point: builds a Driver configured with the BigLeduc training
# profile below and runs it, evaluating with the "br" method.
if __name__ == '__main__':
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="BIGLEDUC_EXPLOITABILITY",

            DISTRIBUTED=True,
            n_learner_actor_workers=11,
            eval_agent_export_freq=9999999,  # Don't export

            nn_type="feedforward",

            # Buffer sizes are per worker.
            max_buffer_size_adv=3.636e5,  # 364k * 11 = ~4M
            max_buffer_size_avrg=3.636e5,  # 364k * 11 = ~4M

            # longer action sequences than FHP -> more samples/iter because external sampling.
            n_traversals_per_iter=800,  # 800 * 11 = 8,800

            n_batches_adv_training=1200,
            n_batches_avrg_training=10000,  # trained far more than necessary

            # Network layer widths (advantage and average-strategy nets use the same sizes).
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            n_cards_state_units_adv=64,
            n_cards_state_units_avrg=64,

            mini_batch_size_adv=256,  # 256 * 11 = 2,816
            mini_batch_size_avrg=512,  # 512 * 11 = 5,632

            init_adv_model="last",  # warm start neural weights with init from last iter
            init_avrg_model="random",
            use_pre_layers_adv=True,
            use_pre_layers_avrg=True,

            game_cls=BigLeduc,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
            ),

            # H2HArgs must be imported at the top of the file; without that
            # import this line raises NameError before the Driver is built.
            # NOTE(review): only "br" is listed in eval_methods below, so these
            # head-to-head settings are presumably unused in this run - confirm
            # before removing.
            h2h_args=H2HArgs(
                n_hands=500000,
            ),

            log_verbose=False,
        ),
        # NOTE(review): the value is presumably the evaluation frequency in
        # iterations - confirm against Driver.
        eval_methods={
            "br": 15,
        },
        # NOTE(review): None presumably means "no iteration limit" - confirm
        # against Driver.
        n_iterations=None,
    )
    ctrl.run()