# paper_experiment_sdcfr_vs_deepcfr_h2h.py
from PokerRL.eval.head_to_head.H2HArgs import H2HArgs
from PokerRL.game.games import Flop5Holdem
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver

if __name__ == '__main__':
    """
    Runs the experiment from the paper "Single Deep Counterfactual Regret Minimization" (Steinberger 2019).
    Uses 24 cores.
    """
    ctrl = Driver(t_prof=TrainingProfile(name="EXPERIMENT_SD-CFR_vs_Deep-CFR_FHP",
                                         nn_type="feedforward",  # We also support RNNs, but the paper uses FF
                                         DISTRIBUTED=True,
                                         CLUSTER=False,
                                         n_learner_actor_workers=20,  # 20 workers

                                         # regulate exports
                                         export_each_net=False,
                                         checkpoint_freq=99999999,
                                         eval_agent_export_freq=1,  # produces around 15GB over 150 iterations!

                                         # advantage network training (args with suffix "_adv")
                                         n_actions_traverser_samples=3,  # = external sampling in FHP
                                         n_traversals_per_iter=15000,
                                         n_batches_adv_training=4000,
                                         mini_batch_size_adv=512,  # *20 LAs = 10240
                                         init_adv_model="random",
                                         use_pre_layers_adv=True,
                                         n_cards_state_units_adv=192,
                                         n_merge_and_table_layer_units_adv=64,
                                         n_units_final_adv=64,
                                         max_buffer_size_adv=2e6,  # *20 LAs = 40M
                                         lr_adv=0.001,
                                         lr_patience_adv=99999999,  # No lr decay

                                         # average strategy network training (args with suffix "_avrg")
                                         n_batches_avrg_training=20000,
                                         mini_batch_size_avrg=1024,  # *20 LAs = 20480
                                         init_avrg_model="random",
                                         use_pre_layers_avrg=True,
                                         n_cards_state_units_avrg=192,
                                         n_merge_and_table_layer_units_avrg=64,
                                         n_units_final_avrg=64,
                                         max_buffer_size_avrg=2e6,
                                         lr_avrg=0.001,
                                         lr_patience_avrg=99999999,  # No lr decay

                                         # With the H2H evaluator, these two modes are evaluated against each other.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_AVRG_NET, EvalAgentDeepCFR.EVAL_MODE_SINGLE
                                         ),

                                         log_verbose=True,
                                         game_cls=Flop5Holdem,

                                         # enables simplified obs; the default also works for 3+ players
                                         use_simplified_headsup_obs=True,

                                         h2h_args=H2HArgs(
                                             n_hands=1500000,  # per seat, so 3M hands in total per eval
                                         ),
                                         ),

                  # Evaluate head-to-head every 15 iterations of both players (= every 30 alternating iterations).
                  eval_methods={"h2h": 15},

                  # 150 equals 300 when viewing alternating iterations as 2 iterations (as usually done).
                  # This repo implements alternating iterations as a single iteration, which is why this says 150.
                  n_iterations=150,
                  )
    ctrl.run()
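
# Usage sketch (not part of the original experiment script): with
# eval_agent_export_freq=1, an EvalAgent snapshot is written every iteration.
# Assuming PokerRL's standard export/load mechanism, a snapshot could later be
# reloaded for analysis roughly as in the commented example below. The path is
# hypothetical; actual export locations depend on your PokerRL data directory.
#
#   from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
#
#   agent = EvalAgentDeepCFR.load_from_disk(
#       path_to_eval_agent=".../eval_agent/EXPERIMENT_SD-CFR_vs_Deep-CFR_FHP/...",
#   )
#   # Choose which of the two evaluated strategies the agent should play:
#   agent.set_mode(EvalAgentDeepCFR.EVAL_MODE_SINGLE)  # SD-CFR (no average net)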