CFRplus.py
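"""Counterfactual regret minimization trainer for Kuhn poker.

One game tree is built per player and the CFR recursion is run in
self-play; the file name indicates the CFR+ variant, which floors
cumulative regrets at zero (regret-matching+).
"""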
from KuhnPoker import *
from treelib import Node, Tree
from CfrNode import CfrNode
from GameTree import GameTree
from matplotlib import pyplot as plt
import numpy as np  # required: np.array and np.cumsum are used below
import Utils
import math
from collections import Counter
from math import sqrt
import random
import time
import pandas as pd
from NodeEstimator import Estimator

class CFRtrainer:
    def __init__(self):
        self.playerOneTree = GameTree(CfrNode)
        self.playerTwoTree = GameTree(CfrNode)
        self.kuhn = KuhnPoker()
        self.stats = Counter()
        #self.alpha = alpha
        self.trainingXdata = []
        self.trainingYdata = []
        self.hists = []
        self.avgStr = []
        self.stratSum = 0
        self.iter = 0
        self.betRegrets = []

    def CFR(self, p0, p1):
        curPlayer = self.kuhn.GetCurrentPlayer()
        if self.kuhn.IsTerminateState():
            return self.kuhn.GetPayoff(curPlayer)
        curPlayerProb = p0 if curPlayer == Players.one else p1
        opProb = p1 if curPlayer == Players.one else p0
        tree = self.playerOneTree if curPlayer == Players.one else self.playerTwoTree
        cfrNode = tree.GetOrCreateDataNode(self.kuhn, curPlayer)
        strategy = cfrNode.GetStrategy(curPlayerProb)
        util = [0.0] * NUM_ACTIONS
        nodeUtil = 0
        infosetStr = self.kuhn.GetInfoset(curPlayer)
        infosetBackup = self.kuhn.SaveInfoSet()
        for action in range(NUM_ACTIONS):
            self.kuhn.MakeAction(action)
            # Recurse, scaling the acting player's reach probability by the
            # action probability; negate because the game is zero-sum and
            # payoffs are returned from the mover's perspective.
            if curPlayer == Players.one:
                util[action] += -self.CFR(p0 * strategy[action], p1)
            else:
                util[action] += -self.CFR(p0, p1 * strategy[action])
            nodeUtil += strategy[action] * util[action]
            self.kuhn.RestoreInfoSet(infosetBackup)
        for action in range(NUM_ACTIONS):
            regret = util[action] - nodeUtil
            # CFR+ floors the cumulative regret at zero (regret-matching+),
            # weighting each increment by the opponent's reach probability.
            cfrNode.regretSum[action] = max(cfrNode.regretSum[action] + opProb * regret, 0)
        # Diagnostics for one particular infoset (the string format comes
        # from KuhnPoker.GetInfoset; this appears to be player one holding
        # card 1 before any action).
        if ('1 | uplayed;uplayed;uplayed' in infosetStr) and curPlayer == Players.one:
            self.trainingXdata.append(np.array(strategy))
            self.trainingYdata.append(nodeUtil)
            self.betRegrets.append(cfrNode.regretSum[1])
            self.stratSum += strategy[1]
            self.avgStr.append(self.stratSum / (len(self.avgStr) + 1))
            self.iter += 1
        return nodeUtil
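
    # The regret loop in CFR above computes, per information set, the
    # counterfactual regret of each action,
    #     regret(a) = util[a] - sum_a' strategy[a'] * util[a'],
    # accumulated with the opponent's reach probability (opProb) as weight;
    # that weighting is what makes the regret "counterfactual".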

    def running_mean(self, x, N):
        cumsum = np.cumsum(np.insert(x, 0, 0))
        return (cumsum[N:] - cumsum[:-N]) / N
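
    # Worked example of running_mean (a standard trailing moving average):
    #     running_mean([1, 2, 3, 4], 2) -> array([1.5, 2.5, 3.5])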

    def Train(self):
        util = 0
        start_time = time.time()
        results = []
        # utils = []
        for i in range(1, 500):
            self.kuhn.NewRound()
            curUtil = self.CFR(1, 1)
            util += curUtil
            # Record the running average game value every 80 iterations.
            if i % 80 == 0:
                results.append(util / i)

    def CheckNash(self):
        if self.kuhn.IsPlayerOneCloseToNash(self.playerOneTree):
            print("Player one is close to Nash")
        else:
            print("Player one is not close to Nash")
        if self.kuhn.IsPlayerTwoCloseToNash(self.playerTwoTree):
            print("Player two is close to Nash")
        else:
            print("Player two is not close to Nash")