-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTournament.py
184 lines (156 loc) · 5.94 KB
/
Tournament.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from itertools import combinations
from random import shuffle
from dPrisoner import dPrisoner
from Prisoner import Prisoner
from randomPrisoner import randomPrisoner
from superPrisoner import superPrisoner
from ElGuason import ElGuason
import numpy as np
"""
Prisoners' dilemma tournament
"""
class Tournament():
    """
    Round-robin iterated prisoners' dilemma tournament.

    Parameters
    ----------
    competing: list of competing Prisoner subclasses
        The classes themselves (not instances); a fresh instance of each
        is created for every match.
    n_rounds: int
        Default number of rounds per match.

    Attributes
    ----------
    competing: the list passed to the constructor
    scores: list of accumulated scores, one entry per competitor, in the
        same order as ``competing``
    n_rounds: default number of rounds per match
    """

    # Payoff table indexed by (bool(strategy1), bool(strategy2)).
    _PAYOFFS = {
        (True, True): (1, 1),
        (False, True): (2, -1),
        (True, False): (-1, 2),
        (False, False): (0, 0),
    }

    # Keys read from ``params`` to configure the first prisoner of a match.
    _TUNED_KEYS = (
        'budget', 'window', 'p', 'buffer_init', 'streak_size', 'tolerance',
        'initiative', 'update_tolerance', 'update_initiative',
    )

    # Constructor arguments that are the same for both prisoners.
    _COMMON_KWARGS = {
        'k': 4,
        'op_initiative_low': 0.2,
        'op_initiative_high': 0.8,
    }

    # Fixed configuration always used for the second prisoner of a match.
    _OPPONENT_PARAMS = {
        'budget': 0,
        'window': 5,
        'p': 0.5,
        'buffer_init': 40,
        'streak_size': 10,
        'tolerance': 8,
        'initiative': 0.3,
        'update_tolerance': 2,
        'update_initiative': 0.9,
    }

    def __init__(self, competing, n_rounds):
        self.competing = competing
        self.scores = len(competing)*[0]
        self.n_rounds = n_rounds

    def score(self, strategy1, strategy2):
        """
        Score a single round

        Parameters
        ----------
        strategy1: bool
            First Prisoner's strategy (truthy means cooperate)
        strategy2: bool
            Second Prisoner's strategy (truthy means cooperate)

        Returns
        -------
        (score1, score2): (int, int)
            (1, 1) if both cooperate,
            (0, 0) if both defect, and
            (2, -1) or (-1, 2) if one defects and one cooperates
            (the defector gets 2, the cooperator gets -1)
        """
        return self._PAYOFFS[bool(strategy1), bool(strategy2)]

    def play_match(self, prisoner1, prisoner2, params, n_rounds=None):
        """
        Play a single match

        Parameters
        ----------
        prisoner1: subclass of Prisoner
            First prisoner competing in the match. It is built from the
            values in ``params``.
        prisoner2: subclass of Prisoner
            Second prisoner competing in the match. It is always built
            from the fixed ``_OPPONENT_PARAMS`` configuration.
        params: dict
            Configuration for ``prisoner1``. Must contain every key in
            ``_TUNED_KEYS``; extra keys are ignored.
        n_rounds: int, optional
            Number of rounds in the match. If no value is
            provided, the number of rounds defaults to
            the default value for the tournament.

        Returns
        -------
        (int, int): scores for prisoner1 and prisoner2

        Raises
        ------
        KeyError
            If ``params`` lacks one of the keys in ``_TUNED_KEYS``.
        """
        # Create instances of each prisoner
        tuned = {key: params[key] for key in self._TUNED_KEYS}
        p1 = prisoner1(**tuned, **self._COMMON_KWARGS)
        p2 = prisoner2(**self._OPPONENT_PARAMS, **self._COMMON_KWARGS)

        # Only fall back to the tournament default when the caller gave no
        # value; an explicit 0 means "play no rounds".
        if n_rounds is None:
            n_rounds = self.n_rounds

        # Play all rounds
        score1 = 0
        score2 = 0
        for _ in range(n_rounds):
            strategy1 = p1.pick_strategy()
            strategy2 = p2.pick_strategy()
            gain1, gain2 = self.score(strategy1, strategy2)
            score1 += gain1
            score2 += gain2
            # Each prisoner is told its own move first, then the opponent's.
            p1.process_results(strategy1, strategy2)
            p2.process_results(strategy2, strategy1)

        return (score1, score2)

    def round_robin(self, params):
        """
        Play a round robin

        Every pair of competitors plays one match, in random order, and
        the match scores are added to ``self.scores``.

        Parameters
        ----------
        params: dict
            Forwarded to ``play_match``; it configures the lower-indexed
            competitor of each pair.
        """
        # Create a list of all combinations of competing
        matches = list(combinations(range(len(self.competing)), 2))
        shuffle(matches)

        # Play all matches
        for first, second in matches:
            score1, score2 = self.play_match(
                self.competing[first],
                self.competing[second],
                params)
            self.scores[first] += score1
            self.scores[second] += score2
def run(params):
    """
    Play one tournament between superPrisoner and randomPrisoner.

    Matches last 300 rounds. The final scores and the indices of the
    winners are printed.

    Parameters
    ----------
    params: dict
        Forwarded to ``Tournament.round_robin``.

    Returns
    -------
    (winners, scores): (list of int, list)
        Indices of every competitor tied for the highest score, and the
        tournament's score list.
    """
    contenders = [superPrisoner, randomPrisoner]
    tournament = Tournament(contenders, 300)
    tournament.round_robin(params)
    print(tournament.scores)

    # Collect every competitor that reached the top score (ties included).
    best = max(tournament.scores)
    winners = [idx for idx, total in enumerate(tournament.scores)
               if total == best]
    print("ganadores: ", winners)
    return winners, tournament.scores
# Grid search over the first prisoner's parameters.
# Matches are assumed to last 300 rounds on average.
# NOTE(review): this search runs at import time; consider moving it behind
# an ``if __name__ == "__main__":`` guard if this module is imported elsewhere.

N_REPEATS = 10  # seeded tournaments averaged per parameter combination
TOP_K = 50      # number of best combinations reported at the end

# One NumPy seed per repeat; built once because it never changes in the loops.
seeds = np.arange(15, 200, 15)

scores = []    # negated mean score of competitor 0, so ascending sort = best first
strategy = []  # parameter dict for each combination, parallel to ``scores``
results = []   # True when competitor 0's total was at least competitor 1's

for budget in [0, 25, 125]:  # larger is more aggressive
    for window in [5, 10, 20, 40]:  # window for the opponent's initiative; should not be too large
        print(budget, window)
        for p in [1, 0.5]:  # 1 puts the whole gain into the budget; 0.5 puts in 1 only when 2 is won
            for buffer_init in [10, 20, 40, 80]:  # larger is more conservative
                for streak_size in [5, 10, 20, 40]:
                    for tolerance in [1, 2, 4, 8]:
                        for initiative in [0.3]:
                            for update_tolerance in [1.5, 2]:
                                for update_initiative in [0.9]:
                                    params = {
                                        'budget': budget,
                                        'window': window,
                                        'p': p,
                                        'buffer_init': buffer_init,
                                        'streak_size': streak_size,
                                        'tolerance': tolerance,
                                        'initiative': initiative,
                                        'update_tolerance': update_tolerance,
                                        'update_initiative': update_initiative,
                                    }
                                    s1 = 0
                                    s2 = 0
                                    for seed in seeds[:N_REPEATS]:
                                        np.random.seed(seed)
                                        _, s = run(params)
                                        s1 += s[0]
                                        s2 += s[1]
                                    scores.append(-s1/N_REPEATS)
                                    strategy.append(params)
                                    results.append(s1 >= s2)

ix = np.argsort(scores)
# Name kept for compatibility; it actually holds the TOP_K best combinations.
top_10 = np.array(strategy)[ix][:TOP_K]
print(top_10, -1*np.sort(scores)[:TOP_K], np.array(results)[ix][:TOP_K])