# selfplay_mcts.py
# Forked from tensorflow/minigo.
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import sys
import time
from absl import flags
import coords
from gtp_wrapper import MCTSPlayer
def _search_additional(player, extra_readouts):
    """Run tree searches until the root's N has grown by `extra_readouts`."""
    # we want to do "X additional readouts", rather than "up to X readouts",
    # so the target is measured from the root's N at the time of the call.
    target = player.root.N + extra_readouts
    while player.root.N < target:
        player.tree_search()


def play(network, verbosity=0):
    """Play out one self-play game with MCTS and return the player.

    Args:
        network: object whose `run(position)` returns a `(prob, val)` pair;
            it is also handed to `MCTSPlayer`.
        verbosity: 0 is silent. >= 1 prints a progress line whenever
            `position.n % 10 == 9`; >= 2 prints it after every move and
            writes the final result and score to stderr; >= 3 additionally
            prints the root position, its description and each move played.

    Returns:
        The `MCTSPlayer` that played the game, after `set_result` has been
        called on it (either for a resignation or for a finished game).
        NOTE(review): earlier documentation described the return value as
        the final position plus the search-probability and value tensors;
        presumably those are read off the returned player -- confirm
        against callers.
    """
    readouts = flags.FLAGS.num_readouts  # defined in strategies.py

    # Disable resign in 5% of games
    resign_threshold = -1.0 if random.random() < 0.05 else None

    player = MCTSPlayer(
        network,
        verbosity=verbosity,
        resign_threshold=resign_threshold,
    )
    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    leaf = player.root.select_leaf()
    priors, value = network.run(leaf.position)
    leaf.incorporate_results(priors, value, leaf)

    while True:
        move_started = time.time()
        player.root.inject_noise()
        _search_additional(player, readouts)

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Resignation ends the game in favour of the side not to move.
        if player.should_resign():
            loser = player.root.position.to_play
            player.set_result(-1 * loser, was_resign=True)
            break

        player.play_move(player.pick_move())

        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        # Progress line: every move at verbosity >= 2, otherwise only when
        # position.n % 10 == 9 at verbosity 1.
        report = verbosity >= 2
        if not report and verbosity >= 1:
            report = player.root.position.n % 10 == 9
        if report:
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - move_started
            stats = (
                player.root.position.n,
                readouts,
                elapsed / readouts * 100.0,
                elapsed,
            )
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % stats, flush=True)

        if verbosity >= 3:
            played = coords.to_kgs(coords.from_flat(player.root.fmove))
            print("Played >>", played)

    if verbosity >= 2:
        summary = "%s: %.3f" % (player.result_string, player.root.Q)
        print(summary, file=sys.stderr)
        print(player.root.position,
              player.root.position.score(), file=sys.stderr)

    return player