-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyse_solutions.py
76 lines (59 loc) · 2.24 KB
/
analyse_solutions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
def str_to_list(sol):
    """Split a solution string into a list of move tokens.

    Every character other than ``"_"`` starts a new token. Each ``"_"`` is
    appended to the token that precedes it, so ``"ab_c"`` becomes
    ``["a", "b_", "c"]``.

    A ``"_"`` that has no preceding token (one at the very start of ``sol``)
    starts a token of its own instead of raising ``IndexError``, so
    ``"".join(str_to_list(sol)) == sol`` holds for every input.

    Args:
        sol: Solution string to tokenise.

    Returns:
        List of token strings in their original order.
    """
    sol_list = []
    for char in sol:
        if char == "_" and sol_list:
            # Suffix marker: attach it to the most recent token.
            sol_list[-1] += "_"
        else:
            # A regular character, or a leading "_" with nothing to attach to.
            sol_list.append(char)
    return sol_list
def list_to_str(sol_list):
    """Concatenate the strings in ``sol_list`` into one string, no separator."""
    return str.join("", sol_list)
def count_move_occurrences(moves):
    """Count how many times each item appears in ``moves``.

    Args:
        moves: Iterable of hashable items (e.g. chunk strings).

    Returns:
        A plain ``dict`` mapping each distinct item to its number of
        occurrences, with keys in order of first appearance.
    """
    # Counter does the tallying; convert back to a plain dict so callers
    # keep receiving (and printing) the same type as before.
    return dict(Counter(moves))
def find_chunks(solutions, chunk_size):
    """Return every contiguous window of ``chunk_size`` items in ``solutions``.

    Windows overlap and advance one position at a time. When ``solutions``
    is shorter than ``chunk_size`` the result is an empty list.

    Args:
        solutions: Sliceable sequence to cut into windows.
        chunk_size: Number of items per window.

    Returns:
        List of slices of ``solutions``, in order of their start index.
    """
    last_start = len(solutions) - chunk_size
    windows = []
    for start in range(last_start + 1):
        windows.append(solutions[start:start + chunk_size])
    return windows
def get_chunk_freq():
    """Count how often each chunk of moves occurs across all solution files.

    Reads every regular file in the module-level ``solutions_dir`` and treats
    each non-blank line as one solution. Each solution is tokenised with
    ``str_to_list``, cut into overlapping windows of the module-level
    ``chunk_size`` tokens by ``find_chunks``, and every window is joined back
    into a string with ``list_to_str`` before being counted.

    Returns:
        A ``(chunk_freq, total_sol_count)`` tuple: a dict mapping each chunk
        string to its number of occurrences, and the number of solutions read.
    """
    all_str_chunks = []
    total_sol_count = 0
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # progress output and the first-seen order of chunks reproducible.
    for sol_f in sorted(os.listdir(solutions_dir)):
        # os.path.join works whether or not solutions_dir ends with a slash.
        sol_path = os.path.join(solutions_dir, sol_f)
        if not os.path.isfile(sol_path):
            # Sub-directories cannot be opened as solution files.
            continue
        print("Reading: ", sol_f)
        with open(sol_path, 'r') as sol_file:
            # Stream line by line instead of loading the whole file.
            for line in sol_file:
                sol = line.strip()
                if not sol:
                    # Blank lines are not solutions; do not count them.
                    continue
                total_sol_count += 1
                chunks = find_chunks(str_to_list(sol), chunk_size)
                all_str_chunks.extend(map(list_to_str, chunks))
    return count_move_occurrences(all_str_chunks), total_sol_count
# Config
solutions_dir = "solutions/"
chunk_size = 3  # Adjust to change shingle / chunk size (1 - 5)
min_size = 0  # Ignores frequencies less than this value
top_n = 20  # Number of most frequent chunks to print

chunk_freq, total_sol_count = get_chunk_freq()
print("Processing {} solutions".format(total_sol_count))

# Rank the chunks once, most frequent first; the ranking is reused for both
# the printed summary and the plot.
ranked_chunks = sorted(chunk_freq.items(), key=lambda x: x[1], reverse=True)

# The heading is built from top_n so it always matches the number of
# entries actually printed.
print("Top {} chunks:".format(top_n))
print(ranked_chunks[:top_n])

if ranked_chunks:
    unique_moves, move_count = zip(*ranked_chunks)
else:
    # zip(*[]) yields nothing to unpack; fall back to an empty plot.
    unique_moves, move_count = (), ()

# Counts are in descending order, so the entries that pass the threshold
# form a prefix and unique_moves can be truncated to the same length.
move_count = [n for n in move_count if n >= min_size]
unique_moves = unique_moves[:len(move_count)]
ranks = list(range(1, len(unique_moves) + 1))

plt.bar(ranks, move_count)

# Zipf's law
# plt.plot(ranks, [((1/(i+1))*c*2) for i, c in enumerate(move_count)], 'r-')

plt.xlabel("moves - (rank size={})".format(chunk_size))
plt.ylabel("frequency")

# Plot log variants
# plt.plot(np.log(ranks), np.log(move_count))
# plt.xlabel("log(rank) - (size={})".format(chunk_size))
# plt.ylabel("log(frequency)")

plt.show()