-
Notifications
You must be signed in to change notification settings - Fork 0
/
MCTS_cg_Best.m
442 lines (379 loc) · 16.7 KB
/
MCTS_cg_Best.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
%% MCTS.m is a monte carlos tree search that runs in ColorGraph.m
% This version of MCTS keeps track of the best found so far and reports
% that at the end
% really good tutorial https://www.youtube.com/watch?v=onBYsen2_eA
% if policy is incomplete, it does a random rollout for the remaining nodes
%% clean up
startTime=clock
COUNTRUNS=0;
%% Define the Problem
%Define the design tree that we are searching
% We start from a seed of no color, this is indexed as 1 in matlab (0
% in graphsynth)
% There are two alternating choices, 1) Select a node to add to, and 2)
% Select the color of the node ROYGBV (indexed 1-6 for simplicity)
% How big should out sample off of every node be?
sample=100;
% Create Space for recipe rows=node index, columns=[origin,color]
%different from ColorGraph.m which has 3 columns
rainbow=zeros(Nodes,3);
BESTrainbow=[];
BESTscore=[];
%% Initiate the Directed Graph
% I tried several structures, but I think the best way to do this is to use
% a digraph and plot properties to track data and results
% We will keep track of the index of the Source Node S and target node T
S=1;
% We need to make labels for the nodels
nodeLabel={'n0'};
nodeLabel{1,S+1}='r';
nodeLabel{1,S+2}='o';
nodeLabel{1,S+3}='y';
nodeLabel{1,S+4}='g';
nodeLabel{1,S+5}='b';
nodeLabel{1,S+6}='v';
% We are going to need to keep track of the sum score values
NodeTable=table([1;0;0;0;0;0;0],'VariableNames',{'Sum'});
% We need to create a digraph with edge weights, of Sum
decisionTree=digraph(S,2:7,[1,1,1,1,1,1],NodeTable);
% Create an array with Q scores for each node
NodeQ=[decisionTree.Nodes.Sum(2:length(decisionTree.Nodes.Sum))./...
decisionTree.Edges.Weight];
% % Generate a plot of the directed graph, but we don't need to until end
% TreeGraph=plot(decisionTree,'Layout','layered','NodeLabel',nodeLabel);
%% Create initial Policy
% We know that the first location has to be the seed
rainbow(1,2)=str2num(nodeLabel{S}(2:length(nodeLabel{S})));
% I have already expanded so I need to make a queue of Targets
% Create a queue of Targets
Tq=decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2);
% We know that the Targets are colors so we can skip logic
for Color=1:6
% Select Target node from the queue
T=Tq(Color);
% Record the Target node color
rainbow(1,3)=Color;
% We have filled in 1 row already so n starts at 2
n=2;
% Run Samples using pure random
for s=1:sample
% Simulate to generate rainbows
while n<Nodes+1
%Make a Random Graph
run PureRandom.m
%Count
n=n+1;
end
% Reset n position
n=2;
% Evaluate the generated rainbow graph
run ColorScore.m;
% Distance in 3d from target coordinate
Qp=sqrt(GraphScore(1)^2+GraphScore(2)^2+GraphScore(3)^2);
%Is it the best?
[BESTscore,BESTrainbow]=isbest(Qp,rainbow,BESTscore,BESTrainbow);
% Update Scores sums in decisionTree.Nodes.Sum
decisionTree.Nodes.Sum(T)=decisionTree.Nodes.Sum(T)+Qp;
% Update edges travelled
[nada,row]=intersect(decisionTree.Edges.EndNodes,[S,T],'rows');
decisionTree.Edges.Weight(row)=decisionTree.Edges.Weight(row)+1;
% Now we can follow this policy in the next section
S=1; % Make sure that S is one before starting Select
end
end
%% MCTS LOOP START - This is where the primary loop begins
% Loop until we can make a number of choices based on policy that define
% the whole graph
PolicyDepth=0;
while PolicyDepth<Nodes
%% SELECT - Select nodes as informed by the current policy
% We are starting from the beginning so reset n to 1
n=1;
CHECK=1;
S=1;
rainbow=zeros(Nodes,3);
rainbow(:,1)=[1:Nodes]';
rainbow(1,2)=str2double(nodeLabel{S}(2:length(nodeLabel{S})));
% Begin Select Policy Loop
% if targets node exist in the Graph, then use policy
while ~isempty(decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S)))
% Is the current source node a location or a color?
% if Source Node Label starts with "n" we need to pick the next color
if nodeLabel{S}(1)=='n'
% Make a queue of possible target nodes
Tq=decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2);
% Double check there are six options
if length(Tq)~= 6
errbox = msgbox('at line 116 length(Tq)~=6');
return
end
% Using the Tq indexes determine choice with best Q value
% Update Q scores for each node
NodeQ=[decisionTree.Nodes.Sum(2:length(decisionTree.Nodes.Sum))./...
decisionTree.Edges.Weight];
% Select the Q scores for Options in the queue
Options=NodeQ(Tq-1);
% Choose the best option
Choice=find(min(Options)==Options);
% Record Position in Rainbow
rainbow(n,3)=Choice;
% Update Source node
S=Tq(Choice);
% Count Policy Choices
PolicyDepth=PolicyDepth+1;
n=n+1;
else % if not "n" then we must be picking a position
% Make a queue of possible Target nodes
Tq=decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2);
% Double check that the number of options=n
if length(Tq)~= n
errbox = msgbox('at line 146 length(Tq)~=n');
return
end
% Using the Tq indexes determine choice with the best Q value
NodeQ=[decisionTree.Nodes.Sum(2:length(decisionTree.Nodes.Sum))./...
decisionTree.Edges.Weight];
% Select the Q scores for Options in the queue
Options=NodeQ(Tq-1);
% Choose the best option
Choice=find(min(Options)==Options);
% Record Position in Rainbow, indexed from 0 to Nodes
rainbow(n,2)=Choice-1;
% Update Source node
S=Tq(Choice);
end
% Count n
CHECK=CHECK+1;
% % % % % waitbar(PolicyDepth/Nodes);
if PolicyDepth==Nodes
currentTime=clock;
TIMEELAPSED=currentTime-startTime;
% Evaluate the generated rainbow graph
run ColorScore.m;
% Distance in 3d from target coordinate
Qp=sqrt(GraphScore(1)^2+GraphScore(2)^2+GraphScore(3)^2);
% Is it the best score?
[BESTscore,BESTrainbow]=isbest(Qp,rainbow,BESTscore,BESTrainbow);
if BESTscore~=Qp
rainbow=BESTrainbow;
Qp=BESTscore;
end
return
end
% TRACK TIME PER RUN
currentTime=clock;
TIMEELAPSED=currentTime-startTime;
disp(strcat('Time Elapsed: ',...
num2str(TIMEELAPSED(3)),'-days, ','_',...
num2str(TIMEELAPSED(4)),'-hours, ','_',...
num2str(TIMEELAPSED(5)),'-mins, ','_',...
num2str(TIMEELAPSED(6)),'-secs, ','_'))
end
if TIMEELAPSED(5)*60+TIMEELAPSED(6)>=TIMEOUT*60
%Use pure random to finish the graph
disp('Policy Incomplete')
if PolicyDepth<n
rainbow(n,3)=ceil(6*rand());
end
while n<Nodes+1
%Make a Random Graph
run PureRandom.m
%Count
n=n+1;
end
% Evaluate the generated rainbow graph
%run ColorScore_mcts.m;
run ColorScore.m;
% Distance in 3d from target coordinate
Qp=sqrt(GraphScore(1)^2+GraphScore(2)^2+GraphScore(3)^2);
if BESTscore~=Qp
rainbow=BESTrainbow;
Qp=BESTscore;
end
return
end
%% EXPAND - Add the next level under current source
% We need to double check that there are no targets of this source already
if isempty(decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S)))
% Is the current source node a location or a color?
% if Source Node Label starts with "n" we need to expand with colors
if nodeLabel{S}(1)=='n'
% Add the appropriate edge to the decision tree
decisionTree=addedge(decisionTree,S,length(nodeLabel)+1:...
length(nodeLabel)+6,ones(6,1));
% Record Appropriate Node Labels
nodeLabel{1,length(nodeLabel)+1}='r';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
nodeLabel{1,length(nodeLabel)+1}='o';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
nodeLabel{1,length(nodeLabel)+1}='y';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
nodeLabel{1,length(nodeLabel)+1}='g';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
nodeLabel{1,length(nodeLabel)+1}='b';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
nodeLabel{1,length(nodeLabel)+1}='v';
decisionTree.Nodes.Sum(length(nodeLabel))=0;
% Double check that source has 6 new targets
if length(decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2))~= 6
errbox = msgbox('err ln 179, failed target node generation');
return
end
else % if not "n" then we must expand with positions
% Add the appropriate edges to the decision tree
decisionTree=addedge(decisionTree,S,length(nodeLabel)+1:...
length(nodeLabel)+n,ones(n,1));
% Record appropriate node labels
for newLabel=1:n
nodeLabel{1,length(nodeLabel)+1}=strcat('n',num2str(newLabel-1));
%Add first pass to make positive
decisionTree.Nodes.Sum(length(nodeLabel))=0;
end
% Double check that source has n new targets
if length(decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2))~= n
errbox = msgbox('err ln 206, failed target node generation');
return
end
end
else
% if the node already has been expanded
errbox = msgbox('err ln 173, Source already expanded');
return
end
%% SIMULATE - Sample graphs randomly
% Create a queue of targets from the source
% Create a queue of Targets
Tq=decisionTree.Edges.EndNodes(find(decisionTree.Edges.EndNodes(:,1)==S),2);
% Is the current source node a location or a color?
% if Source Node Label starts with "n" we need to simulate from color
if nodeLabel{S}(1)=='n'
% Double check we are in the right place
if length(Tq)~= 6
errbox = msgbox('err ln 236, length(Tq)~=6');
return
end
% Go through all colors
for Color=1:6
% Select Target node from the queue
T=Tq(Color);
% Record Source node index
rainbow(n,2)=str2num(nodeLabel{S}(2:length(nodeLabel{S})));
% Record the Target node color
rainbow(n,3)=Color;
% Save n value for later as N
N=n;
% Run Samples using pure random
for s=1:sample
% Simulate to generate rainbows
while n<Nodes+1
%Make a Random Graph
run PureRandom.m
%Count
n=n+1;
end
% Reset n position
n=N;
%% EVALUATE COLOR
% Evaluate the generated rainbow graph
run ColorScore.m;
% Distance in 3d from target coordinate
Qp=sqrt(GraphScore(1)^2+GraphScore(2)^2+GraphScore(3)^2);
% Is it the best score?
[BESTscore,BESTrainbow]=isbest(Qp,rainbow,BESTscore,BESTrainbow);
% Update Scores sums in decisionTree.Nodes.Sum
decisionTree.Nodes.Sum(T)=decisionTree.Nodes.Sum(T)+Qp;
% Update edges travelled
[nada,row]=intersect(decisionTree.Edges.EndNodes,[S,T],'rows');
decisionTree.Edges.Weight(row)=decisionTree.Edges.Weight(row)+1;
%% BACK PROPAGATE
% step back by a target
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),T,'rows');
tempT=decisionTree.Edges.EndNodes(row,1);
while tempT>1
% stepback by a source
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),tempT,'rows');
tempS=decisionTree.Edges.EndNodes(row,1);
% Update Scores sum in decisionTree.Nodes.Sum
decisionTree.Nodes.Sum(tempT)=decisionTree.Nodes.Sum(tempT)+Qp;
% Update edges travelled
[nada,row]=intersect(decisionTree.Edges.EndNodes,[tempS,tempT],'rows');
decisionTree.Edges.Weight(row)=decisionTree.Edges.Weight(row)+1;
% step back by a target
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),tempT,'rows');
tempT=decisionTree.Edges.EndNodes(row,1);
end
end
end
else % if not "n" then we must simulate next positions
% Double check we are in the right place
if length(Tq)~= n
errbox = msgbox('err ln 282, length(Tq)~=n');
return
end
% Go through all locations
for Location=1:n
% Select a target node from the queue
T=Tq(Location);
% Record Target node index
rainbow(n,2)=str2num(nodeLabel{T}(2:length(nodeLabel{T})));
% Record a random color
rainbow(n,3)=ceil(6*rand());
% Save n value for later as N
N=n;
% Run Samples using pure random
for s=1:sample
% Simulate to generate rainbows
while n<Nodes+1
%Make a Random Graph
run PureRandom.m
%Count
n=n+1;
end
% Reset n position
n=N;
%% EVALUATE LOCATION
% Evaluate the generated rainbow graph
run ColorScore.m;
% Distance in 3d from target coordinate
Qp=sqrt(GraphScore(1)^2+GraphScore(2)^2+GraphScore(3)^2);
% Is it the best score?
[BESTscore,BESTrainbow]=isbest(Qp,rainbow,BESTscore,BESTrainbow);
% Update Scores sums in decisionTree.Nodes.Sum
decisionTree.Nodes.Sum(T)=decisionTree.Nodes.Sum(T)+Qp;
% Update edges travelled
[nada,row]=intersect(decisionTree.Edges.EndNodes,[S,T],'rows');
decisionTree.Edges.Weight(row)=decisionTree.Edges.Weight(row)+1;
%% BACK PROPAGATE
% step back by a target
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),T,'rows');
tempT=decisionTree.Edges.EndNodes(row,1);
while tempT>1
% stepback by a source
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),tempT,'rows');
tempS=decisionTree.Edges.EndNodes(row,1);
% Update Scores sum in decisionTree.Nodes.Sum
decisionTree.Nodes.Sum(tempT)=decisionTree.Nodes.Sum(tempT)+Qp;
% Update edges travelled
[nada,row]=intersect(decisionTree.Edges.EndNodes,[tempS,tempT],'rows');
decisionTree.Edges.Weight(row)=decisionTree.Edges.Weight(row)+1;
% step back by a target
[nada,row]=intersect(decisionTree.Edges.EndNodes(:,2),tempT,'rows');
tempT=decisionTree.Edges.EndNodes(row,1);
end
end
end
end
%% Reset n and Policy Depth
n=1;
PolicyDepth=0;
end
%% Best score function
function [BESTscore,BESTrainbow]=isbest(Qp,rainbow,BESTscore,BESTrainbow)
% Check if the best score
if isempty(BESTrainbow)||BESTscore>Qp
BESTscore=Qp;
BESTrainbow=rainbow;
% disp(strcat('BS: ',num2str(BESTscore),' - ',' Qp: ',num2str(Qp)))
% disp(rainbow);
end
end