-
Notifications
You must be signed in to change notification settings - Fork 1
/
convertStructureToQueryDates.m
223 lines (216 loc) · 10.4 KB
/
convertStructureToQueryDates.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
function [queryDates,learningDates,refValidation] = convertStructureToQueryDates(targetVar,targetDim,QdateStart,QdateEnd,learningDates,climateData,maxThreshold,validationPrep,optimPrep,outputTime,inputDir,outputDir,saveMats)
%CONVERTSTRUCTURETOQUERYDATES Build the table of query dates to be generated.
%
%   [queryDates,learningDates,refValidation] = convertStructureToQueryDates( ...
%       targetVar,targetDim,QdateStart,QdateEnd,learningDates,climateData, ...
%       maxThreshold,validationPrep,optimPrep,outputTime,inputDir,outputDir,saveMats)
%
%   Inputs
%     targetVar      - names of the target variables (string array; char and
%                      cellstr input is converted). Names are lower-cased
%                      before being used as table variable / field names.
%     targetDim      - 1 when each reference value is a scalar; any other
%                      value means each reference is a 2-D map.
%     QdateStart     - first date of the query window, numeric yyyymmdd.
%     QdateEnd       - last date of the query window, numeric yyyymmdd.
%     learningDates  - table with a 'date' variable (numeric yyyymmdd) and
%                      one variable per lower-cased target variable name.
%     climateData    - structure/table exposing a 'date' vector (numeric
%                      yyyymmdd) of all available climate dates.
%     maxThreshold   - maximum distance, in days, between a query date and
%                      its closest learning date (validation OFF only).
%     validationPrep - true to prepare a validation run.
%     optimPrep      - true to prepare an optimisation run (handled exactly
%                      like validationPrep).
%     outputTime     - 1 = daily, 2 = monthly (last day of each month),
%                      3 = dekadal (10th, 20th and last day of each month).
%     inputDir       - directory where the .mat files are written.
%     outputDir      - directory created (if missing) in validation mode.
%     saveMats       - true to save refValidation.mat / queryDates.mat.
%
%   Outputs
%     queryDates     - table with variable 'date' plus one variable per
%                      target variable. With validation OFF each entry is
%                      the learning data of the closest learning date, or
%                      NaN when none lies within maxThreshold days. With
%                      validation ON every entry is NaN.
%     learningDates  - unchanged with validation OFF; with validation ON the
%                      rows whose date is a query date are removed.
%     refValidation  - [] with validation OFF; with validation ON a struct
%                      holding, per target variable, the removed learning
%                      data as a single array, plus the field 'date'.
%
%   Errors are raised when the climate data does not cover the query window,
%   when outputTime is not 1, 2 or 3, and (validation OFF) when no query
%   date remains after removing the learning dates.

disp(' Processing queryDates for all target variables...')

% Row string array so that indexing, numel and concatenation are well defined.
targetVarL        = reshape(lower(string(targetVar)), 1, []);
datesAll          = climateData.date;
learningDatesDate = learningDates.date;
validationOff     = (validationPrep == false && optimPrep == false);

if validationOff
    % Query dates are the selected dates of the window that are NOT learning dates.
    queryDates = selectQueryWindowDates(datesAll, QdateStart, QdateEnd, outputTime);
    queryDates = setdiff(queryDates, learningDatesDate);
    if isempty(queryDates)
        if outputTime == 3
            error('Query dates match with learning dates, nothing to generate...')
        else
            error('Query dates match with learning dates, nothing to generate')
        end
    end
    refValidation = [];
else
    % Validation or optimisation ON: query dates replace the learning dates
    % they coincide with; the replaced data become the validation reference.
    if ~exist(outputDir,'dir')
        mkdir(outputDir)
    end
    queryDates = selectQueryWindowDates(datesAll, QdateStart, QdateEnd, outputTime);

    ismem                   = ismember(learningDatesDate, queryDates);
    learningDataValidation  = learningDates(~ismem,:);
    learningDatesValidation = learningDatesDate(ismem);
    nRef                    = numel(learningDatesValidation);

    refValidation = struct();
    for j = 1:numel(targetVarL)
        refCells = table2cell(learningDates(ismem,targetVarL(j)));
        if targetDim ~= 1
            if nRef == 0
                % Without at least one reference map the map size is unknown.
                error('No learning date falls inside the query period, cannot build the validation reference')
            end
            % Stack the reference maps along the third dimension
            imagesRefValidation = nan(size(refCells{1},1), size(refCells{1},2), nRef);
            for i = 1:nRef
                imagesRefValidation(:,:,i) = refCells{i};
            end
        else
            % Scalar references: one value per reference date
            imagesRefValidation = nan(nRef,1);
            for i = 1:nRef
                imagesRefValidation(i) = refCells{i};
            end
        end
        refValidation.(targetVarL(j)) = single(imagesRefValidation);
    end
    refValidation.date = learningDatesValidation;

    if saveMats == true
        disp(' Saving refValidation.mat file...')
        save(fullfile(inputDir,'refValidation.mat'), 'refValidation', '-v7.3','-nocompression');
    end
    learningDates = learningDataValidation;
end

% Output cell array: first column holds the dates, the remaining columns
% default to {nan} (kept wrapped in a cell, as the table columns are cells).
nQuery = numel(queryDates);
matchedTargetVarTable = cell(nQuery, numel(targetVarL)+1);
matchedTargetVarTable(:,1) = num2cell(queryDates(:));
matchedTargetVarTable(:,2:end) = {{nan}};

if validationOff
    % Index of the closest learning date for each query date; NaN when the
    % closest one is further away than maxThreshold days.
    nearestIdx = nan(size(queryDates));
    learningDt = datetime(learningDatesDate,'ConvertFrom','yyyymmdd'); % hoisted: loop invariant
    if ~isempty(learningDt)
        queryDt = datetime(queryDates,'ConvertFrom','yyyymmdd');
        for i = 1:nQuery
            [nearest, idx] = min(abs(learningDt - queryDt(i)));
            if ~(days(nearest) > maxThreshold)
                nearestIdx(i) = idx;
            end
        end
    end

    % Assign the closest learning data to each query date
    for j = 1:numel(targetVarL)
        targetVarData = learningDates.(targetVarL(j));
        for i = 1:nQuery
            if ~isnan(nearestIdx(i))
                % Parenthesis indexing on purpose: keeps the element's container type
                matchedTargetVarTable{i, j+1} = targetVarData(nearestIdx(i));
            end
        end
    end
end

matchedTargetVarTable = cell2table(matchedTargetVarTable,"VariableNames",["date" targetVarL]');
queryDates = matchedTargetVarTable;

if saveMats == true
    disp(' Saving Query dates, may take a while depending on input size...')
    save(fullfile(inputDir,'queryDates.mat'), 'queryDates', '-v7.3','-nocompression');
end
end

function windowDates = selectQueryWindowDates(datesAll, QdateStart, QdateEnd, outputTime)
% Return the dates of datesAll inside [QdateStart, QdateEnd] that match the
% requested output time step (1 daily, 2 month ends, 3 dekad ends).

% Validate the time step first so an invalid value is reported before any
% coverage problem.
if outputTime == 1
    cadence = 1;
elseif outputTime == 2
    cadence = 2;
elseif outputTime == 3
    cadence = 3;
else
    error('Invalid outputTime value')
end

% The climate data must cover the whole query window
if min(datesAll) > QdateStart
    error('Climate data first date > Query period start')
elseif max(datesAll) < QdateEnd
    error('Climate data last date < Query period end')
end

windowDates = datesAll(datesAll >= QdateStart & datesAll <= QdateEnd);
if cadence == 1 || isempty(windowDates)
    return
end

% Day-of-month tests are evaluated on the window dates themselves, so the
% resulting mask lines up with windowDates.
dateVec    = datevec(datetime(windowDates,'ConvertFrom','yyyymmdd'));
dayOfMonth = dateVec(:,3);
isMonthEnd = dayOfMonth == eomday(dateVec(:,1), dateVec(:,2));
if cadence == 2
    keep = isMonthEnd;
else
    keep = dayOfMonth == 10 | dayOfMonth == 20 | isMonthEnd;
end
windowDates = windowDates(keep);
end