forked from LoicGerber/kNN-Synthetic-Image-Generation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convertRawDataToStructure.m
141 lines (125 loc) · 5.4 KB
/
convertRawDataToStructure.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
function rawData = convertRawDataToStructure(targetVar,climateVars,addVars,rawDir,inputDir)
%
%
%
% REDO DOCUMENTATION
%
%
%
% Convert NetCDF data to structure
% Input directory containing NetCDF files
% files must be named with the variable. Example: precipitation data must
% be called pre.nc
% Check if output directories exist, if not create them
if ~exist(inputDir, 'dir')
mkdir(inputDir)
end
varsAll = [targetVar climateVars addVars];
fields = [];
rawData = struct();
rawData = structfun(@(x) [], rawData, 'UniformOutput', false);
% Get a list of all NetCDF files in the input directory
files = dir(fullfile(rawDir, '*.nc'));
if ~isempty(files)
for i = 1:length(files)
% Open the NetCDF file
ncfile = fullfile(rawDir, files(i).name);
[~, varName, ~] = fileparts(ncfile);
if ismember(lower(string(varName)),lower(varsAll))
varnameID = strcat(lower(varName),'Index');
disp(strcat(" Processing '", varName,"' data..."))
ncid = netcdf.open(ncfile,'NC_NOWRITE'); % Open NetCDF in read-only
% Get the variable IDs and dimensions
varid = netcdf.inqVarID(ncid,varName); % Return ID associated with variable name
[~,~,dimids,~] = netcdf.inqVar(ncid,varid); % returns 4 outputs, 1-2-4 are ignored (1-var name,2-data type,3-dimension IDs,4-num of var attributes)
dims = cell(length(dimids),1);
for j=1:length(dimids)
[dims{j,1}, dims{j,2}] = netcdf.inqDim(ncid,dimids(j));
end
% Get the time variable and convert to MATLAB datenum
time_varid = netcdf.inqVarID(ncid,'time');
time_data = netcdf.getVar(ncid,time_varid);
time_units = netcdf.getAtt(ncid,time_varid,'units');
if strlength(time_units) > 21
time_origin = datetime(strrep(time_units,'days since ',''),'InputFormat','yyyy-MM-dd hh:mm:ss');
else
time_origin = datetime(strrep(time_units,'days since ',''),'InputFormat','yyyy-MM-dd');
end
time_datenum = datenum(time_origin + days(time_data));
% Get the variable data for each day and store in a cell array
num_days = length(time_datenum);
data = cell(num_days,1);
dates = cell(num_days,1);
for j=1:num_days
day_start = time_datenum(j);
day_end = day_start + 1;
time_idx = find(time_datenum >= day_start & time_datenum < day_end);
data{j} = netcdf.getVar(ncid,varid,[0 0 time_idx-1],[dims{1,2} dims{2,2} 1],'single');
dates{j} = datestr(day_start,'yyyymmdd');
end
data = cellfun(@transpose,data,'UniformOutput',false);
dates = str2double(dates);
% Create the output structure
rawData.(lower(varName)) = data;
rawData.(varnameID) = dates;
else
disp(strcat(" '",varName,"' not in variables of interest, file not processed..."))
end
end
fields = string(fieldnames(rawData))';
fields = fields(ismember(fields,lower(varsAll)));
end
if isempty(files) || numel(fields) < numel(varsAll)
% Get a list of all GeoTIFF files in the input directory
varsTiff = varsAll(~ismember(lower(varsAll),lower(fields)));
for k = 1:numel(varsTiff)
varName = varsTiff(k);
files = dir(fullfile(rawDir, varName, '*.tif'));
if isempty(files)
error('No GeoTIFF files in rawDir... Missing variable files or unknown format.')
end
data = cell(length(files),1);
dates = nan(length(files),1);
varPrevious = [];
j = 0;
for i = 1:length(files)
j = j + 1;
% Open the GeoTIFF file
tiffFile = fullfile(rawDir, varName, files(i).name);
%[~, filename, ~] = fileparts(tiffFile);
[~, dateStr, ~] = fileparts(tiffFile);
% Split the filename into variable and date
%parts = split(filename, '_');
%varName = parts{1};
%dateStr = parts{2};
if ismember(lower(string(varName)), lower(varsAll))
if ~strcmp(varName,varPrevious)
if ~isempty(varPrevious)
% Create the output structure
varnameID = strcat(varPrevious, 'Index');
data = data(~cellfun('isempty',data));
dates = dates(~isnan(dates));
rawData.(varPrevious) = data;
rawData.(varnameID) = dates;
end
disp(strcat(" Processing '", varName, "' data..."))
j = 1;
end
% Read the variable data
data{j,1} = single(readgeoraster(tiffFile));
% Convert date string to numeric format
dates(j,1) = str2double(dateStr);
elseif ~strcmp(varName,varPrevious)
disp(strcat(" '", varName, "' not in variables of interest, file not processed..."))
end
varPrevious = varName;
end
% Create the output structure
varnameID = strcat(lower(varName), 'Index');
data = data(~cellfun('isempty',data));
dates = dates(~isnan(dates));
rawData.(lower(varName)) = data;
rawData.(varnameID) = dates;
end
end
end