This repository has been archived by the owner on Nov 10, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
transformer.py
227 lines (192 loc) · 7.85 KB
/
transformer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
"""Transformer for calculating plant height from as las file
"""
import copy
import datetime
import logging
import os
import laspy
import numpy as np
import terrautils.lemnatec
import configuration
import transformer_class
# Point terrautils.lemnatec.SENSOR_METADATA_CACHE at the folder that contains this file
# (symbolic links resolved by realpath). Presumably terrautils looks up cached sensor
# metadata relative to this folder - TODO confirm against terrautils.
terrautils.lemnatec.SENSOR_METADATA_CACHE = os.path.dirname(os.path.realpath(__file__))
class __internal__():
    """Class for internal use only functions

    All helpers are static methods, so no instance is required to use them.
    """

    def __init__(self):
        """Initializes class instance
        """

    @staticmethod
    def cleanup_request_md(source_md: dict) -> dict:
        """Makes a copy of the source metadata and cleans it up for use as plot-level information
        Arguments:
            source_md: the source metadata to clone and clean up
        Returns:
            A deep copy of the source metadata without its 'list_files', 'context_md', and
            'working_folder' entries. An empty dict is returned when source_md is empty or None
        Notes:
            The source metadata is never modified. The excluded entries are filtered out
            before the deep copy is made, so they are neither copied needlessly nor able to
            make the copy fail when they hold something that can't be deep copied
        """
        if not source_md:
            return {}

        excluded_keys = ('list_files', 'context_md', 'working_folder')
        # Shallow filter first, then one deep copy of what remains so that references
        # shared between the retained values are still copied consistently
        retained = {key: value for key, value in source_md.items() if key not in excluded_keys}
        return copy.deepcopy(retained)

    @staticmethod
    def prepare_container_md(plot_name: str, plot_md: dict, key_name: str, source_file: str, result_files: list) -> dict:
        """Prepares the metadata for a single container
        Arguments:
            plot_name: the name of the container
            plot_md: the metadata associated with this container
            key_name: the name of the key related to the files
            source_file: the name of the source file
            result_files: list of files to add to container metadata
        Return:
            The formatted metadata: a dict with the container 'name', its 'metadata' (flagged
            to replace existing metadata), and a 'file' list with one entry per existing file
        Notes:
            The files in result_files are checked for existence before being added to the metadata
        """
        # Files that don't exist on disk are silently left out of the container
        file_entries = [
            {
                'path': one_file,
                'key': key_name,
                'metadata': {
                    'source': source_file,
                    'transformer': configuration.TRANSFORMER_NAME,
                    'version': configuration.TRANSFORMER_VERSION,
                    'timestamp': datetime.datetime.utcnow().isoformat(),
                    'plot_name': plot_name
                }
            }
            for one_file in result_files
            if os.path.exists(one_file)
        ]

        return {
            'name': plot_name,
            'metadata': {
                'replace': True,
                'data': plot_md
            },
            'file': file_entries
        }
def get_traits_table() -> tuple:
    """Builds the traits table definition
    Return:
        A tuple of (field names, traits) where the field names are a tuple of the column
        names in output order, and traits is a dict holding the default value of each field
    """
    # Column name paired with its default value, in output column order
    defaults = (
        ('local_datetime', ''),
        ('canopy_height', []),
        ('access_level', '2'),
        ('species', 'Sorghum bicolor'),
        ('site', []),
        ('citation_author', '"Zongyang, Li"'),
        ('citation_year', '2016'),
        ('citation_title', 'Maricopa Field Station Data and Metadata'),
        ('method', 'Scanner 3d ply data to height'),
    )

    fields = tuple(name for name, _ in defaults)
    traits = dict(defaults)

    return (fields, traits)
def generate_traits_list(traits: dict) -> list:
    """Flattens the trait data into a list of values in output column order
    Arguments:
        traits: a dict containing the current trait data
    Return:
        A list of the trait values, one per column
    Exceptions:
        A KeyError is raised when one of the expected traits is missing
    """
    # The order the summary trait values are emitted in
    column_order = ('local_datetime', 'canopy_height', 'access_level', 'species', 'site',
                    'citation_author', 'citation_year', 'citation_title', 'method')

    return [traits[column] for column in column_order]
def las_to_height(in_file: str, out_histogram_file: str = None) -> tuple:
    """Return a tuple of (height histogram, max height) from an LAS file.
    Arguments:
        in_file: the source LAS file
        out_histogram_file: optional output file for the histogram data
    Return:
        A tuple of the height histogram and the maximum found height. When the LAS file
        contains no height values the histogram is all zeros and the maximum height is None
    Notes:
        The histogram file is only written when height data was found.
        The LAS file is always closed before returning
    """
    number_of_bins = 500

    las_handle = laspy.file.File(in_file)
    z_data = None
    try:
        # NOTE(review): with laspy 1.x the upper-case Z is presumably the raw (unscaled)
        # dimension - confirm the units against the scanner data
        z_data = las_handle.Z
        if z_data.size == 0:
            logging.info("No height data was loaded from las file: %s", in_file)
            return np.zeros(number_of_bins), None

        max_height = np.max(z_data)
        height_hist = np.histogram(z_data, bins=number_of_bins, density=False)[0]
    finally:
        # Release our reference to the point data before closing, in case it is a view
        # onto the file's underlying buffer
        z_data = None
        las_handle.close()

    if out_histogram_file:
        # NOTE(review): the height_cm column holds the bin index range ("0-1", "1-2", ...),
        # not the bin edges numpy calculated from the data range - confirm this is intended
        with open(out_histogram_file, 'w') as out_file:
            out_file.write("bin,height_cm,count\n")
            out_file.writelines("%s,%s-%s,%s\n" % (idx + 1, idx, idx + 1, count)
                                for idx, count in enumerate(height_hist))

    return height_hist, max_height
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: dict) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: request specific metadata
        transformer_md: metadata from previous runs of this transformer
        full_md: the full request metadata
    Return:
        Returns a dictionary with the results of processing. On success it contains a 'code'
        of zero, the 'container' metadata, and an entry named after the transformer holding
        the timestamp, processing time, and canopy height. When there's no LAS file, or the
        LAS file has no height data, it contains a 'code' of zero and a 'message'. When the
        plot name is missing it contains a 'code' of -1000 and an 'error' message
    Notes:
        Only the first file with a '.las' extension (case insensitive) is processed
    """
    # pylint: disable=unused-argument
    start_timestamp = datetime.datetime.now()

    plot_name = check_md['context_md'].get('plot_name')
    if plot_name is None:
        return {'code': -1000, 'error': "Plot name is missing from request metadata"}

    # We only work with the first las file we find
    las_file = next((one_file for one_file in check_md['list_files']()
                     if os.path.splitext(one_file)[1].lower() == '.las'), None)
    if las_file is None:
        return {'code': 0, 'message': "No LAS files were detected in the list of files to process"}

    # exist_ok avoids failing when the folder is created between a check and the creation
    out_path = os.path.join(check_md['working_folder'], plot_name)
    os.makedirs(out_path, exist_ok=True)

    filename_base = os.path.splitext(os.path.basename(las_file))[0]
    hist_csv = os.path.join(out_path, filename_base + '_histogram.csv')
    out_csv = os.path.join(out_path, filename_base + '_canopyheight_bety.csv')

    logging.debug("Calling las_to_height with source: '%s'", las_file)
    logging.debug(" output histogram file: '%s'", str(hist_csv))
    (_, maximum) = las_to_height(las_file, hist_csv)
    if maximum is None:
        msg = "LAS file has no height data: %s" % las_file
        logging.warning(msg)
        return {'code': 0, 'message': msg}

    # Gather all the trait values before opening the CSV file so that missing request
    # metadata doesn't leave a file behind that only contains the header
    (fields, traits) = get_traits_table()
    traits['canopy_height'] = str(maximum)
    traits['site'] = plot_name
    traits['local_datetime'] = check_md['timestamp']
    trait_list = generate_traits_list(traits)

    with open(out_csv, 'w') as csv_file:
        csv_file.write(','.join(map(str, fields)) + '\n')
        csv_file.write(','.join(map(str, trait_list)) + '\n')

    # Prep the metadata for return
    plot_md = __internal__.cleanup_request_md(check_md)
    plot_md['plot_name'] = plot_name
    container_md = [
        __internal__.prepare_container_md(plot_name, plot_md, configuration.TRANSFORMER_TYPE, las_file, [hist_csv, out_csv])
    ]

    return {
        'code': 0,
        'container': container_md,
        configuration.TRANSFORMER_NAME:
        {
            'utc_timestamp': datetime.datetime.utcnow().isoformat(),
            'processing_time': str(datetime.datetime.now() - start_timestamp),
            'canopy_height': str(maximum)
        }
    }