-
Notifications
You must be signed in to change notification settings - Fork 0
/
osm_merge_lats_lons_with_way_relation_ids.pl
387 lines (287 loc) · 12.7 KB
/
osm_merge_lats_lons_with_way_relation_ids.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
#--------------------------------------------------
# osm_merge_lats_lons_with_way_relation_ids.pl
#--------------------------------------------------
# (c) Copyright 2022-2023 by Richard Fobes at SolutionsCreative.com
# Permission to copy and use and modify this
# software is hereby given to individuals and to
# businesses with ten or fewer employees if this
# copyright notice is included in all copies
# and modified copies.
# All other rights are reserved.
# Businesses with more than ten employees are
# encouraged to contract with small businesses
# to supply the service of running this software
# if there are arrangements for either business
# to make donations to support the Open Street
# Map project.
# Disclaimer of Warranty: THERE IS NO WARRANTY
# FOR THIS SOFTWARE. THE COPYRIGHT HOLDER PROVIDES
# THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY
# KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
# BUT NOT LIMITED TO, THE FITNESS FOR A
# PARTICULAR PURPOSE.
# Limitation of Liability: IN NO EVENT WILL THE
# COPYRIGHT HOLDER BE LIABLE TO ANYONE FOR
# DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
# INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
# OUT OF THE USE OR INABILITY TO USE THE SOFTWARE.
#
#
# Merges the latitude and longitude locations of
# city or business nodes with city or business way
# and relation IDs so that the centers of
# bounding boxes can be calculated.
#
# Usage:
#
# perl osm_merge_lats_lons_with_way_relation_ids.pl
#--------------------------------------------------
# Input format:
#
# The first two digits must match the ending two
# digits in the node number, and the file must be
# sorted by the two digits so that the same two
# digits do not appear later in the file.
# 00 n1017500 w574529215
# 00 n1077800 w440112538
# 00 n1077900 w86163852
# 00 n1078100 w440112444
# 00 n1078200 w440112444
# 01 n1017501 w574529215
# 01 n1077801 w440112538
# 01 n1077901 w86163852
# 01 n1078101 w440112444
# 01 n1078201 w440112444
#--------------------------------------------------
# Output format:
#
# w89737702 10585974561 10162359387
# w52120510 10483886317 10109156389
# w73908440 10599808984 10303269180
# w180860990 10599866293 10303573954
# r769650408 10586676239 10334573989
#--------------------------------------------------
# Specify Linux or Windows path style.  This
# separator is used later when building the path
# to the packed latitude and longitude files.
# $slash_or_backslash = "\\" ; # windows
$slash_or_backslash = "/" ; # linux
#--------------------------------------------------
# Open the input file that contains the node and
# way pairs and the node and relation pairs.  Each
# line is preceded by the ending two digits of the
# node ID.  The ending digits were used to sort
# the lines based on the ending two digits.
# Stop immediately if the file cannot be opened,
# because otherwise the read loop would silently
# see an empty input and write empty output files.
$input_filename = 'output_sorted_node_and_way_or_relation_pairs.txt' ;
print "input filename: " . $input_filename . "\n" ;
open( IN_FILE , '<' , $input_filename ) or die "cannot open input file " . $input_filename . ": " . $! . "\n" ;
#--------------------------------------------------
# Open the output file that receives the way and
# relation IDs with their latitudes and longitudes.
$output_filename = 'output_outline_ways_relations_with_lats_lons.txt' ;
print "output filename: " . $output_filename . "\n" ;
open( OUT_FILE, '>' , $output_filename ) or die "cannot open output file " . $output_filename . ": " . $! . "\n" ;
#--------------------------------------------------
# Open the log file that receives the unrecognized
# input lines and the node IDs that were not found.
$output_filename = 'output_log_from_merge_lats_lons.txt' ;
print "output filename: " . $output_filename . "\n" ;
open( OUT_LOG, '>' , $output_filename ) or die "cannot open log file " . $output_filename . ": " . $! . "\n" ;
#--------------------------------------------------
# Initialize the counters that are reported at the
# end, so that a count of zero is displayed as "0"
# instead of as an empty string.
$count_of_lines_not_recognized = 0 ;
$count_of_nodes_not_found = 0 ;
#--------------------------------------------------
# Begin a loop that gets the next node and way
# pair, or the next node and relation pair.
# The group of ending two digits currently being
# collected starts as "00".  The associative array
# maps a truncated node ID (the node ID without
# its ending two digits and without leading zeros)
# to a space-separated list of the way and
# relation IDs that include that node.
$ending_two_digits_as_text = "00" ;
%ways_relations_that_include_truncated_node = ( ) ;
while ( my $input_line = <IN_FILE> )
{
    chomp( $input_line ) ;
    #--------------------------------------------------
    # Get the node ID, its ending two digits, and the
    # way or relation ID.  The leading two digits on
    # the line are only required to be present; the
    # ending digits are taken from the node ID itself.
    my ( $node_number_as_text , $next_ending_two_digits_as_text , $way_or_relation_id ) = ( $input_line =~ /^[0-9][0-9] +n([0-9]+([0-9][0-9])) +([wr][0-9]+)/ ) ;
    if ( not( defined( $way_or_relation_id ) ) )
    {
        print OUT_LOG "unrecognized: [" . $input_line . "]" . "\n" ;
        $count_of_lines_not_recognized ++ ;
        next ;
    }
    #--------------------------------------------------
    # If the ending two digits have changed, get and
    # write the latitudes and longitudes for the nodes
    # that end with the previous ending two digits,
    # log the nodes of that group that were not found,
    # and then clear the associative array for the
    # next ending two digits.  The logging must be
    # done here, before the array is cleared,
    # otherwise only the final group would be checked.
    if ( $next_ending_two_digits_as_text ne $ending_two_digits_as_text )
    {
        get_and_write_latitudes_longitudes_for_ways_relations( ) ;
        log_nodes_not_found_in_current_group( ) ;
        %ways_relations_that_include_truncated_node = ( ) ;
        $ending_two_digits_as_text = $next_ending_two_digits_as_text ;
    }
    #--------------------------------------------------
    # Append the way ID number or relation ID number
    # to a sub-list that is associated with this node
    # ID.  The sub-list lists the ways and relations
    # that include that node in their list of points
    # (nodes).  The index to the main list of nodes
    # uses a truncated node ID because the last two
    # digits are the same within a group.
    # The subroutine that reads the binary file
    # removes leading zeros from its truncated node
    # ID (and uses "0" when nothing remains), so the
    # same normalization is done here.  Padding with
    # leading zeros would create an index that never
    # matches.
    my $node_number_truncated = substr( $node_number_as_text , 0 , -2 ) ;
    $node_number_truncated =~ s/^0+// ;
    if ( $node_number_truncated eq "" )
    {
        $node_number_truncated = "0" ;
    }
    $ways_relations_that_include_truncated_node{ $node_number_truncated } .= $way_or_relation_id . " " ;
    #--------------------------------------------------
    # Repeat the loop to handle the next line in the
    # input file.
}
#--------------------------------------------------
# Handle the final group of node ID numbers, which
# have the last ending two digits that appeared in
# the input file (not necessarily 99).
get_and_write_latitudes_longitudes_for_ways_relations( ) ;
log_nodes_not_found_in_current_group( ) ;
#--------------------------------------------------
# Display the summary counts.
print "skipped " . $count_of_lines_not_recognized . " input lines not recognized" . "\n" ;
print "counted " . $count_of_nodes_not_found . " node IDs not found" . "\n" ;
#--------------------------------------------------
# Close the files.  A failure to flush buffered
# output is only reported when a file is closed,
# so the output files are checked.
close( IN_FILE ) ;
close( OUT_FILE ) or die "cannot finish writing the output file: " . $! . "\n" ;
close( OUT_LOG ) or die "cannot finish writing the log file: " . $! . "\n" ;
#--------------------------------------------------
# End of main code.
exit( ) ;
#--------------------------------------------------
# Subroutine that writes to the log file the node
# IDs of the current group that were not found in
# the latitude and longitude file, and counts them.
# A node was found if the text "found" was appended
# to its list of way and relation IDs.
# Uses the global associative array, the global
# ending two digits, and the global counter.
sub log_nodes_not_found_in_current_group
{
    foreach my $truncated_node_id ( keys( %ways_relations_that_include_truncated_node ) )
    {
        if ( $ways_relations_that_include_truncated_node{ $truncated_node_id } !~ /found/ )
        {
            print OUT_LOG "not found: n" . $truncated_node_id . $ending_two_digits_as_text . "\n" ;
            $count_of_nodes_not_found ++ ;
        }
    }
    return ;
}
#--------------------------------------------------
# Subroutine that gets latitudes and longitudes
# and writes to the output file.
#
# It reads the packed binary file for the current
# ending two digits and, for each node that is an
# index in the global associative array, writes one
# line per way or relation ID to OUT_FILE in the
# form "ID latitude longitude".
#
# Globals read:
#   $ending_two_digits_as_text (selects the file)
#   $slash_or_backslash (path separator)
#   %ways_relations_that_include_truncated_node
# Globals changed:
#   the text " found" is appended to the array
#   value of each node that was found, so that the
#   calling code can report the nodes not found.
# File handles used (opened by the calling code):
#   OUT_FILE and OUT_LOG
# Dies if the binary file cannot be opened or read.
sub get_and_write_latitudes_longitudes_for_ways_relations
{
    #--------------------------------------------------
    # If there are no node IDs that have the specified
    # ending two digits (such as during testing),
    # return without reading the associated latitude
    # and longitude file.
    if ( keys( %ways_relations_that_include_truncated_node ) < 1 )
    {
        return ;
    }
    #--------------------------------------------------
    # Open the input file that contains the nodes,
    # latitudes, and longitudes in packed binary
    # format.  It is specific to the current ending
    # two digits of node numbers.
    my $binary_filename = 'lats_lons_in_groups' . $slash_or_backslash . 'output_packed_lats_lons_in_group_' . $ending_two_digits_as_text . '.bin' ;
    open( IN_FILE_BINARY, '<:raw' , $binary_filename ) or die "cannot open " . $binary_filename . ": " . $! . "\n" ;
    print "binary in: " . $binary_filename . "\n" ;
    #--------------------------------------------------
    # Begin a loop that handles each record of 18
    # bytes from the input file.  Each record unpacks
    # into 36 digits: 12 digits for the node ID, 12
    # for the latitude, and 12 for the longitude.
    my $progress_counter = 0 ;
    my $log_line_counter = 0 ;
    my $record_bytes ;
    while ( 1 )
    {
        #--------------------------------------------------
        # Read the next record.  Distinguish a read error
        # and an incomplete final record from the normal
        # end of the file.
        my $count_of_bytes_read = read( IN_FILE_BINARY , $record_bytes , 18 ) ;
        if ( not( defined( $count_of_bytes_read ) ) )
        {
            die "error reading " . $binary_filename . ": " . $! . "\n" ;
        }
        if ( $count_of_bytes_read == 0 )
        {
            last ;
        }
        if ( $count_of_bytes_read != 18 )
        {
            print OUT_LOG "ignored incomplete final record (" . $count_of_bytes_read . " bytes) in " . $binary_filename . "\n" ;
            last ;
        }
        my $digits_node_lat_lon = unpack( "h36" , $record_bytes ) ;
        #--------------------------------------------------
        # Get the node number without its final two
        # digits, which are already known to match the
        # ending digits being handled.
        # The leading zeros must be removed before
        # checking the truncated version with a match.
        my $truncated_node_id = substr( $digits_node_lat_lon , 0 , 10 ) ;
        $truncated_node_id =~ s/^0+// ;
        if ( $truncated_node_id eq "" )
        {
            $truncated_node_id = "0" ;
        }
        #--------------------------------------------------
        # Display progress after each 10,000,000 records.
        $progress_counter ++ ;
        if ( $progress_counter >= 10000000 )
        {
            $progress_counter = 0 ;
            $log_line_counter ++ ;
            print "handled next 10,000,000 nodes (" . $log_line_counter . ")" . "\n" ;
        }
        #--------------------------------------------------
        # If this node is not of interest, restart the
        # node loop to handle the next node.
        if ( not( exists( $ways_relations_that_include_truncated_node{ $truncated_node_id } ) ) )
        {
            next ;
        }
        #--------------------------------------------------
        # Get the latitude and longitude integers as text.
        # They are 11 digits in length.  Ignore the first
        # digit in each group of 12 digits.
        my $latitude = substr( $digits_node_lat_lon , 13 , 11 ) ;
        my $longitude = substr( $digits_node_lat_lon , 25 , 11 ) ;
        #--------------------------------------------------
        # For each of the way IDs that include this node,
        # write the way ID and the latitude and longitude
        # for this node.  The node ID is not included.
        # Later steps (in other scripts) will find the
        # maximum and minimum latitude and longitude for
        # each way, and these define the bounding box for
        # this way ID.  Duplicate way or relation IDs are
        # omitted even when they are not adjacent in the
        # list.  Empty items and the "found" marker (which
        # is present if the same node appears again in
        # the binary file) are not IDs, so they are
        # skipped.
        my %id_was_already_written = ( ) ;
        foreach my $way_or_relation_id_in_list ( split( / / , $ways_relations_that_include_truncated_node{ $truncated_node_id } ) )
        {
            if ( ( $way_or_relation_id_in_list eq "" ) || ( $way_or_relation_id_in_list eq "found" ) )
            {
                next ;
            }
            if ( $id_was_already_written{ $way_or_relation_id_in_list } )
            {
                next ;
            }
            $id_was_already_written{ $way_or_relation_id_in_list } = 1 ;
            print OUT_FILE $way_or_relation_id_in_list . " " . $latitude . " " . $longitude . "\n" ;
        }
        #--------------------------------------------------
        # Mark this node as found, just once.
        if ( $ways_relations_that_include_truncated_node{ $truncated_node_id } !~ / found$/ )
        {
            $ways_relations_that_include_truncated_node{ $truncated_node_id } .= " found" ;
        }
        #--------------------------------------------------
        # Repeat the loop to consider the next node in
        # the binary file that contains the node's
        # latitude and longitude numbers.
    }
    #--------------------------------------------------
    # End of subroutine.
    close( IN_FILE_BINARY ) ;
    return ;
}
#--------------------------------------------------
# End of code.