-
Notifications
You must be signed in to change notification settings - Fork 0
/
r2t_susa.py
156 lines (128 loc) · 5.26 KB
/
r2t_susa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""
STEP 3: Extract raster covered by polygon and calculate statistics. It is an
updated version of raster2table.py
30/03/2021
Variant 2
- use buffer and remove polygons that are too small
- calculate 75 percentile instead of mean
- add pixel count for each polygon
- calculate monthly statistics and interpolate where value is missing
NOTE: Before you can start extracting data from rasters, the rasters have to be
stacked into a single GeoTIFF file (see stack_gtif.py)
"""
import time
from tqdm import tqdm
from shapely.geometry import mapping
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from os.path import join, basename
from os import makedirs
def r2t(pth_raster, pth_shp, dir_out, suffix):
"""Function creates shape file and csv file containing mean and standard
deviation of the underlain raster for each polygon.
Parameters
----------
pth_raster : str
Path to input raster.
pth_shp : str
Path to input shapefile.
dir_out : str
Path to directory for saving results.
suffix : str
Suffix to be added to results.
Returns
-------
Finished message.
"""
def ms_rio(geom, raster):
"""Returns subset of an array covered by the input polygon. The input
polygon has to be in the GeoJSON format. The crop attribute sets values
not covered by polygon to nan. All_touched is used to prevent empty rasters
for very thin polygons."""
out_image, _ = mask(raster, geom, crop=True, all_touched=True, nodata=np.nan)
return out_image
# Prepare output name for saving results to files
out_name = basename(pth_shp)[:-4] + "_" + suffix
# Read polygons to data frame
print("Reading shapefile...")
t = time.time()
tdf = gpd.read_file(pth_shp)
t = time.time() - t
print(f"Read SHP with GeoPandas: {t} seconds")
# # Write to / read from pickle for faster debugging
# pickle.dump(tdf, "df.p", "wb"))
# tmp_read = pickle.load(open("df.p", "rb"))
# # Uncomment for processing of a subset (useful for debugging)
# # tdf = tdf[tdf.REGIJA == 4].copy()
# tdf = tdf[0:1000].copy()
# Add buffer and reformat geometry to GeoJSON format (append a new column)
print("Adding buffer")
tdf["buffered"] = tdf["geometry"].buffer(-5)
tqdm.pandas(desc="geometry -> GeoJSON")
tdf["geom"] = tdf["buffered"].progress_apply(lambda g: [mapping(g)])
# # TEST WITHOUT BUFFER
# tqdm.pandas(desc="geometry -> GeoJSON2")
# tdf["geom2"] = tdf.geometry.progress_apply(lambda g: [mapping(g)])
# Remove polygons with 0 area after buffer
rows_a, _ = tdf.shape
tdf = tdf[tdf["buffered"].area > 0].copy()
rows_b, _ = tdf.shape
rows_removed = rows_a - rows_b
print(f"Filtering... Removed {rows_removed} rows")
# Extract rasters, and calculate mean & std (each stored to a new column)
src = rasterio.open(pth_raster)
tqdm.pandas(desc="Extracting arrays")
tdf["hand_rst"] = tdf.geom.progress_apply(lambda g: ms_rio(g, src))
# # TEST WITHOUT
# tqdm.pandas(desc="Extracting arrays2")
# tdf["hand_rst2"] = tdf.geom2.progress_apply(lambda g: ms_rio(g, src))
src.close()
# Count valid pixels
tqdm.pandas(desc="Counting pixels")
tdf["pix_count"] = tdf.hand_rst.progress_apply(lambda g: int(np.count_nonzero(~np.isnan(g), axis=(1, 2))))
# # Calculate 75th percentile by dates
# tqdm.pandas(desc="75th percentile")
# tdf["by_dates"] = tdf.hand_rst.progress_apply(lambda g: np.nanpercentile(g, 75, axis=(1, 2)))
# print(" Splitting LIST to COLUMNS...")
# # Create list of column names (dates of individual images)
# with open("c:\\susa\\datumi_ZV2017.txt") as file:
# dates = [suffix + "_" + x.rstrip('\n') for x in file]
# tdf[dates] = gpd.GeoDataFrame(tdf.by_dates.tolist(), index=tdf.index)
# tdf = tdf.drop(columns=["by_dates"])
# Calculate aggregated mean (HAND INDEX)
tqdm.pandas(desc="Mean")
index_tag = suffix + "_mean"
tdf[index_tag] = tdf.hand_rst.progress_apply(lambda g: np.nanmean(g))
# Caluclate Standard Deviation (HAND INDEX)
tqdm.pandas(desc="Std")
tdf["hand_std"] = tdf.hand_rst.progress_apply(lambda g: np.nanstd(g))
# Save shapefile (drop GeoJSON and raster columns)
print("Saving SHP...")
makedirs(dir_out, exist_ok=True)
out_shp = join(dir_out, out_name + ".shp")
tdf = tdf.drop(columns=["buffered", "geom", "hand_rst"])
tdf.to_file(out_shp)
# Save CSV (also drop geometry)
print("Saving CSV...")
makedirs(dir_out, exist_ok=True)
out_csv = join(dir_out, out_name + ".csv")
tdf = tdf.drop(columns=["geometry"])
tdf.to_csv(out_csv)
return "DONE!"
if __name__ == "__main__":
# SET PATHS
# ---------
# Input raster
# geotiff = "c:\\susa\\ZV2017_NDVI_Cmask.tif"
geotiff = "p:\\ARRS Susa\\podatki\\hand_index\\hand_index_d96tm.tif"
# Input shape file
# shapefile = "c:\\susa\\ZV2017_d96tm_Slo_big10\\ZV2017_d96tm_Slo_big10.shp"
shapefile = "c:\\susa\\ARSKTRP_ZV_clean\\ZV2017_d96tm_clean.shp"
# Output folder and suffix to be added to SHP and CSV files
output_loc = "c:\\susa\\ZV2017_pc075"
suff = "hand"
print(f">>> Run for: {suff}")
result = r2t(geotiff, shapefile, output_loc, suff)
print(result)