Skip to content

Commit

Permalink
load module added
Browse files Browse the repository at this point in the history
  • Loading branch information
PennyHow committed May 22, 2024
1 parent 7a2f7e5 commit 0888382
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 95 deletions.
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
},
keywords="glaciology ice lake ESA",
classifiers=[
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand All @@ -30,6 +32,6 @@
package_dir={"": "src"},
packages=setuptools.find_packages(where="src"),
package_data={"griml.test": ["*"]},
python_requires=">=3.10",
python_requires=">=3.8",
install_requires=['geopandas', 'pandas', 'scipy', 'Shapely', 'rasterio'],
)
19 changes: 15 additions & 4 deletions src/griml/convert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,19 @@
import glob
from pathlib import Path

def convert(indir, outdir, proj, band_info, startdate, enddate, outfile=None):
def convert(indir, proj, band_info, startdate, enddate, outdir=None):
'''Compile features from multiple processings into one geodataframe
Parameters
----------
inlist : list
List of files or geopandas.dataframe.DataFrame objects to merge
Returns
-------
all_gdf : geopandas.dataframe.GeoDataFrame
Compiled geodataframe
'''

# Iterate through files
converted=[]
Expand All @@ -14,18 +26,17 @@ def convert(indir, outdir, proj, band_info, startdate, enddate, outfile=None):
print('\n'+str(count) + '. Converting ' + str(Path(i).name))

# Convert raster to vector
if outfile is not None:
if outdir is not None:
outfile = str(Path(outdir).joinpath(Path(i).stem+'.shp'))
g = raster_to_vector(str(i), proj, band_info, startdate, enddate, None)
print('Saved to '+str(Path(outfile).name))

else:
g = raster_to_vector(str(i), proj, band_info, startdate, enddate, outfile)
g = raster_to_vector(str(i), proj, band_info, startdate, enddate)

converted.append(g)
count=count+1

print('Finished')
return (converted)

if __name__ == "__main__":
Expand Down
64 changes: 49 additions & 15 deletions src/griml/filter/filter_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,72 @@
# -*- coding: utf-8 -*-

from griml.filter import filter_margin, filter_area
from griml.load import load
import geopandas as gpd
from pathlib import Path
import glob

def filter_vectors(indir, outdir, margin_buff):
def filter_vectors(inlist, margin_file, outdir=None, min_area=0.05):
'''Filter vectors by area and margin proximity
Parameters
----------
inlist : list
List of either file paths or GeoDataFrame objects to filter
margin_file : str, geopandas.GeoSeries
Buffered margin used to perform the margin proximity filter
outdir : str, optional
Output directory to write files to
min_area : float, optional
Threshold area (sq km) to filter by
Returns
-------
filtered : list
List of filtered GeoDataFrame objects
'''

# Load margin
margin_buff = load(margin_file)

# Iterate through input list
count=1
for infile in list(glob.glob(indir)):
filtered=[]
for infile in inlist:

# Load and define name
if type(infile)==str:
print('\n'+str(count)+'/'+str(len(inlist)) +
': Filtering vectors in '+str(Path(infile).name))
name = str(Path(infile).stem)+"_filtered.shp"

else:
print('\n'+str(count)+'/'+str(len(inlist)))
name = 'lakes_' + str(count) + "_filtered.shp"

print('\n'+str(count)+'. Filtering vectors in '+str(Path(infile).stem))
vectors = gpd.read_file(infile)
vectors = load(infile)

vectors = filter_area(vectors)
# Perform filtering steps
vectors = filter_area(vectors, min_area)
print(f'{vectors.shape[0]} features over 0.05 sq km')

vectors = filter_margin(vectors, margin_buff)
print(f'{vectors.shape[0]} features within 500 m of margin')

# Retain and save if vectors are present after filtering
if vectors.shape[0]>0:
if outdir is not None:
vectors.to_file(outdir+str(Path(infile).stem)+"_filtered.shp")
vectors.to_file(Path(outdir).joinpath(name))
filtered.append(vectors)
else:
print('No vectors present after filter. Moving to next file.')
count=count+1



if __name__ == "__main__":
indir = "/home/pho/python_workspace/GrIML/other/iml_2017/vectors/*.shp"

infile_margin = "/home/pho/python_workspace/GrIML/other/datasets/ice_margin/gimp_icemask_line_polstereo_simple_buffer.shp"
print('Preparing ice margin buffer...')
margin_buff = gpd.read_file(infile_margin)
# margin_buff = margin.buffer(500)
# margin_buff = gpd.GeoDataFrame(geometry=margin_buff, crs=margin.crs)
infile1 = '../test/test_icemask.shp'
margin_buff = gpd.read_file(infile1)

filter_vectors(indir, margin_buff)
infile2 = ['../test/test_filter.shp']
filter_vectors(infile2, margin_buff)
1 change: 1 addition & 0 deletions src/griml/load/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from griml.load.load import *
80 changes: 80 additions & 0 deletions src/griml/load/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import geopandas as gpd
import pandas as pd
import os, errno
from pathlib import Path

def load(i):
    '''Load vectors into appropriate format for processing

    Parameters
    ----------
    i : str, geopandas.geodataframe.GeoDataFrame, pandas.core.series.Series
        Input vectors (either a path to a vector file or a vector object)

    Returns
    -------
    out : geopandas.geodataframe.GeoDataFrame, pandas.core.series.Series
        Vector object read from file, or the input object itself if it was
        already a vector object

    Raises
    ------
    FileNotFoundError
        If i is a string that does not point to an existing file (raised by
        is_filepath)
    ValueError
        If i is a string that is_filepath does not accept as a file
    TypeError
        If i is neither a string nor a supported vector object
    '''
    if is_string(i):
        if is_filepath(i):
            return load_vector_from_file(i)

        # is_filepath currently raises rather than returning False, so this
        # is a safety net; the exception must be raised, not just constructed
        raise ValueError('Expected valid vector filepath, but instead got '+i)

    if is_geo_object(i):
        return i

    # Previously this error was built but never raised, so unsupported input
    # silently produced None and failed later in the caller
    raise TypeError('Expected str, geopandas.geodataframe.GeoDataFrame or pandas.core.series.Series object, but instead got '+str(type(i)))

def load_vector_from_file(infile):
    '''Read a vector file with geopandas and return the resulting object

    Parameters
    ----------
    infile : str
        Path of the vector file, passed straight to geopandas.read_file

    Returns
    -------
    Object returned by geopandas.read_file
    '''
    return gpd.read_file(infile)

def is_string(n):
    '''Check if input for loading is a string

    Parameters
    ----------
    n : object
        Input to check

    Returns
    -------
    bool
        True if n is a str (or an instance of a str subclass), otherwise False
    '''
    # isinstance rather than an exact type comparison, so that str subclasses
    # are also recognised as strings
    return isinstance(n, str)

def is_filepath(n):
    '''Check that a string points to an existing file

    Parameters
    ----------
    n : str
        Path to check

    Returns
    -------
    bool
        True if n is an existing file

    Raises
    ------
    FileNotFoundError
        If n is not an existing file; there is no False return
    '''
    if not os.path.isfile(n):
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), n)
    return True

def is_dir(n):
    '''Check that the parent directory of a path exists

    Note that n itself is not tested; only the directory containing it
    (Path(n).parent) has to exist.

    Parameters
    ----------
    n : str
        Path whose parent directory is checked

    Returns
    -------
    bool
        True if the parent of n is an existing directory

    Raises
    ------
    FileNotFoundError
        If the parent of n is not an existing directory; there is no False
        return
    '''
    parent_dir = str(Path(n).parent)
    if not os.path.isdir(parent_dir):
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), n)
    return True

def is_geo_object(typ):
    '''Check if object is a valid vector object

    Parameters
    ----------
    typ : object
        Object to check

    Returns
    -------
    bool
        True if typ is a geopandas GeoDataFrame or a pandas Series (including
        subclasses of either, such as geopandas.GeoSeries), otherwise False
    '''
    # isinstance rather than exact type equality: geopandas.GeoSeries is a
    # subclass of pandas.Series and was rejected by the previous comparison
    return isinstance(typ, (gpd.geodataframe.GeoDataFrame, pd.Series))

if __name__ == "__main__":
    # NOTE(review): 'test_icemsak' in infile2 looks like a deliberately
    # misspelled file name used to exercise the missing-file path - confirm
    infile1, infile2 = '../test/test_icemask.shp', '../test/test_icemsak.shp'

    # Load from a file path
    test1 = load(infile1)

    # Load from an object that has already been read
    g = gpd.read_file(infile1)
    test2 = load(g)

    # Load from the second (possibly non-existent) file path
    test2 = load(infile2)
90 changes: 53 additions & 37 deletions src/griml/merge/merge_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,24 @@
import geopandas as gpd
import pandas as pd
from glob import glob
from griml.load import load

def merge_vectors(feature_list, method_list, collection_list, start_date_list,
end_date_list, proj='EPSG:3413'):
def merge_vectors(inlist, outfile=None, proj='EPSG:3413'):
'''Compile features from multiple processings into one geodataframe
Parameters
----------
feature_list : list
List of shapely features
method_list : list
List of strings denoting processing method
collection_list : list
List of strings denoting dataset collection
date_list : list
List of start and end date for processing
inlist : list
List of file paths or geopandas.geodataframe.GeoDataFrame objects to merge
Returns
-------
all_gdf : geopandas.GeoDataFrame
all_gdf : geopandas.geodataframe.GeoDataFrame
Compiled geodataframe
'''
feature, method, collection, start_date, end_date = _load_all(inlist)
dfs=[]
for a,b,c,d,e in zip(feature_list, method_list, collection_list, start_date_list, end_date_list):
for a,b,c,d,e in zip(feature, method, collection, start_date, end_date):
if a is not None:

#Construct geodataframe with basic metadata
Expand All @@ -48,10 +43,51 @@ def merge_vectors(feature_list, method_list, collection_list, start_date_list,

all_gdf['area_sqkm'] = all_gdf['geometry'].area/10**6
all_gdf['length_km'] = all_gdf['geometry'].length/1000
all_gdf = gpd.GeoDataFrame(all_gdf, geometry=all_gdf.geometry,
crs=proj)
# all_gdf = gpd.GeoDataFrame(all_gdf, geometry=all_gdf.geometry,
# crs=proj)

if outfile is not None:
print('Saving file...')
all_gdf.to_file(outfile)
print('Saved to '+str(outfile))

return all_gdf

def _load_all(inlist):
    '''Load every input and gather the columns needed for merging

    Parameters
    ----------
    inlist : list
        List of inputs, each passed to load (file paths or vector objects)

    Returns
    -------
    features : list
        One list per input holding the values of its 'geometry' column
    methods : list
        One list per input holding the values of its 'method' column
    sources : list
        One list per input holding the values of its 'source' column
    starts : list
        One list per input holding the values of its 'startdate' column
    ends : list
        One list per input holding the values of its 'enddate' column
    '''
    # Column names in the order their lists are returned
    columns = ('geometry', 'method', 'source', 'startdate', 'enddate')
    gathered = tuple([] for _ in columns)

    for entry in inlist:
        loaded = load(entry)
        for bucket, column in zip(gathered, columns):
            bucket.append(list(loaded[column]))

    return gathered

def _dissolve_vectors(gdf):
'''Dissolve overlapping polygons in a Pandas GeoDataFrame
Expand All @@ -72,26 +108,6 @@ def _dissolve_vectors(gdf):
return gdf2

if __name__ == "__main__":
indir = "/home/pho/python_workspace/GrIML/other/iml_2017/test/*.shp"
outfile = "/home/pho/python_workspace/GrIML/other/iml_2017/merged_vectors/griml_2017_inventory.shp"
features=[]
methods=[]
sources=[]
starts=[]
ends=[]

for f in list(glob(indir)):
i = gpd.read_file(f)

features.append(list(i['geometry']))
methods.append(list(i['method']))
sources.append(list(i['source']))
starts.append(list(i['startdate']))
ends.append(list(i['enddate']))
vectors = merge_vectors(features, methods, sources, starts, ends)

# features.append(i)
# vectors = pd.concat(features, ignore_index=True)


vectors.to_file(outfile)
infile1 = '../test/test_merge_1.shp'
infile2 = '../test/test_merge_2.shp'
vectors = merge_vectors([infile1,infile2])
Loading

0 comments on commit 0888382

Please sign in to comment.