Merge pull request #9 from RJbalikian/regional_work
Regional work
RJbalikian authored Feb 15, 2024
2 parents cf2e0d6 + 04acd2f commit 1b1fc23
Showing 29 changed files with 187,935 additions and 186,863 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -4,4 +4,4 @@
testnotebook.ipynb
cache/
w4h/__pycache__/

w4h/resources/sample_data/Output*
218 changes: 189 additions & 29 deletions docs/core.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/generate_docs.py
@@ -15,7 +15,7 @@
RTD_THEME=False #Not currently working
RUN_TESTS=True
LINT_IT=False
RELEASE_VERSION = "0.0.18"
RELEASE_VERSION = "0.0.19"

# Set the filepaths
currentDir = pathlib.Path((__file__)).parent
18 changes: 9 additions & 9 deletions docs/layers.html
@@ -43,7 +43,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
from w4h import logger_function, verbose_print

#Function to Merge tables
def merge_tables(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
def merge_metadata(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
&#34;&#34;&#34;Function to merge tables, intended for merging a metadata table with a data table

Parameters
@@ -72,7 +72,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
&#34;&#34;&#34;
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
verbose_print(merge_tables, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
verbose_print(merge_metadata, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
if header_df is None:
#Figure out which columns to include
if data_cols is None:
@@ -201,7 +201,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
layerList = range(1,layers+1)
res_list = []
resdf_list = []
print(df.columns)

#Generate Column names based on (looped) integers
for layer in layerList:
zStr = &#39;ELEV&#39;
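A minimal sketch of the per-layer column names the loop above appears to build (the ELEV prefix comes from the code; the integer-suffix scheme is an assumption):

layers = 9
zStr = 'ELEV'
# Presumed result: ['ELEV1', 'ELEV2', ..., 'ELEV9']
layerCols = [f'{zStr}{layer}' for layer in range(1, layers + 1)]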
@@ -1003,7 +1003,7 @@ <h2 id="returns">Returns</h2>
layerList = range(1,layers+1)
res_list = []
resdf_list = []
print(df.columns)

#Generate Column names based on (looped) integers
for layer in layerList:
zStr = &#39;ELEV&#39;
@@ -1110,8 +1110,8 @@ <h2 id="returns">Returns</h2>
return resdf_list</code></pre>
</details>
</dd>
<dt id="w4h.layers.merge_tables"><code class="name flex">
<span>def <span class="ident">merge_tables</span></span>(<span>data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs)</span>
<dt id="w4h.layers.merge_metadata"><code class="name flex">
<span>def <span class="ident">merge_metadata</span></span>(<span>data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs)</span>
</code></dt>
<dd>
<div class="desc"><p>Function to merge tables, intended for merging metadata table with data table</p>
@@ -1143,7 +1143,7 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def merge_tables(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
<pre><code class="python">def merge_metadata(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
&#34;&#34;&#34;Function to merge tables, intended for merging a metadata table with a data table

Parameters
@@ -1172,7 +1172,7 @@ <h2 id="returns">Returns</h2>
&#34;&#34;&#34;
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
verbose_print(merge_tables, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
verbose_print(merge_metadata, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
if header_df is None:
#Figure out which columns to include
if data_cols is None:
@@ -1250,7 +1250,7 @@ <h1>Index</h1>
<li><code><a title="w4h.layers.get_layer_depths" href="#w4h.layers.get_layer_depths">get_layer_depths</a></code></li>
<li><code><a title="w4h.layers.layer_interp" href="#w4h.layers.layer_interp">layer_interp</a></code></li>
<li><code><a title="w4h.layers.layer_target_thick" href="#w4h.layers.layer_target_thick">layer_target_thick</a></code></li>
<li><code><a title="w4h.layers.merge_tables" href="#w4h.layers.merge_tables">merge_tables</a></code></li>
<li><code><a title="w4h.layers.merge_metadata" href="#w4h.layers.merge_metadata">merge_metadata</a></code></li>
</ul>
</li>
</ul>
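Because merge_tables is renamed to merge_metadata, existing callers need updating. A minimal sketch of a call under the new name (the file paths and the FORMATION column are hypothetical; API_NUMBER, LATITUDE, and LONGITUDE follow the column names used elsewhere in this package):

import pandas as pd
from w4h.layers import merge_metadata

data_df = pd.read_csv('well_intervals.csv')   # hypothetical input file
header_df = pd.read_csv('well_headers.csv')   # hypothetical input file

# data_cols/header_cols select which columns from each table to carry through
merged = merge_metadata(data_df, header_df,
                        data_cols=['API_NUMBER', 'FORMATION'],
                        header_cols=['API_NUMBER', 'LATITUDE', 'LONGITUDE'],
                        verbose=True)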
207 changes: 165 additions & 42 deletions docs/main.html

Large diffs are not rendered by default.

136 changes: 105 additions & 31 deletions docs/mapping.html
@@ -50,18 +50,22 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
lidarURL = r&#39;https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&amp;service=WCS&#39;

#Read study area shapefile (or other file) into geopandas
def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, log=False, verbose=False):
def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, buffer=None, log=False, verbose=False):
&#34;&#34;&#34;Read study area geospatial file into geopandas

Parameters
----------
study_area_path : str or pathlib.Path
Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.
crs : str, tuple, dict, optional
study_area_crs : str, tuple, dict, optional
CRS designation readable by geopandas/pyproj
buffer : None or numeric, default=None
If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.
log : bool, default = False
Whether to log results to log file, by default False
verbose : bool, default=False
Whether to print status and results to terminal

Returns
-------
@@ -74,6 +78,13 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
studyAreaIN = gpd.read_file(study_area_path)
studyAreaIN.to_crs(study_area_crs, inplace=True)

if buffer is not None:
studyAreaIN[&#39;geometry&#39;] = studyAreaIN.buffer(distance=buffer)
if verbose:
print(&#39;\tBuffer applied.&#39;)
if verbose:
print(&#34;\tStudy area read.&#34;)

return studyAreaIN

#Convert coords in columns to geometry in geopandas dataframe
@@ -155,7 +166,7 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
return gdfClip

#Function to sample raster points to points specified in geodataframe
def sample_raster_points(raster, points_df, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
def sample_raster_points(raster, points_df, well_id_col=&#39;API_NUMBER&#39;, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
&#34;&#34;&#34;Sample raster values to points from geopandas geodataframe.

Parameters
@@ -164,6 +175,8 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
Raster containing values to be sampled.
points_df : geopandas.geodataframe
Geopandas dataframe with geometry column containing point values to sample.
well_id_col : str, default=&#34;API_NUMBER&#34;
Column that uniquely identifies each well so multiple sampling points are not taken per well
xcol : str, default=&#39;LONGITUDE&#39;
Column containing name for x-column, by default &#39;LONGITUDE.&#39;
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
@@ -186,27 +199,48 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>

if verbose:
verbose_print(sample_raster_points, locals(), exclude_params=[&#39;raster&#39;, &#39;points_df&#39;])
print(&#34;Sampling grid for {}.&#34;.format(new_col))
print(&#34;\tSampling {} grid at all well locations.&#34;.format(new_col))

#Project points to raster CRS
rastercrsWKT=raster.spatial_ref.crs_wkt
points_df = points_df.to_crs(rastercrsWKT)
#if xcol==&#39;LONGITUDE&#39; and ycol==&#39;LATITUDE&#39;:
if rastercrsWKT != points_df.crs:
if verbose:
print(&#34;\tTemporarily reprojecting raster data to point data&#39;s CRS.&#34;)
pointCRS = points_df.crs.to_epsg()
if pointCRS is None:
pointCRS = points_df.crs.to_wkt()

raster = raster.rio.reproject(pointCRS)
#points_df = points_df.to_crs(rastercrsWKT)

xCOLOUT = xcol+&#39;_PROJ&#39;
yCOLOUT = ycol+&#39;_PROJ&#39;
points_df[xCOLOUT] = points_df[&#39;geometry&#39;].x
points_df[yCOLOUT] = points_df[&#39;geometry&#39;].y
xData = np.array(points_df[xCOLOUT].values)
yData = np.array(points_df[yCOLOUT].values)
zData = []
zID = []
zInd = []

# Get unique well values to reduce sampling time
uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
if verbose:
print(f&#34;\t{uniqueWells.shape[0]} unique wells identified using {well_id_col} column&#34;)

# Loop over DataFrame rows
for i, row in points_df.iterrows():
for i, row in uniqueWells.iterrows():
# Select data from DataArray at current coordinates and append to list
zData.append(raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item())
#sampleArr=raster.sel(x=xData, y=yData, method=&#39;nearest&#39;).values
#sampleArr = np.diag(sampleArr)
#sampleDF = pd.DataFrame(sampleArr, columns=[new_col])
points_df[new_col] = zData#sampleDF[new_col]
zInd.append(i)
zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item()])

inputtype = points_df.dtypes[well_id_col]
wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

# Merge each unique well&#39;s data with all well intervals
wellZDF[well_id_col] = wellZDF[well_id_col].astype(inputtype)
points_df = points_df.merge(wellZDF, how=&#39;left&#39;, on=well_id_col)
#points_df[new_col] = zData#sampleDF[new_col]
return points_df

#Merge xyz dataframe into a metadata dataframe
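The updated sample_raster_points reprojects the raster to the point data's CRS instead of reprojecting the points, so the original point geometries are left untouched. A condensed sketch of that logic, assuming raster is a rioxarray DataArray and points_df a geopandas GeoDataFrame as in the function above:

if raster.spatial_ref.crs_wkt != points_df.crs:
    # Prefer a compact EPSG code; fall back to full WKT if none is defined
    pointCRS = points_df.crs.to_epsg()
    if pointCRS is None:
        pointCRS = points_df.crs.to_wkt()
    raster = raster.rio.reproject(pointCRS)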
@@ -723,8 +757,8 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
if grid_crs is None:
try:
grid_crs=gridIN.spatial_ref.crs_wkt
except:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
except Exception:
iswsCRS = w4h.read_dict(r&#39;../resources/sample_data/isws_crs.json&#39;)
gridIN.rio.write_crs(iswsCRS)
elif grid_crs.lower()==&#39;isws&#39;:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
@@ -1311,8 +1345,8 @@ <h2 id="returns">Returns</h2>
if grid_crs is None:
try:
grid_crs=gridIN.spatial_ref.crs_wkt
except:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
except Exception:
iswsCRS = w4h.read_dict(r&#39;../resources/sample_data/isws_crs.json&#39;)
gridIN.rio.write_crs(iswsCRS)
elif grid_crs.lower()==&#39;isws&#39;:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
@@ -1498,7 +1532,7 @@ <h2 id="returns">Returns</h2>
</details>
</dd>
<dt id="w4h.mapping.read_study_area"><code class="name flex">
<span>def <span class="ident">read_study_area</span></span>(<span>study_area_path, study_area_crs='EPSG:4269', log=False, verbose=False)</span>
<span>def <span class="ident">read_study_area</span></span>(<span>study_area_path, study_area_crs='EPSG:4269', buffer=None, log=False, verbose=False)</span>
</code></dt>
<dd>
<div class="desc"><p>Read study area geospatial file into geopandas</p>
@@ -1507,10 +1541,14 @@ <h2 id="parameters">Parameters</h2>
<dt><strong><code>study_area_path</code></strong> :&ensp;<code>str</code> or <code>pathlib.Path</code></dt>
<dd>Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.</dd>
<dt><strong><code>crs</code></strong> :&ensp;<code>str, tuple, dict</code>, optional</dt>
<dt><strong><code>study_area_crs</code></strong> :&ensp;<code>str, tuple, dict</code>, optional</dt>
<dd>CRS designation readable by geopandas/pyproj</dd>
<dt><strong><code>buffer</code></strong> :&ensp;<code>None</code> or <code>numeric</code>, default=<code>None</code></dt>
<dd>If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.</dd>
<dt><strong><code>log</code></strong> :&ensp;<code>bool</code>, default <code>= False</code></dt>
<dd>Whether to log results to log file, by default False</dd>
<dt><strong><code>verbose</code></strong> :&ensp;<code>bool</code>, default=<code>False</code></dt>
<dd>Whether to print status and results to terminal</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
@@ -1521,18 +1559,22 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, log=False, verbose=False):
<pre><code class="python">def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, buffer=None, log=False, verbose=False):
&#34;&#34;&#34;Read study area geospatial file into geopandas

Parameters
----------
study_area_path : str or pathlib.Path
Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.
crs : str, tuple, dict, optional
study_area_crs : str, tuple, dict, optional
CRS designation readable by geopandas/pyproj
buffer : None or numeric, default=None
If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.
log : bool, default = False
Whether to log results to log file, by default False
verbose : bool, default=False
Whether to print status and results to terminal

Returns
-------
Expand All @@ -1545,6 +1587,13 @@ <h2 id="returns">Returns</h2>
studyAreaIN = gpd.read_file(study_area_path)
studyAreaIN.to_crs(study_area_crs, inplace=True)

if buffer is not None:
studyAreaIN[&#39;geometry&#39;] = studyAreaIN.buffer(distance=buffer)
if verbose:
print(&#39;\tBuffer applied.&#39;)
if verbose:
print(&#34;\tStudy area read.&#34;)

return studyAreaIN</code></pre>
</details>
</dd>
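A short usage sketch of the new buffer parameter (the shapefile path and EPSG code are hypothetical; the buffer distance is interpreted in the units of study_area_crs, so a projected CRS with meter units is assumed):

from w4h.mapping import read_study_area

study_area = read_study_area('my_study_area.shp',         # hypothetical path
                             study_area_crs='EPSG:5070',  # projected, meters
                             buffer=1000,                 # 1 km buffer
                             verbose=True)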
@@ -1860,7 +1909,7 @@ <h2 id="returns">Returns</h2>
</details>
</dd>
<dt id="w4h.mapping.sample_raster_points"><code class="name flex">
<span>def <span class="ident">sample_raster_points</span></span>(<span>raster, points_df, xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False)</span>
<span>def <span class="ident">sample_raster_points</span></span>(<span>raster, points_df, well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False)</span>
</code></dt>
<dd>
<div class="desc"><p>Sample raster values to points from geopandas geodataframe.</p>
@@ -1870,6 +1919,8 @@ <h2 id="parameters">Parameters</h2>
<dd>Raster containing values to be sampled.</dd>
<dt><strong><code>points_df</code></strong> :&ensp;<code>geopandas.geodataframe</code></dt>
<dd>Geopandas dataframe with geometry column containing point values to sample.</dd>
<dt><strong><code>well_id_col</code></strong> :&ensp;<code>str</code>, default=<code>"API_NUMBER"</code></dt>
<dd>Column that uniquely identifies each well so multiple sampling points are not taken per well</dd>
<dt><strong><code>xcol</code></strong> :&ensp;<code>str</code>, default=<code>'LONGITUDE'</code></dt>
<dd>Column containing name for x-column, by default 'LONGITUDE.'
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.</dd>
@@ -1893,7 +1944,7 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def sample_raster_points(raster, points_df, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
<pre><code class="python">def sample_raster_points(raster, points_df, well_id_col=&#39;API_NUMBER&#39;, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
&#34;&#34;&#34;Sample raster values to points from geopandas geodataframe.

Parameters
@@ -1902,6 +1953,8 @@ <h2 id="returns">Returns</h2>
Raster containing values to be sampled.
points_df : geopandas.geodataframe
Geopandas dataframe with geometry column containing point values to sample.
well_id_col : str, default=&#34;API_NUMBER&#34;
Column that uniquely identifies each well so multiple sampling points are not taken per well
xcol : str, default=&#39;LONGITUDE&#39;
Column containing name for x-column, by default &#39;LONGITUDE.&#39;
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
@@ -1924,27 +1977,48 @@ <h2 id="returns">Returns</h2>

if verbose:
verbose_print(sample_raster_points, locals(), exclude_params=[&#39;raster&#39;, &#39;points_df&#39;])
print(&#34;Sampling grid for {}.&#34;.format(new_col))
print(&#34;\tSampling {} grid at all well locations.&#34;.format(new_col))

#Project points to raster CRS
rastercrsWKT=raster.spatial_ref.crs_wkt
points_df = points_df.to_crs(rastercrsWKT)
#if xcol==&#39;LONGITUDE&#39; and ycol==&#39;LATITUDE&#39;:
if rastercrsWKT != points_df.crs:
if verbose:
print(&#34;\tTemporarily reprojecting raster data to point data&#39;s CRS.&#34;)
pointCRS = points_df.crs.to_epsg()
if pointCRS is None:
pointCRS = points_df.crs.to_wkt()

raster = raster.rio.reproject(pointCRS)
#points_df = points_df.to_crs(rastercrsWKT)

xCOLOUT = xcol+&#39;_PROJ&#39;
yCOLOUT = ycol+&#39;_PROJ&#39;
points_df[xCOLOUT] = points_df[&#39;geometry&#39;].x
points_df[yCOLOUT] = points_df[&#39;geometry&#39;].y
xData = np.array(points_df[xCOLOUT].values)
yData = np.array(points_df[yCOLOUT].values)
zData = []
zID = []
zInd = []

# Get unique well values to reduce sampling time
uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
if verbose:
print(f&#34;\t{uniqueWells.shape[0]} unique wells identified using {well_id_col} column&#34;)

# Loop over DataFrame rows
for i, row in points_df.iterrows():
for i, row in uniqueWells.iterrows():
# Select data from DataArray at current coordinates and append to list
zData.append(raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item())
#sampleArr=raster.sel(x=xData, y=yData, method=&#39;nearest&#39;).values
#sampleArr = np.diag(sampleArr)
#sampleDF = pd.DataFrame(sampleArr, columns=[new_col])
points_df[new_col] = zData#sampleDF[new_col]
zInd.append(i)
zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item()])

inputtype = points_df.dtypes[well_id_col]
wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

# Merge each unique well&#39;s data with all well intervals
wellZDF[well_id_col] = wellZDF[well_id_col].astype(inputtype)
points_df = points_df.merge(wellZDF, how=&#39;left&#39;, on=well_id_col)
#points_df[new_col] = zData#sampleDF[new_col]
return points_df</code></pre>
</details>
</dd>
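The reworked loop samples the raster once per unique well and then joins the sampled values back onto every interval row for that well, rather than sampling every row. A condensed sketch of the pattern, assuming points_df and raster as above (ELEVATION stands in for a caller-chosen new_col):

import pandas as pd

unique_wells = points_df.drop_duplicates(subset=['API_NUMBER'])
samples = []
for _, row in unique_wells.iterrows():
    # Nearest-cell lookup at each unique well location
    value = raster.sel(x=row['LONGITUDE_PROJ'], y=row['LATITUDE_PROJ'],
                       method='nearest').item()
    samples.append([row['API_NUMBER'], value])

sample_df = pd.DataFrame(samples, columns=['API_NUMBER', 'ELEVATION'])
# Broadcast each well's sampled value to all of its interval rows
points_df = points_df.merge(sample_df, how='left', on='API_NUMBER')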