Merge pull request #9 from RJbalikian/regional_work
Regional work
RJbalikian authored Feb 15, 2024
2 parents cf2e0d6 + 04acd2f commit 1b1fc23
Showing 29 changed files with 187,935 additions and 186,863 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -4,4 +4,4 @@
testnotebook.ipynb
cache/
w4h/__pycache__/

w4h/resources/sample_data/Output*
218 changes: 189 additions & 29 deletions docs/core.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/generate_docs.py
@@ -15,7 +15,7 @@
RTD_THEME=False #Not currently working
RUN_TESTS=True
LINT_IT=False
RELEASE_VERSION = "0.0.18"
RELEASE_VERSION = "0.0.19"

# Set the filepaths
currentDir = pathlib.Path((__file__)).parent
18 changes: 9 additions & 9 deletions docs/layers.html
@@ -43,7 +43,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
from w4h import logger_function, verbose_print

#Function to Merge tables
def merge_tables(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
def merge_metadata(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
&#34;&#34;&#34;Function to merge tables, intended for merging a metadata table with a data table

Parameters
@@ -72,7 +72,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
&#34;&#34;&#34;
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
verbose_print(merge_tables, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
verbose_print(merge_metadata, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
if header_df is None:
#Figure out which columns to include
if data_cols is None:
@@ -201,7 +201,7 @@ <h1 class="title">Module <code>w4h.layers</code></h1>
layerList = range(1,layers+1)
res_list = []
resdf_list = []
print(df.columns)

#Generate Column names based on (looped) integers
for layer in layerList:
zStr = &#39;ELEV&#39;
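A minimal sketch of the per-layer column names the loop above appears to build (the ELEV prefix comes from the code; the integer-suffix scheme is an assumption):

layers = 9
zStr = 'ELEV'
# Presumed result: ['ELEV1', 'ELEV2', ..., 'ELEV9']
layerCols = [f'{zStr}{layer}' for layer in range(1, layers + 1)]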
@@ -1003,7 +1003,7 @@ <h2 id="returns">Returns</h2>
layerList = range(1,layers+1)
res_list = []
resdf_list = []
print(df.columns)

#Generate Column names based on (looped) integers
for layer in layerList:
zStr = &#39;ELEV&#39;
@@ -1110,8 +1110,8 @@ <h2 id="returns">Returns</h2>
return resdf_list</code></pre>
</details>
</dd>
<dt id="w4h.layers.merge_tables"><code class="name flex">
<span>def <span class="ident">merge_tables</span></span>(<span>data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs)</span>
<dt id="w4h.layers.merge_metadata"><code class="name flex">
<span>def <span class="ident">merge_metadata</span></span>(<span>data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs)</span>
</code></dt>
<dd>
<div class="desc"><p>Function to merge tables, intended for merging metadata table with data table</p>
@@ -1143,7 +1143,7 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def merge_tables(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
<pre><code class="python">def merge_metadata(data_df, header_df, data_cols=None, header_cols=None, auto_pick_cols=False, drop_duplicate_cols=True, log=False, verbose=False, **kwargs):
&#34;&#34;&#34;Function to merge tables, intended for merging a metadata table with a data table

Parameters
@@ -1172,7 +1172,7 @@ <h2 id="returns">Returns</h2>
&#34;&#34;&#34;
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
verbose_print(merge_tables, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
verbose_print(merge_metadata, locals(), exclude_params=[&#39;data_df&#39;, &#39;header_df&#39;])
if header_df is None:
#Figure out which columns to include
if data_cols is None:
@@ -1250,7 +1250,7 @@ <h1>Index</h1>
<li><code><a title="w4h.layers.get_layer_depths" href="#w4h.layers.get_layer_depths">get_layer_depths</a></code></li>
<li><code><a title="w4h.layers.layer_interp" href="#w4h.layers.layer_interp">layer_interp</a></code></li>
<li><code><a title="w4h.layers.layer_target_thick" href="#w4h.layers.layer_target_thick">layer_target_thick</a></code></li>
<li><code><a title="w4h.layers.merge_tables" href="#w4h.layers.merge_tables">merge_tables</a></code></li>
<li><code><a title="w4h.layers.merge_metadata" href="#w4h.layers.merge_metadata">merge_metadata</a></code></li>
</ul>
</li>
</ul>
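Because merge_tables is renamed to merge_metadata, existing callers need updating. A minimal sketch of a call under the new name (the file paths and the FORMATION column are hypothetical; API_NUMBER, LATITUDE, and LONGITUDE follow the column names used elsewhere in this package):

import pandas as pd
from w4h.layers import merge_metadata

data_df = pd.read_csv('well_intervals.csv')   # hypothetical input file
header_df = pd.read_csv('well_headers.csv')   # hypothetical input file

# data_cols/header_cols select which columns from each table to carry through
merged = merge_metadata(data_df, header_df,
                        data_cols=['API_NUMBER', 'FORMATION'],
                        header_cols=['API_NUMBER', 'LATITUDE', 'LONGITUDE'],
                        verbose=True)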
207 changes: 165 additions & 42 deletions docs/main.html

Large diffs are not rendered by default.

136 changes: 105 additions & 31 deletions docs/mapping.html
@@ -50,18 +50,22 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
lidarURL = r&#39;https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&amp;service=WCS&#39;

#Read study area shapefile (or other file) into geopandas
def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, log=False, verbose=False):
def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, buffer=None, log=False, verbose=False):
&#34;&#34;&#34;Read study area geospatial file into geopandas

Parameters
----------
study_area_path : str or pathlib.Path
Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.
crs : str, tuple, dict, optional
study_area_crs : str, tuple, dict, optional
CRS designation readable by geopandas/pyproj
buffer : None or numeric, default=None
If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.
log : bool, default = False
Whether to log results to log file, by default False
verbose : bool, default=False
Whether to print status and results to terminal

Returns
-------
@@ -74,6 +78,13 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
studyAreaIN = gpd.read_file(study_area_path)
studyAreaIN.to_crs(study_area_crs, inplace=True)

if buffer is not None:
studyAreaIN[&#39;geometry&#39;] = studyAreaIN.buffer(distance=buffer)
if verbose:
print(&#39;\tBuffer applied.&#39;)
if verbose:
print(&#34;\tStudy area read.&#34;)

return studyAreaIN

#Convert coords in columns to geometry in geopandas dataframe
@@ -155,7 +166,7 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
return gdfClip

#Function to sample raster points to points specified in geodataframe
def sample_raster_points(raster, points_df, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
def sample_raster_points(raster, points_df, well_id_col=&#39;API_NUMBER&#39;, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
&#34;&#34;&#34;Sample raster values to points from geopandas geodataframe.

Parameters
@@ -164,6 +175,8 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
Raster containing values to be sampled.
points_df : geopandas.geodataframe
Geopandas dataframe with geometry column containing point values to sample.
well_id_col : str, default=&#34;API_NUMBER&#34;
Column that uniquely identifies each well so multiple sampling points are not taken per well
xcol : str, default=&#39;LONGITUDE&#39;
Column containing name for x-column, by default &#39;LONGITUDE.&#39;
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
@@ -186,27 +199,48 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>

if verbose:
verbose_print(sample_raster_points, locals(), exclude_params=[&#39;raster&#39;, &#39;points_df&#39;])
print(&#34;Sampling grid for {}.&#34;.format(new_col))
print(&#34;\tSampling {} grid at all well locations.&#34;.format(new_col))

#Project points to raster CRS
rastercrsWKT=raster.spatial_ref.crs_wkt
points_df = points_df.to_crs(rastercrsWKT)
#if xcol==&#39;LONGITUDE&#39; and ycol==&#39;LATITUDE&#39;:
if rastercrsWKT != points_df.crs:
if verbose:
print(&#34;\tTemporarily reprojecting raster data to point data&#39;s CRS.&#34;)
pointCRS = points_df.crs.to_epsg()
if pointCRS is None:
pointCRS = points_df.crs.to_wkt()

raster = raster.rio.reproject(pointCRS)
#points_df = points_df.to_crs(rastercrsWKT)

xCOLOUT = xcol+&#39;_PROJ&#39;
yCOLOUT = ycol+&#39;_PROJ&#39;
points_df[xCOLOUT] = points_df[&#39;geometry&#39;].x
points_df[yCOLOUT] = points_df[&#39;geometry&#39;].y
xData = np.array(points_df[xCOLOUT].values)
yData = np.array(points_df[yCOLOUT].values)
zData = []
zID = []
zInd = []

# Get unique well values to reduce sampling time
uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
if verbose:
print(f&#34;\t{uniqueWells.shape[0]} unique wells identified using {well_id_col} column&#34;)

# Loop over DataFrame rows
for i, row in points_df.iterrows():
for i, row in uniqueWells.iterrows():
# Select data from DataArray at current coordinates and append to list
zData.append(raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item())
#sampleArr=raster.sel(x=xData, y=yData, method=&#39;nearest&#39;).values
#sampleArr = np.diag(sampleArr)
#sampleDF = pd.DataFrame(sampleArr, columns=[new_col])
points_df[new_col] = zData#sampleDF[new_col]
zInd.append(i)
zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item()])

inputtype = points_df.dtypes[well_id_col]
wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

# Merge each unique well&#39;s data with all well intervals
wellZDF[well_id_col] = wellZDF[well_id_col].astype(inputtype)
points_df = points_df.merge(wellZDF, how=&#39;left&#39;, on=well_id_col)
#points_df[new_col] = zData#sampleDF[new_col]
return points_df

#Merge xyz dataframe into a metadata dataframe
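The updated sample_raster_points reprojects the raster to the point data's CRS instead of reprojecting the points, so the original point geometries are left untouched. A condensed sketch of that logic, assuming raster is a rioxarray DataArray and points_df a geopandas GeoDataFrame as in the function above:

if raster.spatial_ref.crs_wkt != points_df.crs:
    # Prefer a compact EPSG code; fall back to full WKT if none is defined
    pointCRS = points_df.crs.to_epsg()
    if pointCRS is None:
        pointCRS = points_df.crs.to_wkt()
    raster = raster.rio.reproject(pointCRS)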
@@ -723,8 +757,8 @@ <h1 class="title">Module <code>w4h.mapping</code></h1>
if grid_crs is None:
try:
grid_crs=gridIN.spatial_ref.crs_wkt
except:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
except Exception:
iswsCRS = w4h.read_dict(r&#39;../resources/sample_data/isws_crs.json&#39;)
gridIN.rio.write_crs(iswsCRS)
elif grid_crs.lower()==&#39;isws&#39;:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
@@ -1311,8 +1345,8 @@ <h2 id="returns">Returns</h2>
if grid_crs is None:
try:
grid_crs=gridIN.spatial_ref.crs_wkt
except:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
except Exception:
iswsCRS = w4h.read_dict(r&#39;../resources/sample_data/isws_crs.json&#39;)
gridIN.rio.write_crs(iswsCRS)
elif grid_crs.lower()==&#39;isws&#39;:
iswsCRS = w4h.read_dict(r&#39;../resources/isws_crs&#39;)
@@ -1498,7 +1532,7 @@ <h2 id="returns">Returns</h2>
</details>
</dd>
<dt id="w4h.mapping.read_study_area"><code class="name flex">
<span>def <span class="ident">read_study_area</span></span>(<span>study_area_path, study_area_crs='EPSG:4269', log=False, verbose=False)</span>
<span>def <span class="ident">read_study_area</span></span>(<span>study_area_path, study_area_crs='EPSG:4269', buffer=None, log=False, verbose=False)</span>
</code></dt>
<dd>
<div class="desc"><p>Read study area geospatial file into geopandas</p>
@@ -1507,10 +1541,14 @@ <h2 id="parameters">Parameters</h2>
<dt><strong><code>study_area_path</code></strong> :&ensp;<code>str</code> or <code>pathlib.Path</code></dt>
<dd>Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.</dd>
<dt><strong><code>crs</code></strong> :&ensp;<code>str, tuple, dict</code>, optional</dt>
<dt><strong><code>study_area_crs</code></strong> :&ensp;<code>str, tuple, dict</code>, optional</dt>
<dd>CRS designation readable by geopandas/pyproj</dd>
<dt><strong><code>buffer</code></strong> :&ensp;<code>None</code> or <code>numeric</code>, default=<code>None</code></dt>
<dd>If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.</dd>
<dt><strong><code>log</code></strong> :&ensp;<code>bool</code>, default <code>= False</code></dt>
<dd>Whether to log results to log file, by default False</dd>
<dt><strong><code>verbose</code></strong> :&ensp;<code>bool</code>, default=<code>False</code></dt>
<dd>Whether to print status and results to terminal</dd>
</dl>
<h2 id="returns">Returns</h2>
<dl>
@@ -1521,18 +1559,22 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, log=False, verbose=False):
<pre><code class="python">def read_study_area(study_area_path, study_area_crs=&#39;EPSG:4269&#39;, buffer=None, log=False, verbose=False):
&#34;&#34;&#34;Read study area geospatial file into geopandas

Parameters
----------
study_area_path : str or pathlib.Path
Filepath to any geospatial file readable by geopandas.
Polygon is best, but may work with other types if extent is correct.
crs : str, tuple, dict, optional
study_area_crs : str, tuple, dict, optional
CRS designation readable by geopandas/pyproj
buffer : None or numeric, default=None
If None, no buffer is created. If a numeric value is given (float or int, for example), a buffer is created at that distance in the units of study_area_crs.
log : bool, default = False
Whether to log results to log file, by default False
verbose : bool, default=False
Whether to print status and results to terminal

Returns
-------
Expand All @@ -1545,6 +1587,13 @@ <h2 id="returns">Returns</h2>
studyAreaIN = gpd.read_file(study_area_path)
studyAreaIN.to_crs(study_area_crs, inplace=True)

if buffer is not None:
studyAreaIN[&#39;geometry&#39;] = studyAreaIN.buffer(distance=buffer)
if verbose:
print(&#39;\tBuffer applied.&#39;)
if verbose:
print(&#34;\tStudy area read.&#34;)

return studyAreaIN</code></pre>
</details>
</dd>
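A short usage sketch of the new buffer parameter (the shapefile path and EPSG code are hypothetical; the buffer distance is interpreted in the units of study_area_crs, so a projected CRS with meter units is assumed):

from w4h.mapping import read_study_area

study_area = read_study_area('my_study_area.shp',         # hypothetical path
                             study_area_crs='EPSG:5070',  # projected, meters
                             buffer=1000,                 # 1 km buffer
                             verbose=True)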
@@ -1860,7 +1909,7 @@ <h2 id="returns">Returns</h2>
</details>
</dd>
<dt id="w4h.mapping.sample_raster_points"><code class="name flex">
<span>def <span class="ident">sample_raster_points</span></span>(<span>raster, points_df, xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False)</span>
<span>def <span class="ident">sample_raster_points</span></span>(<span>raster, points_df, well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', verbose=True, log=False)</span>
</code></dt>
<dd>
<div class="desc"><p>Sample raster values to points from geopandas geodataframe.</p>
@@ -1870,6 +1919,8 @@ <h2 id="parameters">Parameters</h2>
<dd>Raster containing values to be sampled.</dd>
<dt><strong><code>points_df</code></strong> :&ensp;<code>geopandas.geodataframe</code></dt>
<dd>Geopandas dataframe with geometry column containing point values to sample.</dd>
<dt><strong><code>well_id_col</code></strong> :&ensp;<code>str</code>, default=<code>"API_NUMBER"</code></dt>
<dd>Column that uniquely identifies each well so multiple sampling points are not taken per well</dd>
<dt><strong><code>xcol</code></strong> :&ensp;<code>str</code>, default=<code>'LONGITUDE'</code></dt>
<dd>Column containing name for x-column, by default 'LONGITUDE.'
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.</dd>
@@ -1893,7 +1944,7 @@ <h2 id="returns">Returns</h2>
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def sample_raster_points(raster, points_df, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
<pre><code class="python">def sample_raster_points(raster, points_df, well_id_col=&#39;API_NUMBER&#39;, xcol=&#39;LONGITUDE&#39;, ycol=&#39;LATITUDE&#39;, new_col=&#39;SAMPLED&#39;, verbose=True, log=False):
&#34;&#34;&#34;Sample raster values to points from geopandas geodataframe.

Parameters
@@ -1902,6 +1953,8 @@ <h2 id="returns">Returns</h2>
Raster containing values to be sampled.
points_df : geopandas.geodataframe
Geopandas dataframe with geometry column containing point values to sample.
well_id_col : str, default=&#34;API_NUMBER&#34;
Column that uniquely identifies each well so multiple sampling points are not taken per well
xcol : str, default=&#39;LONGITUDE&#39;
Column containing name for x-column, by default &#39;LONGITUDE.&#39;
This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
@@ -1924,27 +1977,48 @@ <h2 id="returns">Returns</h2>

if verbose:
verbose_print(sample_raster_points, locals(), exclude_params=[&#39;raster&#39;, &#39;points_df&#39;])
print(&#34;Sampling grid for {}.&#34;.format(new_col))
print(&#34;\tSampling {} grid at all well locations.&#34;.format(new_col))

#Project points to raster CRS
rastercrsWKT=raster.spatial_ref.crs_wkt
points_df = points_df.to_crs(rastercrsWKT)
#if xcol==&#39;LONGITUDE&#39; and ycol==&#39;LATITUDE&#39;:
if rastercrsWKT != points_df.crs:
if verbose:
print(&#34;\tTemporarily reprojecting raster data to point data&#39;s CRS.&#34;)
pointCRS = points_df.crs.to_epsg()
if pointCRS is None:
pointCRS = points_df.crs.to_wkt()

raster = raster.rio.reproject(pointCRS)
#points_df = points_df.to_crs(rastercrsWKT)

xCOLOUT = xcol+&#39;_PROJ&#39;
yCOLOUT = ycol+&#39;_PROJ&#39;
points_df[xCOLOUT] = points_df[&#39;geometry&#39;].x
points_df[yCOLOUT] = points_df[&#39;geometry&#39;].y
xData = np.array(points_df[xCOLOUT].values)
yData = np.array(points_df[yCOLOUT].values)
zData = []
zID = []
zInd = []

# Get unique well values to reduce sampling time
uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
if verbose:
print(f&#34;\t{uniqueWells.shape[0]} unique wells identified using {well_id_col} column&#34;)

# Loop over DataFrame rows
for i, row in points_df.iterrows():
for i, row in uniqueWells.iterrows():
# Select data from DataArray at current coordinates and append to list
zData.append(raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item())
#sampleArr=raster.sel(x=xData, y=yData, method=&#39;nearest&#39;).values
#sampleArr = np.diag(sampleArr)
#sampleDF = pd.DataFrame(sampleArr, columns=[new_col])
points_df[new_col] = zData#sampleDF[new_col]
zInd.append(i)
zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method=&#39;nearest&#39;).item()])

inputtype = points_df.dtypes[well_id_col]
wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

# Merge each unique well&#39;s data with all well intervals
wellZDF[well_id_col] = wellZDF[well_id_col].astype(inputtype)
points_df = points_df.merge(wellZDF, how=&#39;left&#39;, on=well_id_col)
#points_df[new_col] = zData#sampleDF[new_col]
return points_df</code></pre>
</details>
</dd>
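The reworked loop samples the raster once per unique well and then joins the sampled values back onto every interval row for that well, rather than sampling every row. A condensed sketch of the pattern, assuming points_df and raster as above (ELEVATION stands in for a caller-chosen new_col):

import pandas as pd

unique_wells = points_df.drop_duplicates(subset=['API_NUMBER'])
samples = []
for _, row in unique_wells.iterrows():
    # Nearest-cell lookup at each unique well location
    value = raster.sel(x=row['LONGITUDE_PROJ'], y=row['LATITUDE_PROJ'],
                       method='nearest').item()
    samples.append([row['API_NUMBER'], value])

sample_df = pd.DataFrame(samples, columns=['API_NUMBER', 'ELEVATION'])
# Broadcast each well's sampled value to all of its interval rows
points_df = points_df.merge(sample_df, how='left', on='API_NUMBER')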