From d3865a25829accb87ab4e17065abe703203895a3 Mon Sep 17 00:00:00 2001 From: Elise Hinman <121896266+ehinman@users.noreply.github.com> Date: Thu, 1 Aug 2024 19:44:44 -0500 Subject: [PATCH] Adjust gwlevels URL services (#147) * add gwlevels to waterdata services * add state code conversion * get rid of usa piece * adjust mock requests, but still errors * one correction * switch format in urls to get tests to work * Update dataretrieval/nwis.py keep API consistent Co-authored-by: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> --------- Co-authored-by: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> --- dataretrieval/nwis.py | 22 +- ...retrieval_GroundwaterLevels_Examples.ipynb | 270 +++++++++--------- ...es_gwlevels.txt => waterdata_gwlevels.txt} | 0 tests/waterservices_test.py | 14 +- 4 files changed, 159 insertions(+), 147 deletions(-) rename tests/data/{waterservices_gwlevels.txt => waterdata_gwlevels.txt} (100%) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 85cc5e3..39cbafb 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -34,9 +34,10 @@ PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?' -WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels'] +WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat'] WATERDATA_SERVICES = [ 'qwdata', + 'gwlevels', 'measurements', 'peaks', 'pmcodes', @@ -434,12 +435,23 @@ def get_gwlevels( """ _check_sites_value_types(sites) - kwargs['startDT'] = kwargs.pop('startDT', start) - kwargs['endDT'] = kwargs.pop('endDT', end) - kwargs['sites'] = kwargs.pop('sites', sites) + # Make kwargs backwards compatible with waterservices + # vocabulary + if 'startDT' in kwargs: + kwargs['begin_date'] = kwargs.pop('startDT') + if 'endDT' in kwargs: + kwargs['end_date'] = kwargs.pop('endDT') + if 'sites' in kwargs: + kwargs['site_no'] = kwargs.pop('sites') + if 'stateCd'in kwargs: + kwargs['state_cd'] = kwargs.pop('stateCd') + + kwargs['begin_date'] = kwargs.pop('begin_date', start) + kwargs['end_date'] = kwargs.pop('end_date', end) + kwargs['site_no'] = kwargs.pop('site_no', sites) kwargs['multi_index'] = multi_index - response = query_waterservices('gwlevels', ssl_check=ssl_check, **kwargs) + response = query_waterdata('gwlevels', format = 'rdb', ssl_check=ssl_check, **kwargs) df = _read_rdb(response.text) diff --git a/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb index c057031..a0afd9f 100644 --- a/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb @@ -16,55 +16,58 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Install the Package\n", "\n", "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "!pip install dataretrieval" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "!pip install dataretrieval" + ] }, { "cell_type": "markdown", - "source": [ - "Load the package so you can use it along with other packages used in this notebook." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "Load the package so you can use it along with other packages used in this notebook." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "from dataretrieval import nwis\n", - "from IPython.display import display" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "from dataretrieval import nwis\n", + "from IPython.display import display" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Basic Usage\n", "\n", @@ -75,26 +78,29 @@ "* **sites** (string or list of strings): A list of USGS site identifiers for which to retrieve data.\n", "* **start** (string): The beginning date for a period for which to retrieve data. If the waterdata parameter begin_date is supplied, it will overwrite the start parameter (defaults to '1851-01-01')\n", "* **end** (string): The ending date for a period for which to retrieve data. If the waterdata parameter end_date is supplied, it will overwrite the end parameter." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "Example 1: Get groundwater level data for a single monitoring site." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Example 1: Get groundwater level data for a single monitoring site." + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Set the parameters needed to retrieve data\n", @@ -103,206 +109,206 @@ "# Retrieve the data\n", "data = nwis.get_gwlevels(sites=site_id)\n", "print(\"Retrieved \" + str(len(data[0])) + \" data values.\")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Interpreting the Result\n", "\n", "The result of calling the `get_gwlevels()` function is an object that contains a Pandas data frame and an associated metadata object. The Pandas data frame contains the data requested. The data frame is indexed by the dates associated with the data values.\n", "\n", "Once you've got the data frame, there's several useful things you can do to explore the data." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "Display the data frame as a table" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Display the data frame as a table" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "display(data[0])" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "display(data[0])" + ] }, { "cell_type": "markdown", - "source": [ - "Show the data types of the columns in the resulting data frame." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Show the data types of the columns in the resulting data frame." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "print(data[0].dtypes)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(data[0].dtypes)" + ] }, { "cell_type": "markdown", - "source": [ - "Get summary statistics for the daily streamflow values." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Get summary statistics for the daily streamflow values." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "data[0]['lev_va'].describe()" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "data[0]['lev_va'].describe()" + ] }, { "cell_type": "markdown", - "source": [ - "Make a quick time series plot." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Make a quick time series plot." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "ax = data[0].plot(y='lev_va')\n", - "ax.set_xlabel('Date')\n", - "ax.set_ylabel('Water Level (feet below land surface)')" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "ax = data[0].plot(x = 'lev_dt', y='lev_va')\n", + "ax.set_xlabel('Date')\n", + "ax.set_ylabel('Water Level (feet below land surface)')" + ] }, { "cell_type": "markdown", - "source": [ - "The other part of the result returned from the `get_gwlevels()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "The other part of the result returned from the `get_gwlevels()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "print(\"The query URL used to retrieve the data from NWIS was: \" + data[1].url)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(\"The query URL used to retrieve the data from NWIS was: \" + data[1].url)" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Additional Examples\n", "\n", "You can also request data for multiple sites at the same time.\n", "\n", "Example 2: Get data for multiple sites. Site numbers are specified using a comma delimited list of strings." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "site_ids = [\"434400121275801\", \"375907091432201\"]\n", "data2 = nwis.get_gwlevels(sites=site_ids)\n", "print(\"Retrieved \" + str(len(data2[0])) + \" data values.\")\n", "display(data2[0])" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "Some groundwater level data have dates that include only a year or a month and year, but no day.\n", - "\n", - "Example 3: Retrieve groundwater level data that have dates without a day." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Some groundwater level data have dates that include only a year or a month and year, but no day.\n", + "\n", + "Example 3: Retrieve groundwater level data that have dates without a day." + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "data3 = nwis.get_gwlevels(sites=\"425957088141001\")\n", @@ -311,69 +317,63 @@ "# Print the date/time index values, which show up as NaT because\n", "# the dates can't be converted to a date/time data type\n", "print(data3[0].index)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "If you want to see the USGS RDB (delimited text) version of the data just retrieved, you can get the URL for the request that was sent to the USGS web service." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "If you want to see the USGS RDB (delimited text) version of the data just retrieved, you can get the URL for the request that was sent to the USGS web service." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "# Print the URL used to retrieve the data\n", - "print(\"You can examine the data retrieved from NWIS at: \" + data3[1].url)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "# Print the URL used to retrieve the data\n", + "print(\"You can examine the data retrieved from NWIS at: \" + data3[1].url)" + ] }, { "cell_type": "markdown", - "source": [ - "You can also retrieve data for a site within a specified time window by specifying a start date and an end date.\n", - "\n", - "Example 4: Get groundwater level data for a site between a startDate and endDate." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "You can also retrieve data for a site within a specified time window by specifying a start date and an end date.\n", + "\n", + "Example 4: Get groundwater level data for a site between a startDate and endDate." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "data4 = nwis.get_gwlevels(sites=site_id, start=\"1980-01-01\", end=\"2000-12-31\")\n", - "print(\"Retrieved \" + str(len(data4[0])) + \" data values.\")\n" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "data4 = nwis.get_gwlevels(sites=site_id, start=\"1980-01-01\", end=\"2000-12-31\")\n", + "print(\"Retrieved \" + str(len(data4[0])) + \" data values.\")\n" + ] } ], "metadata": { @@ -385,16 +385,16 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/tests/data/waterservices_gwlevels.txt b/tests/data/waterdata_gwlevels.txt similarity index 100% rename from tests/data/waterservices_gwlevels.txt rename to tests/data/waterdata_gwlevels.txt diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py index a04e09d..c829232 100755 --- a/tests/waterservices_test.py +++ b/tests/waterservices_test.py @@ -232,11 +232,11 @@ def test_get_gwlevels(requests_mock): """Tests get_gwlevels method correctly generates the request url and returns the result in a DataFrame.""" format = "rdb" site = '434400121275801' - request_url = 'https://waterservices.usgs.gov/nwis/gwlevels?startDT=1851-01-01' \ - '&sites={}&format={}'.format(site, format) - response_file_path = 'data/waterservices_gwlevels.txt' + request_url = 'https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01' \ + '&site_no={}'.format(format, site) + response_file_path = 'data/waterdata_gwlevels.txt' mock_request(requests_mock, request_url, response_file_path) - df, md = get_gwlevels(sites=[site]) + df, md = get_gwlevels(sites=site) if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -249,9 +249,9 @@ def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list): """Tests get_gwlevels method for valid input types for the 'sites' parameter.""" _format = "rdb" site = '434400121275801' - request_url = 'https://waterservices.usgs.gov/nwis/gwlevels?startDT=1851-01-01' \ - '&sites={}&format={}'.format(site, _format) - response_file_path = 'data/waterservices_gwlevels.txt' + request_url = 'https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01' \ + '&site_no={}'.format(_format, site) + response_file_path = 'data/waterdata_gwlevels.txt' mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site]