diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index 6abb962..4bdf88e 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -25,7 +25,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 matplotlib pytest requests + pip install flake8 matplotlib pytest requests haversine mock - name: Lint with flake8 run: flake8 --statistics *.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5d2c10b..d4cacf4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,18 +2,18 @@ image: python:3.7 test: script: - # Install Python packages required to run code. Add any additional - # packages your code needs require here. - - pip install dateutils flake8 matplotlib numpy pytest requests + # Install Python packages required to run code. Add any additional + # packages your code needs require here. + - pip install dateutils flake8 matplotlib numpy pytest requests mock - # flake8 static code and style testing. Enable for extra testing. - # - python -m flake8 . + # flake8 static code and style testing. Enable for extra testing. + # - python -m flake8 . # Run unit tests - - python -m pytest -v . + - python -m pytest -v . - # Run deliverables. Add your deliverables to the test system here. - - python Task1A.py + # Run deliverables. Add your deliverables to the test system here. 
+ - python Task1A.py - - python Task2A.py - - python Task2D.py + - python Task2A.py + - python Task2D.py diff --git a/Help-Guide.rtf b/Help-Guide.rtf new file mode 100644 index 0000000..5b10e37 --- /dev/null +++ b/Help-Guide.rtf @@ -0,0 +1,60 @@ +{\rtf1\ansi\ansicpg1252\cocoartf2577 +\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 Menlo-Regular;} +{\colortbl;\red255\green255\blue255;\red0\green0\blue0;\red255\green255\blue255;\red27\green31\blue34; +\red10\green77\blue204;\red249\green250\blue251;} +{\*\expandedcolortbl;;\cssrgb\c0\c0\c0;\cssrgb\c100000\c100000\c100000;\cssrgb\c14118\c16078\c18039; +\cssrgb\c1176\c40000\c83922;\cssrgb\c98039\c98431\c98824;} +\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0 +\pard\tx566\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\pardirnatural\partightenfactor0 + +\f0\fs28 \cf2 \cb3 CUED Flood Warning System:\ +\ +Creating SSH Key: (Recommended but not mandatory)\ +\ +1. Check for existing SSH Keys - \ +\pard\pardeftab720\partightenfactor0 +\cf2 \expnd0\expndtw0\kerning0 +ls -al ~/.ssh\ +# Lists the files in your .ssh directory, if they exist\ +\ +2. \ +ssh-keygen -t ed25519 -C "your_email@example.com"\ +\ +3. Changing Passphrase\ +ssh-keygen -p -f ~/.ssh/id_ed25519\ +\ +Git:\ +\ +1. Use source control to manage commits, messages and pushing to repo.\ +\ +2. To push change:\ +\ + - Head to source control \ + - View proposed change\ + - Click on \'91stage change\'92 icon\ + - Add commit message\ + - Click check mark to commit (to local file system)\ + - Click (\'85) to view further options and select push (sends to remote team repo.)\ +\ +Linting:\ +\ +1. Open terminal: pip install pylint (\'97-user) \ +\ +2. Open command palette on VSCode\ +\ +3. Type - Python: Select Linter\ +\ +4. Choose flake8 (a wrapper which checks code for adherence to flake8 coding style)\ +\ +\ +5. 
GitHub Build Log (under Actions) shows potential errors with commits to repo including stylistic errors. \ +\ +6. Please use clear messages when committing to repo for benefit of other team members.\ +\ +7. Commit - Saves to local file system. \ + Push - Pushes changes to GitHub remote repo. ({\field{\*\fldinst{HYPERLINK "https://github.com/cued-ia-computing/flood-jzo20-lk480"}}{\fldrslt \cf5 \cb6 flood-jzo20-lk480}}\cf4 \cb1 ) +\fs27\fsmilli13600 \cf2 \cb3 \ +\ +\ +\ +} \ No newline at end of file diff --git a/Task1B.py b/Task1B.py new file mode 100644 index 0000000..de5dbf9 --- /dev/null +++ b/Task1B.py @@ -0,0 +1,19 @@ +from floodsystem.stationdata import build_station_list +from floodsystem.geo import stations_by_distance + + +def run(): + + station = build_station_list() + list_of_stations = stations_by_distance(station, (52.2053, 0.1218)) + ten_closest_stations = [(station.name, station.town, distance) for station, distance in list_of_stations[:10]] + ten_furthest_stations = [(station.name, station.town, distance) for station, distance in list_of_stations[-10:]] + print(f"10 closest stations from cambridge \n: {ten_closest_stations}") + print(f"10 furthest stations from cambridge \n: {ten_furthest_stations}") + + +if __name__ == "__main__": + print("*** Task 1B: CUED Part IA Flood Warning System ***") + run() + +my_variable = 8 diff --git a/Task1C.py b/Task1C.py new file mode 100644 index 0000000..4d96dc0 --- /dev/null +++ b/Task1C.py @@ -0,0 +1,18 @@ +from floodsystem.stationdata import build_station_list +from floodsystem.geo import stations_within_radius + + +def run(): + + stations = build_station_list() + centre_coord = (52.2053, 0.1218) + r = 10 + list_of_stations = stations_within_radius(stations, centre_coord, r) + print("All stations within {rad}km from {centre}\n: {ans}".format(rad=r, centre=centre_coord, ans=list_of_stations)) + + +if __name__ == "__main__": + print("*** Task 1C: CUED Part IA Flood Warning System ***") + run() + +my_variable = 8 
diff --git a/Task1D.py b/Task1D.py new file mode 100644 index 0000000..1b07c44 --- /dev/null +++ b/Task1D.py @@ -0,0 +1,39 @@ +from floodsystem.stationdata import build_station_list +from floodsystem.geo import rivers_with_station +from floodsystem.geo import stations_by_river + + +def run(): + # Build the MoniteringStation list + stations = build_station_list() + + # Part 1, first 10 river with at least one station + num_station_atleast_one = rivers_with_station(stations) + first_ten = num_station_atleast_one[:10] + # printing the length and first 10 + print('{} stations. First 10 - {}'.format(len(num_station_atleast_one), first_ten)) + + # Part 2 + Obj_around_river = stations_by_river(stations) + Aire = sorted(Obj_around_river['River Aire']) + Cam = sorted(Obj_around_river['River Cam']) + Thames = sorted(Obj_around_river['River Thames']) + print('River Aire stations: {}\n River Cam stations: {}\n River Thames stations: {}\n'.format(Aire, Cam, Thames)) + + # Without getting a dictionary + # for river in ["River Aire", "River Cam", "River Thames"]: + # station_list = [] + + # for station in stations_by_river(stations)[river]: (couldn't work) + + # for station in stations: (but this works) + # if river == station.river: + # station_list.append(station.name) + # print(sorted(station_list)) + + +if __name__ == "__main__": + print("*** Task 1D: CUED Part IA Flood Warning System ***") + run() + +my_variable = 8 diff --git a/Task1E.py b/Task1E.py new file mode 100644 index 0000000..3a42375 --- /dev/null +++ b/Task1E.py @@ -0,0 +1,14 @@ +from floodsystem.stationdata import build_station_list +from floodsystem.geo import rivers_by_station_number + + +def run(): + N = 9 # As stated in demonstration program requirements + stations = build_station_list() + result = rivers_by_station_number(stations, N) + print(f"List of {N} rivers with the greatest number of monitoring stations \n {result}") + + +if __name__ == "__main__": + print("*** Task 1E: CUED Part IA Flood Warning 
System ***") + run() diff --git a/Task1F.py b/Task1F.py new file mode 100644 index 0000000..9d69500 --- /dev/null +++ b/Task1F.py @@ -0,0 +1,13 @@ +from floodsystem.stationdata import build_station_list +from floodsystem.station import inconsistent_typical_range_stations + + +def run(): + stations = build_station_list() + result = inconsistent_typical_range_stations(stations) + print(f"List of stations with inconsistent typical range data \n {result}") + + +if __name__ == "__main__": + print("*** Task 1F: CUED Part IA Flood Warning System ***") + run() diff --git a/Task2B.py b/Task2B.py new file mode 100644 index 0000000..c476981 --- /dev/null +++ b/Task2B.py @@ -0,0 +1,21 @@ +from floodsystem.stationdata import build_station_list, update_water_levels +from floodsystem.flood import stations_level_over_threshold + + +def run(): + # Build list of stations + stations = build_station_list() + + # Update latest level data for all stations + update_water_levels(stations) + + # Obtain list of stations with relative level over given threshold + result = stations_level_over_threshold(stations, 0.8) + + for station, relative_level in result: + print(f"{station.name}: {relative_level}") + + +if __name__ == "__main__": + print("*** Task 2B: CUED Part IA Flood Warning System ***") + run() diff --git a/Task2C.py b/Task2C.py new file mode 100644 index 0000000..57b6bee --- /dev/null +++ b/Task2C.py @@ -0,0 +1,21 @@ +from floodsystem.stationdata import build_station_list, update_water_levels +from floodsystem.flood import stations_highest_rel_level + + +def run(): + # Build list of stations + stations = build_station_list() + + # Update latest level data for all stations + update_water_levels(stations) + + # Obtain list of stations with highest relative water level + result = stations_highest_rel_level(stations, 10) + + for station, relative_level in result: + print(f"{station.name}: {relative_level}") + + +if __name__ == "__main__": + print("*** Task 2C: CUED Part IA Flood Warning 
System ***") + run() diff --git a/Task2E.py b/Task2E.py new file mode 100644 index 0000000..2f4ec01 --- /dev/null +++ b/Task2E.py @@ -0,0 +1,20 @@ +from floodsystem.stationdata import build_station_list, get_historical_water_levels +from floodsystem.plot import plot_water_levels, plot_water_levels_bokeh + + +def run(plot_type): + # Build list of stations + stations = build_station_list() + station = stations[0] + dates, levels = get_historical_water_levels(station, 10) + if plot_type == "matplotlib": + plot_water_levels(station, dates, levels) + elif plot_type == "bokeh": + plot_water_levels_bokeh(station, dates, levels) + else: + raise Exception(f"Unknown Plot Type {plot_type}") + + +if __name__ == "__main__": + print("*** Task 2E: CUED Part IA Flood Warning System ***") + run("matplotlib") diff --git a/Task2F.py b/Task2F.py new file mode 100644 index 0000000..1d4bf3b --- /dev/null +++ b/Task2F.py @@ -0,0 +1,20 @@ +from floodsystem.stationdata import build_station_list, update_water_levels, get_historical_water_levels +from floodsystem.plot import plot_water_level_with_fit +from floodsystem.flood import stations_highest_rel_level + + +def run(): + # Build list of stations + stations = build_station_list() + # Update latest level data for all stations + update_water_levels(stations) + + stations_highest_level = stations_highest_rel_level(stations, 5) + for station, _ in stations_highest_level: + dates, levels = get_historical_water_levels(station, 2) + plot_water_level_with_fit(station, dates, levels, 4) + + +if __name__ == "__main__": + print("*** Task 2F: CUED Part IA Flood Warning System ***") + run() diff --git a/Task2G.md b/Task2G.md new file mode 100644 index 0000000..d98e484 --- /dev/null +++ b/Task2G.md @@ -0,0 +1,17 @@ +# Flood Warning System Flood Risk Assessment Methodology + +This file outlines the assessment methodology for determining the flood risk at towns across the UK. 
+ +Polynomial Regression: +To generate a regression line to forecast future water levels we utilised numpy polyfit. +The numpy.poly1d object allows us to obtain the latest water level according to the polynomial regression +line on the day we have specified. + +The polynomial is generated by using a least-squares regression on a set period of historical data. After +experimenting with different lookback periods, 30 days gave us the most accurate forecast and hence is the default argument to the function. + +Assessment: +To classify the towns for flood risk we utilised the relative water level to compare the latest forecasted level to +the typical high/low range data for each station. We then sorted the stations by the town they were situated in and selected the largest relative water level. + +We then utilised a conditional statement to classify the risk level depending on the magnitude of the relative water level. This yields a flood risk assessment for the towns served by DEFRA monitoring stations. \ No newline at end of file diff --git a/Task2G.py b/Task2G.py new file mode 100644 index 0000000..8a859ca --- /dev/null +++ b/Task2G.py @@ -0,0 +1,77 @@ +from datetime import date +from collections import defaultdict +from floodsystem.stationdata import build_station_list, update_water_levels +from floodsystem.analysis import update_forecasted_levels +from floodsystem.flood import stations_highest_rel_level + +# Please see the classification methodology in Task2G.md + +SEVERE = 2.0 +HIGH = 1.0 +MODERATE = 0.5 +LOW = 0.0 + + +def risk_assesment(relative_water_levels): + """Takes a list of relative water levels by station for each town, computes the + largest relative water level. + If largest relative water level is in [2.0, inf), returns Severe. + If largest relative water level is in [1.0, 2.0), returns High. + If largest relative water level is in [0.5, 1.0), returns Moderate. + If largest relative water level is below 0.5, returns Low. 
+ Args: + relative_water_levels ([list]): List of relative water levels + Returns: + [str]: Flood Risk Assesment + """ + max_relative_water_level = max([x for x in relative_water_levels if x is not None]) + if max_relative_water_level >= SEVERE: + return 'Severe' + elif HIGH <= max_relative_water_level < SEVERE: + return 'High' + elif MODERATE <= max_relative_water_level < HIGH: + return 'Moderate' + else: + return 'Low' + + +def run(forecast_date, N=50): + # 1. Build list of all Monitoring Stations + all_stations = build_station_list() + # 2. Update latest water levels for each station + update_water_levels(all_stations) + # 3. Find N stations with highest latest water level + stations_rel_level = stations_highest_rel_level(all_stations, N) + # 4. Unpack list of station objects + stations = [station for station, _ in stations_rel_level] + # 5. Forecast level for given date and update the latest-level for each station + update_forecasted_levels(stations, forecast_date) + # 6. Create default dictionary with values of an empty list + relative_levels_by_town = defaultdict(list) + + # 7. Populating relative_levels_town with relative water level for each station in a town + for station in stations: + # Exclude stations where the town name is not provided + if station.town is not None: + v = relative_levels_by_town[station.town] + v.append(station.relative_water_level()) + + # 8. Create dictionary to store flood warning by town + flood_warning_by_town = {} + + # 9. Populate dictionary by calling risk_assesment() function with a list of forecasted + # relative water levels. + for town, relative_water_levels in relative_levels_by_town.items(): + flood_warning_by_town[town] = risk_assesment(relative_water_levels) + + # 10. 
Return dictionary of flood warning by town + return flood_warning_by_town + + +if __name__ == "__main__": + print("*** Task 2G: CUED Part IA Flood Warning System ***") + forecast_date = date(2021, 3, 5) + number_of_stations = 50 + flood_warning_by_town = run(forecast_date, number_of_stations) + for k, v in flood_warning_by_town.items(): + print(f"{k}: {v}") diff --git a/floodsystem/analysis.py b/floodsystem/analysis.py new file mode 100644 index 0000000..edb8fd3 --- /dev/null +++ b/floodsystem/analysis.py @@ -0,0 +1,56 @@ +"""This module contains a collection of functions related to fitting a +least-squares polynomial to historical water levels data + +""" + +import numpy as np +import matplotlib.pyplot as plt # noqa +import matplotlib +from floodsystem.stationdata import get_historical_water_levels + + +def polyfit(dates, levels, p): + """Given the water-level time history, this function computes a + least squares fit polynomial of degree p to the water level data. + + Args: + dates ([list]): List of dates + levels ([list]): List of river water levels + p ([int]): Degree of polynomial + + Returns: + [tuple]: returns tuple of 1D polynomial representing a least squares fit and time shift + """ + assert isinstance(p, int) and p > 0, f"{p} is not a positive integer" + + x = matplotlib.dates.date2num(dates) + y = levels + try: + # Find coefficients of best-fit polynomial f(x) of degree p + p_coeff = np.polyfit(x - x[0], y, p) + # Convert coefficient into a polynomial that can be evaluated + poly = np.poly1d(p_coeff) + # Returns a 1D polynomial and time-axis shift + return poly, x[0] + except TypeError: + # workaround to handle unexpected numpy polyfit errors + return None, x[0] + + +def update_forecasted_levels(stations, forecast_date, num_days=30): + """Forecasts the water level for the given date using polynomial regression with + a given lookback period. 
+ + Args: + stations (list[MonitoringStation]): List of Monitoring Station Objects + forecast_date ([Date]): Date to be forecasted + num_days (int, optional): Number of days to retrieve historical data. Defaults to 30. + """ + for station in stations: + print(f"Fetching forecasted level for {station.name}") + dates, levels = get_historical_water_levels(station, num_days) + if len(dates) != 0 and len(levels) != 0: + model, d0 = polyfit(dates, levels, 4) + if model is not None: + dn = matplotlib.dates.date2num(forecast_date) + station.latest_level = model(dn - d0) diff --git a/floodsystem/datafetcher.py b/floodsystem/datafetcher.py index 4738bff..60116f2 100644 --- a/floodsystem/datafetcher.py +++ b/floodsystem/datafetcher.py @@ -107,7 +107,7 @@ def fetch_latest_water_level_data(use_cache=False): return data -def fetch_measure_levels(measure_id, dt): +def fetch_measure_levels(measure_id, dt, use_cache=True): """Fetch measure levels from latest reading and going back a period dt. Return list of dates and a list of values. 
@@ -134,7 +134,8 @@ def fetch_measure_levels(measure_id, dt): d = dateutil.parser.parse(measure['dateTime']) # Append data - dates.append(d) - levels.append(measure['value']) + if 'value' in measure: + dates.append(d) + levels.append(measure['value']) return dates, levels diff --git a/floodsystem/flood.py b/floodsystem/flood.py new file mode 100644 index 0000000..3b1303c --- /dev/null +++ b/floodsystem/flood.py @@ -0,0 +1,55 @@ +"""This module provides a set of functions that determine the likelihood of a flood occurring at +the locations of the DEFRA Monitoring Stations + +""" + +from floodsystem.utils import sorted_by_key # noqa + + +def stations_level_over_threshold(stations, tol=None): + """Returns a list of tuples containing station objects and relative water level + + Args: + stations (list[MonitoringStation]): Monitoring Station Objects + tol ([float, None]): Threshold value; None (the default) applies no threshold + + Returns: + [list(tuples)]: list of tuples containing station objects and relative water level, sorted in descending order + """ + stations_over_threshold = [] + for station in stations: + relative_water_level = station.relative_water_level() + if relative_water_level is not None and (tol is None or relative_water_level > tol): + stations_over_threshold.append((station, relative_water_level)) + return sorted_by_key(stations_over_threshold, 1, reverse=True) + + +def stations_highest_rel_level(stations, N): + """Returns a list of up to N tuples containing stations(objects) and their relative water level + + Args: + stations (list[MonitoringStation]): Monitoring Station Objects + N ([int]): Number of stations to return + + Returns: + [list(tuples)]: list of up to N tuples containing station objects and relative water level, sorted in descending order + """ + # sorted_list = stations_level_over_threshold(stations) + # unable to filter out extreme value + + stations_rel_level = [] + + for station in stations: + # check if water level consistent + rel_level = station.relative_water_level() + + # check if 
water level reasonable + if rel_level is not None: + + # check if current water level not extreme + if rel_level <= 30: + stations_rel_level.append((station, rel_level)) + + sorted_list = sorted(stations_rel_level, key=lambda x: x[1], reverse=True) + + return sorted_list[:N] diff --git a/floodsystem/geo.py b/floodsystem/geo.py index e367087..f8cd3a8 100644 --- a/floodsystem/geo.py +++ b/floodsystem/geo.py @@ -6,4 +6,118 @@ """ -from .utils import sorted_by_key # noqa +from floodsystem.utils import sorted_by_key # noqa +from haversine import haversine, Unit +from collections import Counter + + +def stations_by_distance(stations, p): + """ Computes the distance from reference point (p) and the + coordinates of a station using haversine function. This distance is appended to a list + which is then sorted by distance. + + Args: + stations [list]: List of MonitoringStation objects + p [float]: [description] + + Returns: + [list[tuples]]: Sorted list of tuples containing station name, town, distance. 
+ """ + station_by_distance = [] + for station in stations: + station_by_distance.append((station, haversine(station.coord, p, unit=Unit.KILOMETERS))) + + return sorted_by_key(station_by_distance, 1) + + +def stations_within_radius(stations, centre, r): + """returns a list of all stations within radius r of the geographic coordinate centre + + Args: + stations [list]: List of MonitoringStation objects + centre [tuple]: geographic point to measure relative distances from + r ([float]): radius in kilometres around centre; distances use the haversine formula + + Returns: + [list]: returns a list of station names in alphabetical order + """ + stations_within_radius = [] + for station in stations: + if r >= haversine(station.coord, centre, unit=Unit.KILOMETERS): + stations_within_radius.append(station.name) + + return sorted(stations_within_radius) + + +def rivers_with_station(stations): + """ Function which given a list of station objects returns a list containing + the names of the rivers with at least 1 monitoring station + + Args: + stations [list]: List of MonitoringStation objects + + Returns: + [list]: returns a list of unique river names with at least one monitoring station + in alphabetical order + """ + rivers_with_station_set = {station.river for station in stations} + rivers_with_station_list = [] + for name in rivers_with_station_set: + rivers_with_station_list.append(name) + + return sorted(rivers_with_station_list) + + +def stations_by_river(stations): + """Returns a dictionary which maps the river names to a list of station names for the given + river + + Args: + stations ([list]): List of MonitoringStation objects + + Returns: + [dictionary]: returns a dictionary which maps the river name (key) to list of station names (value) + """ + stations_by_river_dict = {} + station = stations + for i in range(len(stations)): + if not station[i].river in stations_by_river_dict: + stations_by_river_dict[station[i].river] = [] + 
stations_by_river_dict[station[i].river].append(station[i].name) + else: + stations_by_river_dict[station[i].river].append(station[i].name) + + return stations_by_river_dict + + +def rivers_by_station_number(stations, N): + """ Determines the N rivers with the greatest number of monitoring station, + outputted as a list of tuples containg the river name and the number of stations + associated with the river. + + Args: + stations [list]: List of MonitoringStation objects + N [integer]: N rivers with the greatest number of monitoring stations + + Returns: + [list[tuples]]: Sorted list of tuples containing river name, number of stations + """ + assert N > 0, "N (rivers) cannot be 0 or negative number" + # Counting the number of stations associated with each river + # The Counter will return a dict e.g. {'river1' : 10, 'river2': 8, etc.} + station_rivers_count = Counter([station.river for station in stations]) + # station_rivers_count.items() returns a list of tuples e.g. [('river1',10), ('river2',8), etc.] + # We then iterate through to obtain a list of unique values for the number of stations i.e no repeated values + stations_count = sorted(list(set([v for k, v in station_rivers_count.items()]))) + # We produce a list of tuples e.g. [('river1',10,5),('river2',8,2)] + # Tuple contains river name, number of stations, rank + # index(v)+1 sets the rank to start at value 1 instead of 0 + station_rivers_rank = [(k, v, stations_count.index(v) + 1) + for k, v in station_rivers_count.items()] + # start_index is number of unique values present e.g. 
25 + start_index = len(set(stations_count)) + # result outputs a list of tuples containing the river name and number of associated stations + # number of tuples is dependent on the value of N and the number of rivers with equal number of stations + result = [(k, v) for k, v, r in station_rivers_rank if r > start_index - N] + # returns sorted list in descending order + return sorted_by_key(result, 1, reverse=True) diff --git a/floodsystem/plot.py b/floodsystem/plot.py new file mode 100644 index 0000000..a5e2fd3 --- /dev/null +++ b/floodsystem/plot.py @@ -0,0 +1,81 @@ +""" This modules contains a collection of functions to plot water levels data""" + +import matplotlib.pyplot as plt +import matplotlib +from bokeh.plotting import figure, output_file, show +import numpy as np +from floodsystem.analysis import polyfit + + +def plot_water_levels(station, dates, levels): + + # Plot + plt.plot(dates, levels) + plt.plot(dates, np.full(len(dates), station.typical_range[0]), label="Low", color='tab:green') + plt.plot(dates, np.full(len(dates), station.typical_range[1]), label="High", color='tab:red') + plt.legend(loc='center left') + + # Add axis labels, rotate date labels and add plot title + plt.xlabel('date') + plt.ylabel('water level (m)') + plt.xticks(rotation=45) + plt.title(f"Station {station.name}") + + # Display plot + plt.tight_layout() # This makes sure plot does not cut off date labels + + plt.show() + + +def plot_water_levels_bokeh(station, dates, levels): + + # output to static HTML file + output_file("water_levels_bokeh.html") + + # create a new plot with a title and axis labels + p = figure(title=f"Station {station.name}", x_axis_label='date', y_axis_label='water level (m)') + + # add a line renderer with legend and line thickness + p.line(dates, levels, legend_label="Water Level", line_width=2) + p.line(dates, np.full(len(dates), station.typical_range[1]), legend_label="High", line_width=2, color='red') + p.line(dates, np.full(len(dates), 
station.typical_range[0]), legend_label="Low", line_width=2, color='green') + # show the results + show(p) + + +def plot_water_level_with_fit(station, dates, levels, p): + """Plots the polynomial regression line of order p + + Args: + station ([MonitoringStation]): Monitoring Station with a series of attributes + dates ([date]): List of dates + levels ([list(floats)]): List of water levels for monitoring stations + p ([int]): order of regression polynomial + """ + poly, d0 = polyfit(dates, levels, p) + x = matplotlib.dates.date2num(dates) + y = levels + plt.plot(x, y, '.') + # Plot polynomial fit at 30 points along interval (note that polynomial + # is evaluated using the shift x) + x1 = np.linspace(x[0], x[-1], 30) + # Get the current figure for re-sizing + fig = plt.gcf() + fig.set_size_inches(12, 7, forward=True) + # Get the current axis to get the date formatter + ax = plt.gca() + hfmt = matplotlib.dates.DateFormatter('%d/%m/%y %H:%M') + # Set the date formatter for the x-axis + ax.xaxis.set_major_formatter(hfmt) + ax.plot(x1, poly(x1 - x[0]), label="Regression Line") + ax.plot(x1, np.full(len(x1), station.typical_range[0]), + label="Low", color='tab:green') + ax.plot(x1, np.full(len(x1), station.typical_range[1]), + label="High", color='tab:red') + plt.xlabel('Dates (DD/MM/YY HH:MI') + plt.xticks(rotation=30) + plt.ylabel("Water Level (m)") + plt.legend(loc='center left') + plt.title(station.name) + # Display plot + plt.show() diff --git a/floodsystem/station.py b/floodsystem/station.py index cee0c85..46f485a 100644 --- a/floodsystem/station.py +++ b/floodsystem/station.py @@ -11,7 +11,7 @@ class MonitoringStation: """This class represents a river level monitoring station""" def __init__(self, station_id, measure_id, label, coord, typical_range, - river, town): + river, town, latest_level=None): self.station_id = station_id self.measure_id = measure_id @@ -27,7 +27,7 @@ def __init__(self, station_id, measure_id, label, coord, typical_range, self.river = river 
self.town = town - self.latest_level = None + self.latest_level = latest_level def __repr__(self): d = "Station name: {}\n".format(self.name) @@ -38,3 +38,49 @@ def __repr__(self): d += " river: {}\n".format(self.river) d += " typical range: {}".format(self.typical_range) return d + + def typical_range_consistent(self): + """ Method which checks typical high/low range data for consistency. If data is consistent + method returns True, if data is inconsistent method returns False. + + Returns: + [boolean]: Returns True if data is consistent; Returns False if data is None or inconsistent + """ + if self.typical_range is None: + return False + elif self.typical_range[0] is None or self.typical_range[1] is None: + return False + elif self.typical_range[0] >= self.typical_range[1]: + return False + else: + return True + + def relative_water_level(self): + """Returns latest water level as a fraction of the typical range + e.g. 1.0 corresponds to a level equalling typical_high + 0.0 corresponds to a level equalling typical_low + + Returns: + [float]: water level as a fraction of the typical range + """ + # Test + if self.typical_range_consistent() is True and self.latest_level is not None: + return (self.latest_level - self.typical_range[0]) / (self.typical_range[1] - self.typical_range[0]) + + +def inconsistent_typical_range_stations(stations): + """ Function that takes a list of station objects and returns a list containing + the station names of stations where data is inconsistent or missing (None) + + Args: + stations [list[tuples]]: list of station objects + + Returns: + [list[strings]]: Returns a list of station names + """ + result = [] + for station in stations: + if not station.typical_range_consistent(): + result.append(station.name) + + return result diff --git a/floodsystem/stationdata.py b/floodsystem/stationdata.py index 1e3e61e..1c7b4df 100644 --- a/floodsystem/stationdata.py +++ b/floodsystem/stationdata.py @@ -1,11 +1,11 @@ # Copyright (C) 2018 Garth N. 
Wells # # SPDX-License-Identifier: MIT -"""This module provides interface for extracting statiob data from +"""This module provides interface for extracting station data from JSON objects fetched from the Internet and """ - +import datetime from . import datafetcher from .station import MonitoringStation @@ -87,3 +87,18 @@ def update_water_levels(stations): if station.measure_id in measure_id_to_value: if isinstance(measure_id_to_value[station.measure_id], float): station.latest_level = measure_id_to_value[station.measure_id] + + +def get_historical_water_levels(station, num_days): + """Utility function that gets historical water level data from the + data fetcher. + + Args: + station ([MonitoringStation]): An instance of a MonitoringStation + num_days ([int]): Number of days to retrieve historical data + + Returns: + [tuple]: returns list of dates and list of water-levels (m). + """ + dates, levels = datafetcher.fetch_measure_levels(station.measure_id, dt=datetime.timedelta(days=num_days)) + return dates, levels diff --git a/floodsystem/utils.py b/floodsystem/utils.py index f049d01..27b5264 100644 --- a/floodsystem/utils.py +++ b/floodsystem/utils.py @@ -12,7 +12,7 @@ def sorted_by_key(x, i, reverse=False): Sort on first entry of tuple: - > sorted_by_key([(1, 2), (5, 1]), 0) + > sorted_by_key([(1, 2), (5, 1)], 0) >>> [(1, 2), (5, 1)] Sort on second entry of tuple: diff --git a/setup.py b/setup.py index a3dd507..1fae6ba 100644 --- a/setup.py +++ b/setup.py @@ -3,4 +3,5 @@ version='0.1', description='CUED Part IA flood warning system exercise', packages=['floodsystem'], + install_requires=['haversine'] ) diff --git a/test_analysis.py b/test_analysis.py new file mode 100644 index 0000000..f3d963d --- /dev/null +++ b/test_analysis.py @@ -0,0 +1,12 @@ +from datetime import datetime +import numpy as np +import matplotlib.pyplot as plt # noqa +from floodsystem.analysis import polyfit + + +def test_polyfit(): + + dates = [datetime(2016, 12, 30), datetime(2016, 12, 31), 
datetime(2017, 1, 1)] + levels = [0.2, 0.7, 0.95] + actual = polyfit(dates, levels, 1) + assert isinstance(actual[0], np.poly1d) diff --git a/test_flood.py b/test_flood.py new file mode 100644 index 0000000..d05474d --- /dev/null +++ b/test_flood.py @@ -0,0 +1,51 @@ +from floodsystem.station import MonitoringStation +from floodsystem.flood import stations_level_over_threshold +from floodsystem.flood import stations_highest_rel_level + + +def create_test_station(s_id=None, + m_id=None, + label=None, + coord=None, + trange=None, + river=None, + town=None, + latest_level=None): + return MonitoringStation(s_id, m_id, label, coord, trange, river, town, latest_level) + + +def test_stations_level_over_threshold(): + # Create Station 1 + s1 = create_test_station(label='station1', trange=(1, 2), latest_level=1.5) + s2 = create_test_station(label='station2', trange=(1, 2), latest_level=1) + s3 = create_test_station(label='station3', trange=(1, 2), latest_level=1.2) + s4 = create_test_station(label='station4', trange=(1, 2), latest_level=2) + + stations = [s1, s2, s3, s4] + + actual = stations_level_over_threshold(stations, 0.1) + actual_names = [] + for station, level in actual: + actual_names.append(station.name) + expected_names = ['station4', 'station1', 'station3'] + assert actual_names == expected_names + + +def test_stations_highest_rel_level(): + # create stations + s1 = create_test_station(label='station 1', trange=(0.1, 0.2), latest_level=1.5) + s2 = create_test_station(label='station 2', trange=(0.1, 0.5), latest_level=1.5) + s3 = create_test_station(label='station 3', trange=(0.2, 0.4), latest_level=1.5) + s4 = create_test_station(label='station 4', trange=(0.1, 0.7), latest_level=1.5) + s5 = create_test_station(label='station 5', trange=(0.8, 0.9), latest_level=1.5) + s6 = create_test_station(label='station 6', trange=(0.1, 0.9), latest_level=1.5) + + stations = [s1, s2, s3, s4, s5, s6] + + output_station = stations_highest_rel_level(stations, 5) + 
actual_N_rivers = [] + # test for stations + for station, level in output_station: + actual_N_rivers.append(station.name) + expected_N_rivers = ['station 1', 'station 5', 'station 3', 'station 2', 'station 4'] + assert actual_N_rivers == expected_N_rivers diff --git a/test_geo.py b/test_geo.py new file mode 100644 index 0000000..409170c --- /dev/null +++ b/test_geo.py @@ -0,0 +1,143 @@ +import numpy as np +from mock import Mock +from floodsystem.station import MonitoringStation +from floodsystem.geo import (stations_by_distance, rivers_by_station_number, stations_within_radius, + stations_by_river, rivers_with_station) +import pytest + + +def create_mock_station(**kwargs): + mock = Mock(spec=MonitoringStation, **kwargs) + mock.name = kwargs.get('label') + return mock + + +# test for 1B +def test_stations_by_distance(): + # Create the first station + s1 = create_mock_station(coord=(50.8167, -0.2667), river='Adur') + + # Create the second station + s2 = create_mock_station(coord=(51.5855, -0.616), river='Thames') + + ref_point = (51.0017, -2.6363) + + stations = [s1, s2] + + sorted_pairs = stations_by_distance(stations, ref_point) + # test for rivers + actual_sorted_rivers = [x.river for x, distance in sorted_pairs] + expected_sorted_rivers = ['Thames', 'Adur'] + assert actual_sorted_rivers == expected_sorted_rivers + + # test for distances + actual_sorted_distances = [distance for x, distance in sorted_pairs] + expected_distances = [154.7439, 167.404] + assert np.allclose(actual_sorted_distances, expected_distances) + + +# test for 1C +def test_stations_within_radius(): + # Create station 1 + s1 = create_mock_station(coord=(50.8167, -0.2667), label='station 1') + + # Create station 2 + s2 = create_mock_station(coord=(51.5855, -0.616), label='station 2') + + centre_coord = (51.0017, -2.6363) + + radius_from_centre = 200 + + stations = [s1, s2] + + actual_sorted_rivers = stations_within_radius(stations, centre_coord, radius_from_centre) + # test for stations + 
expected_sorted_rivers = ['station 1', 'station 2'] + assert actual_sorted_rivers == expected_sorted_rivers + + +# test for 1D +def test_rivers_with_station(): + # This is looks like a dodgy test as we are creating stations with the same co-ordinates but with different names + s1 = create_mock_station(coord=(50.8167, -0.2667), river="Adur") + + s2 = create_mock_station(coord=(50.8167, -0.2667), river="Adur") + + s3 = create_mock_station(coord=(51.5855, -0.616), river="Thames") + + s4 = create_mock_station(coord=(51.5855, -0.616), river="Ail") + + s5 = create_mock_station(coord=(51.5855, -0.616), river="Ail") + + stations = [s1, s2, s3, s4, s5] + + sorted_pairs = rivers_with_station(stations) + # test for length + actual_sorted_length = len(sorted_pairs) + expected_sorted_length = 3 + assert actual_sorted_length == expected_sorted_length + + # test for river + actual_sorted_rivers = sorted_pairs + expected_sorted_rivers = ['Adur', 'Ail', 'Thames'] + assert actual_sorted_rivers == expected_sorted_rivers + + +def test_stations_by_river(): + + s1 = create_mock_station(coord=(50.9167, -0.2687), river="Adur", label="station 1") + + s2 = create_mock_station(coord=(50.8167, -0.2667), river="Adur", label="station 2") + + s3 = create_mock_station(coord=(51.5855, -0.616), river="Thames", label="station 3") + + s4 = create_mock_station(coord=(51.5855, -0.6126), river="Ail", label="station 4") + + stations = [s1, s2, s3, s4] + + stations_dict = stations_by_river(stations) + # test for stations + actual_sorted_rivers = sorted(stations_dict['Adur']) + expected_sorted_rivers = ['station 1', 'station 2'] + assert actual_sorted_rivers == expected_sorted_rivers + + +def test_rivers_by_station_number(): + # Create a list of mock classes + rivers = ['a'] * 10 + ['b'] * 10 + ['c'] * 5 + ['d'] * 20 + ['e'] * 20 + ['f'] * 21 + stations = [] + for river in rivers: + stations.append(create_mock_station(river=river)) + N = 2 + expected_output = [('f', 21), ('d', 20), ('e', 20)] + 
actual_output = rivers_by_station_number(stations, N) + # test for sorted list + assert actual_output == expected_output + + +def test_rivers_by_station_number_with_negative_N(): + # Create a list of mock classes + stations = [] + N = -5 + # I found here how to assert when my function raises an exception with N<0 + # https://stackoverflow.com/questions/23337471/how-to-properly-assert-that-an-exception-gets-raised-in-pytest + with pytest.raises(AssertionError): + rivers_by_station_number(stations, N) + + # Testing with N = 0 + N = 0 + with pytest.raises(AssertionError): + rivers_by_station_number(stations, N) + + +def test_rivers_by_station_number_with_max_N(): + # Create a list of mock classes + rivers = ['a'] * 10 + ['b'] * 10 + ['c'] * 5 + ['d'] * 20 + ['e'] * 20 + ['f'] * 21 + stations = [] + for river in rivers: + stations.append(create_mock_station(river=river)) + N = 4 + expected_output = [('f', 21), ('d', 20), ('e', 20), ('a', 10), ('b', 10), ('c', 5)] + actual_output = rivers_by_station_number(stations, N) + # test for sorted list + assert actual_output == expected_output diff --git a/test_station.py b/test_station.py index 5cd7836..caf808c 100644 --- a/test_station.py +++ b/test_station.py @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: MIT """Unit test for the station module""" - -from floodsystem.station import MonitoringStation +import pytest +from floodsystem.station import MonitoringStation, inconsistent_typical_range_stations def test_create_monitoring_station(): @@ -25,3 +25,127 @@ def test_create_monitoring_station(): assert s.typical_range == trange assert s.river == river assert s.town == town + + +def test_typical_range_consistent_inconsistent_data(): + # Create a station + s_id = "test-s-id" + m_id = "test-m-id" + label = "some station" + coord = (-2.0, 4.0) + trange = (5, 2) + river = "River X" + town = "My Town" + s1 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + assert not s1.typical_range_consistent() + + +def 
test_typical_range_consistent_consistent_data(): + # Create a station + s_id = "test-s-id" + m_id = "test-m-id" + label = "some station" + coord = (-2.0, 4.0) + trange = (2, 4) + river = "River X" + town = "My Town" + s2 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + assert s2.typical_range_consistent() + + +def test_typical_range_consistent_none_data(): + # Create a station + s_id = "test-s-id" + m_id = "test-m-id" + label = "some station" + coord = (-2.0, 4.0) + trange = None + river = "River X" + town = "My Town" + s3 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + assert not s3.typical_range_consistent() + + +def test_typical_range_consistent_if_one_is_none_data(): + # Create a station + s_id = "test-s-id" + m_id = "test-m-id" + label = "some station" + coord = (-2.0, 4.0) + trange = (0, None) + river = "River X" + town = "My Town" + s4 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + assert not s4.typical_range_consistent() + + +def test_inconsistent_typical_range_stations_all_inconsistent(): + # Create a first station + s_id = "test-sid1" + m_id = "test-m-id" + label = "a" + coord = (-2.0, 4.0) + trange = (2.0, -4) + river = "River X" + town = "My Town" + s1 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + # Create a second station + s_id = "test-sid2" + m_id = "test-m-id" + label = "b" + coord = (-2.0, 4.0) + trange = (-3, None) + river = "River X" + town = "My Town" + s2 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + + stations = [s1, s2] + expected = ["a", "b"] + actual = inconsistent_typical_range_stations(stations) + assert actual == expected + + +def test_inconsistent_typical_range_stations_all_consistent(): + # Create a first station + s_id = "test-sid1" + m_id = "test-m-id" + label = "a" + coord = (-2.0, 4.0) + trange = (2.0, 4) + river = "River X" + town = "My Town" + s1 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + # Create 
a second station + s_id = "test-sid2" + m_id = "test-m-id" + label = "b" + coord = (-2.0, 4.0) + trange = (3, 9) + river = "River X" + town = "My Town" + s2 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + + stations = [s1, s2] + expected = [] + actual = inconsistent_typical_range_stations(stations) + assert actual == expected + + +@pytest.mark.parametrize("trange,latest_level,expected", [((1, 2), 1.5, 0.5), + ((None, 2), 1.5, None), + ((1, 2), None, None), + ((1, 1), 1.5, None), + ((2, 1), 1.5, None)]) +def test_relative_water_level(trange, latest_level, expected): + # Create a first station + s_id = "test-sid1" + m_id = "test-m-id" + label = "a" + coord = (-2.0, 4.0) + river = "River X" + town = "My Town" + s1 = MonitoringStation(s_id, m_id, label, coord, trange, river, town) + s1.latest_level = latest_level + actual = s1.relative_water_level() + + assert actual == expected diff --git a/water_levels_bokeh.html b/water_levels_bokeh.html new file mode 100644 index 0000000..d69970b --- /dev/null +++ b/water_levels_bokeh.html @@ -0,0 +1,85 @@ + + + + + + + + + + + Bokeh Plot + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + \ No newline at end of file