import json
from datetime import datetime

import inflect
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy import stats

# We use inflect to convert 1 into "1st", 2 into "2nd" etc
integer_engine = inflect.engine()


###################################################################
# This code is split into 3 sections
# 1. Download data and parse it
# 2. Find and print strongest earthquakes
# 3. Plot details of earthquake size and mean magnitude by year
###################################################################

###################################################################
## Section 1 - load data and parse it into a dataframe
###################################################################

# Load the data from the USGS earthquake service: every quake of magnitude 1
# or more inside the bounding box below, from 2000-01-01 until today, oldest
# first.
quakes = requests.get(
    "https://earthquake.usgs.gov/fdsnws/event/1/query.geojson",
    params={
        "starttime": "2000-01-01",
        "maxlatitude": "58.723",
        "minlatitude": "50.008",
        "maxlongitude": "1.67",
        "minlongitude": "-9.756",
        "minmagnitude": "1",
        "endtime": datetime.today().strftime('%Y-%m-%d'),
        "orderby": "time-asc",
    },
    # Without a timeout a stalled connection would hang the script forever
    timeout=60,
)

# Stop here on an HTTP error rather than trying to parse an error page as JSON
quakes.raise_for_status()

# Convert the result into a dictionary
quakes_object = json.loads(quakes.text)

# Now convert all the earthquakes into a pandas dataframe; nested keys are
# flattened into dotted column names such as 'properties.mag'
quakes_dataframe = pd.json_normalize(quakes_object['features'])

###################################################################
## Section 2 - find and print strongest earthquakes
###################################################################

# Get every row that shares the maximum magnitude (there may be ties) and
# reset the index so the row numbers start from 0
magnitudes = quakes_dataframe['properties.mag']
max_quake = quakes_dataframe[magnitudes == magnitudes.max()].reset_index(drop=True)

# Print all the strongest earthquakes
for index, quake in max_quake.iterrows():
    # The first two coordinates are the position and the third is the depth
    coordinates = quake['geometry.coordinates']
    position = ', '.join(str(x) for x in coordinates[0:2])
    depth = coordinates[2]
    print(f"The maximum magnitude is {quake['properties.mag']} "
          f"and it occurred for the {integer_engine.ordinal(index + 1)} time "
          f"at ({position}) at {depth} kilometres deep.")

###################################################################
## Section 3 - plot by year, the number and mean quake magnitude
###################################################################

# Convert the millisecond unix timestamps to datetimes. pd.to_datetime reads
# them as UTC, so the year a quake lands in does not depend on the timezone of
# the machine running this script.
quakes_dataframe['properties.time'] = pd.to_datetime(quakes_dataframe['properties.time'], unit='ms')

# Group by year and get the mean magnitude, the number of quakes and the
# standard error of the mean (used for the error bars)
quake_years = quakes_dataframe['properties.time'].dt.year
yearly_quakes = quakes_dataframe['properties.mag'].groupby(quake_years).agg(['mean', 'count', stats.sem])

# Get list of years
years = yearly_quakes.index.tolist()

# Take every 5th year for ticks, and always label the final year as well
xtick_years = np.append(np.arange(np.min(years), np.max(years), step=5), np.max(years))

# Plot mean magnitude of quakes each year
f = plt.figure()
plt.errorbar(years, yearly_quakes['mean'], yearly_quakes['sem'], capsize=3)

# Configure axes
plt.xticks(xtick_years)
plt.xlabel('Year', fontsize=18)
plt.ylabel('Mean magnitude (Richter)', fontsize=16)

# Save as PDF, then show. Saving comes first because show() only returns once
# the window has been closed, and saving after that is not reliable on every
# backend.
f.savefig("quake_magnitude_by_year.pdf", bbox_inches='tight')
plt.show()

# Plot number of quakes per year
f = plt.figure()
plt.plot(years, yearly_quakes['count'])

# Configure axes
plt.xticks(xtick_years)
plt.xlabel('Year', fontsize=18)
plt.ylabel('Number of quakes', fontsize=16)

# Save as PDF, then show
f.savefig("quake_count_by_year.pdf", bbox_inches='tight')
plt.show()
import times
import pytest


class TestTimeOverlap:
    """Tests for the overlap of two time ranges, based on example UCL RITS code."""

    def test_given_input(self):
        """The hardcoded example ranges overlap in two places."""
        overlap = times.compute_overlap_time(times.large, times.short)
        assert overlap == [
            ('2010-01-12 10:30:00', '2010-01-12 10:37:00'),
            ('2010-01-12 10:38:00', '2010-01-12 10:45:00'),
        ]

    def test_non_overlapping_times(self):
        """Two ranges that are separated in time share nothing."""
        earlier = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        later = times.time_range("2010-01-12 12:30:00", "2010-01-12 12:45:00")
        assert times.compute_overlap_time(earlier, later) == []

    def test_several_intervals(self):
        """Ranges split into two intervals each overlap once per matching pair."""
        outer = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00", 2, 0)
        inner = times.time_range("2010-01-12 10:30:00", "2010-01-12 11:30:00", 2, 0)
        overlap = times.compute_overlap_time(outer, inner)
        assert overlap == [
            ('2010-01-12 10:30:00', '2010-01-12 11:00:00'),
            ('2010-01-12 11:00:00', '2010-01-12 11:30:00'),
        ]

    def test_touching_times(self):
        """A range that ends exactly when the other starts does not overlap it."""
        earlier = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        later = times.time_range("2010-01-12 12:00:00", "2010-01-12 12:30:00")
        assert times.compute_overlap_time(earlier, later) == []

    def test_negative_time_interval(self):
        """A range that runs backwards is rejected with ValueError."""
        forwards = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        backwards = times.time_range("2010-01-12 11:00:00", "2010-01-12 10:30:00")
        with pytest.raises(ValueError):
            times.compute_overlap_time(forwards, backwards)
import datetime

# Layout used both to parse and to format timestamps. It is fixed-width with
# the most significant field first, so the strings it produces sort in
# chronological order.
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def time_range(start_time, end_time, number_of_intervals=1, gap_between_intervals_s=0):
    """Split the span between two timestamps into equally long intervals.

    Args:
        start_time: Start of the span, formatted as "YYYY-MM-DD HH:MM:SS".
        end_time: End of the span, in the same format.
        number_of_intervals: How many intervals to cut the span into.
        gap_between_intervals_s: Seconds left empty between consecutive
            intervals.

    Returns:
        A list of ``(start, end)`` tuples of timestamp strings, one per
        interval, in order. Fractions of a second are dropped when the
        timestamps are formatted.

    Raises:
        ValueError: If ``number_of_intervals`` is less than 1, or if a
            timestamp does not match the expected format.

    The order of ``start_time`` and ``end_time`` is deliberately not checked
    here: a backwards span yields backwards intervals, which
    ``compute_overlap_time`` rejects.
    """
    if number_of_intervals < 1:
        raise ValueError("number_of_intervals must be at least 1")

    start = datetime.datetime.strptime(start_time, _TIME_FORMAT)
    end = datetime.datetime.strptime(end_time, _TIME_FORMAT)

    # n intervals plus (n - 1) gaps fill the span exactly, so each interval
    # lasts span / n + gap * (1 / n - 1) seconds.
    interval_s = ((end - start).total_seconds() / number_of_intervals
                  + gap_between_intervals_s * (1 / number_of_intervals - 1))

    intervals = []
    for i in range(number_of_intervals):
        # Interval i is preceded by i full intervals and i gaps.
        offset_s = i * interval_s + i * gap_between_intervals_s
        begin = start + datetime.timedelta(seconds=offset_s)
        finish = start + datetime.timedelta(seconds=offset_s + interval_s)
        intervals.append((begin.strftime(_TIME_FORMAT), finish.strftime(_TIME_FORMAT)))
    return intervals


def compute_overlap_time(range1, range2):
    """Return the overlap of every interval in one range with every interval in another.

    Args:
        range1: Iterable of ``(start, end)`` timestamp strings, as returned by
            ``time_range``.
        range2: A second iterable of the same shape.

    Returns:
        A list of ``(start, end)`` tuples, one for each pair of intervals that
        share more than a single instant, ordered by ``range1`` and then by
        ``range2``. Intervals that merely touch produce no entry.

    Raises:
        ValueError: If any interval in either range starts after it ends.

    Timestamps are compared as strings, which matches chronological order
    only for the fixed-width format produced by ``time_range``.
    """
    intervals1 = list(range1)
    intervals2 = list(range2)

    # Validate every interval once, up front, so a backwards interval is
    # rejected even when the other range is empty.
    for start, end in (*intervals1, *intervals2):
        if start > end:
            raise ValueError("Start time cannot be bigger than end time")

    overlap_time = []
    for start1, end1 in intervals1:
        for start2, end2 in intervals2:
            low = max(start1, start2)
            high = min(end1, end2)
            if low < high:
                overlap_time.append((low, high))
    return overlap_time


# Define two time ranges
large = time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
short = time_range("2010-01-12 10:30:00", "2010-01-12 10:45:00", 2, 60)

if __name__ == "__main__":
    print(compute_overlap_time(large, short))