Skip to content

Commit

Permalink
Merge branch 'main' of github.com:twemyss/rse-classwork-2020 into mai…
Browse files Browse the repository at this point in the history
…n. Answers UCL-RITS#90
  • Loading branch information
twemyss committed Nov 5, 2020
2 parents 4e4a684 + bd77284 commit 3a71a34
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 0 deletions.
101 changes: 101 additions & 0 deletions week04/quakes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@

import json
import requests
import numpy as np
import pandas as pd
import inflect
import matplotlib.pyplot as plt
from datetime import datetime
from scipy import stats

# inflect turns integers into ordinals: 1 -> "1st", 2 -> "2nd", etc.
integer_engine = inflect.engine()


###################################################################
# This code is split into 3 sections
# 1. Download data and parse it
# 2. Find and print strongest earthquakes
# 3. Plot details of earthquake size and mean magnitude by year
###################################################################

###################################################################
## Section 1 - load data and parse it into a dataframe
###################################################################

# Query the USGS earthquake service for every event of magnitude >= 1
# inside the latitude/longitude box below, from 2000-01-01 until today,
# oldest first.
quakes = requests.get(
    "https://earthquake.usgs.gov/fdsnws/event/1/query.geojson",
    params={
        "starttime": "2000-01-01",
        "maxlatitude": "58.723",
        "minlatitude": "50.008",
        "maxlongitude": "1.67",
        "minlongitude": "-9.756",
        "minmagnitude": "1",
        "endtime": datetime.today().strftime('%Y-%m-%d'),
        "orderby": "time-asc",
    },
    # requests waits indefinitely by default; give up on a stalled
    # connection instead of hanging the script.
    timeout=60,
)

# Stop with a clear HTTP error rather than trying to parse an error page
# as GeoJSON further down.
quakes.raise_for_status()

# Decode the GeoJSON response body into a dictionary
quakes_object = quakes.json()

# Flatten the list of features into a pandas dataframe; nested keys become
# dotted column names such as 'properties.mag' and 'geometry.coordinates'.
quakes_dataframe = pd.json_normalize(quakes_object['features'])

###################################################################
## Section 2 - find and print strongest earthquakes
###################################################################

# Keep every row whose magnitude equals the overall maximum, so that ties
# are all reported rather than just the first one.
max_quake = quakes_dataframe[quakes_dataframe['properties.mag'] == quakes_dataframe['properties.mag'].max()]

# Reset the index so the row numbers start from 0
max_quake = max_quake.reset_index(drop=True)

# Print one line per strongest earthquake
for index, quake in max_quake.iterrows():
    # The first two coordinates give the horizontal position and the third
    # is the depth (USGS GeoJSON lists them as longitude, latitude, depth).
    coordinates = quake['geometry.coordinates']
    position = ', '.join(str(x) for x in coordinates[0:2])
    print(f"The maximum magnitude is {quake['properties.mag']} "
          f"and it occurred for the {integer_engine.ordinal(index + 1)} time "
          f"at ({position}) at {coordinates[2]} kilometres deep.")

###################################################################
## Section 3 - plot by year, the number and mean quake magnitude
###################################################################

# Convert the millisecond unix timestamps to UTC datetimes. Converting in
# UTC (rather than the machine's local timezone) keeps the year an event is
# counted in independent of where the script is run.
quakes_dataframe['properties.time'] = pd.to_datetime(
    quakes_dataframe['properties.time'], unit='ms', utc=True
)

# Group by year and get the mean magnitude, the number of quakes and the
# standard error of the mean (the aggregated columns are named 'mean',
# 'count' and 'sem').
yearly_quakes = quakes_dataframe['properties.mag'].groupby(
    quakes_dataframe['properties.time'].dt.year
).agg(['mean', 'count', stats.sem])

# Get list of years
years = yearly_quakes.index.tolist()

# Take every 5th year for ticks, and always include the final year
xtick_years = np.append(np.arange(np.min(years), np.max(years), step=5), np.max(years))

# Plot mean magnitude of quakes each year, with the standard error as bars
f = plt.figure()
plt.errorbar(years, yearly_quakes['mean'], yearly_quakes['sem'], capsize=3)

# Configure axes
plt.xticks(xtick_years)
plt.xlabel('Year', fontsize=18)
plt.ylabel('Mean magnitude (Richter)', fontsize=16)

# Save as PDF first, then show: once an interactive window has been closed
# the figure may no longer hold anything to write out.
f.savefig("quake_magnitude_by_year.pdf", bbox_inches='tight')
plt.show()

# Plot number of quakes per year
f = plt.figure()
plt.plot(years, yearly_quakes['count'])

# Configure axes
plt.xticks(xtick_years)
plt.xlabel('Year', fontsize=18)
plt.ylabel('Number of quakes', fontsize=16)

# Save as PDF first, then show (same reason as above)
f.savefig("quake_count_by_year.pdf", bbox_inches='tight')
plt.show()
5 changes: 5 additions & 0 deletions week04/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
numpy
scipy
pandas
inflect
# quakes.py also imports these two third-party packages
requests
matplotlib
# datetime is part of the Python standard library, so it is not listed here
43 changes: 43 additions & 0 deletions week05-testing/test_times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import times
import pytest

class TestTimeOverlap:
    """Exercises ``times.compute_overlap_time`` with the sample ranges from
    the UCL RITS example code plus a handful of edge cases."""

    def test_given_input(self):
        """The sample ranges shipped in ``times`` overlap in two places."""
        expected = [
            ('2010-01-12 10:30:00', '2010-01-12 10:37:00'),
            ('2010-01-12 10:38:00', '2010-01-12 10:45:00'),
        ]
        assert times.compute_overlap_time(times.large, times.short) == expected

    def test_non_overlapping_times(self):
        """Ranges separated by a gap produce no overlap."""
        earlier = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        later = times.time_range("2010-01-12 12:30:00", "2010-01-12 12:45:00")
        assert times.compute_overlap_time(earlier, later) == []

    def test_several_intervals(self):
        """Two ranges, each split into two intervals, overlap twice."""
        outer = times.time_range(
            "2010-01-12 10:00:00",
            "2010-01-12 12:00:00",
            number_of_intervals=2,
            gap_between_intervals_s=0,
        )
        inner = times.time_range(
            "2010-01-12 10:30:00",
            "2010-01-12 11:30:00",
            number_of_intervals=2,
            gap_between_intervals_s=0,
        )
        expected = [
            ('2010-01-12 10:30:00', '2010-01-12 11:00:00'),
            ('2010-01-12 11:00:00', '2010-01-12 11:30:00'),
        ]
        assert times.compute_overlap_time(outer, inner) == expected

    def test_touching_times(self):
        """A range that starts exactly when the other ends does not overlap it."""
        first = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        second = times.time_range("2010-01-12 12:00:00", "2010-01-12 12:30:00")
        assert times.compute_overlap_time(first, second) == []

    def test_negative_time_interval(self):
        """A range whose end precedes its start is rejected with ValueError."""
        forwards = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        backwards = times.time_range("2010-01-12 11:00:00", "2010-01-12 10:30:00")
        with pytest.raises(ValueError):
            times.compute_overlap_time(forwards, backwards)
28 changes: 28 additions & 0 deletions week05-testing/test_times_parameterised.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import times
import pytest

class TestTimeOverlap:
    """Parametrised checks of ``times.compute_overlap_time``, based on the
    UCL RITS example code."""

    @pytest.mark.parametrize(
        ["time_range_1", "time_range_2", "expected"],
        [
            # The sample ranges shipped with the example problem
            (
                times.large,
                times.short,
                [
                    ('2010-01-12 10:30:00', '2010-01-12 10:37:00'),
                    ('2010-01-12 10:38:00', '2010-01-12 10:45:00'),
                ],
            ),
            # Two ranges separated by a gap: nothing overlaps
            (
                times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00"),
                times.time_range("2010-01-12 12:30:00", "2010-01-12 12:45:00"),
                [],
            ),
            # Each range is split into two intervals; two sub-intervals overlap
            (
                times.time_range(
                    "2010-01-12 10:00:00",
                    "2010-01-12 12:00:00",
                    number_of_intervals=2,
                    gap_between_intervals_s=0,
                ),
                times.time_range(
                    "2010-01-12 10:30:00",
                    "2010-01-12 11:30:00",
                    number_of_intervals=2,
                    gap_between_intervals_s=0,
                ),
                [
                    ('2010-01-12 10:30:00', '2010-01-12 11:00:00'),
                    ('2010-01-12 11:00:00', '2010-01-12 11:30:00'),
                ],
            ),
            # One range ends at exactly the moment the other starts
            (
                times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00"),
                times.time_range("2010-01-12 12:00:00", "2010-01-12 12:30:00"),
                [],
            ),
        ],
    )
    def test_given_input(self, time_range_1, time_range_2, expected):
        """The overlap of each pair of ranges matches the expected list."""
        assert times.compute_overlap_time(time_range_1, time_range_2) == expected

    def test_negative_time_interval(self):
        """A range whose end precedes its start is rejected with ValueError."""
        forwards = times.time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
        backwards = times.time_range("2010-01-12 11:00:00", "2010-01-12 10:30:00")
        with pytest.raises(ValueError):
            times.compute_overlap_time(forwards, backwards)
31 changes: 31 additions & 0 deletions week05-testing/times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import datetime


def time_range(start_time, end_time, number_of_intervals=1, gap_between_intervals_s=0):
    """Split a time span into equally long intervals separated by fixed gaps.

    Args:
        start_time: Start of the span, formatted as "%Y-%m-%d %H:%M:%S".
        end_time: End of the span, in the same format. It is not required
            to be later than ``start_time``; a backwards span yields a
            backwards interval.
        number_of_intervals: How many intervals to cut the span into.
            Must be at least 1.
        gap_between_intervals_s: Seconds left between the end of one
            interval and the start of the next.

    Returns:
        A list of ``(start, end)`` tuples of strings in the same format as
        the inputs, one per interval, in chronological order of creation.

    Raises:
        ValueError: If ``number_of_intervals`` is smaller than 1, or if a
            timestamp does not match the expected format.
    """
    # Zero would divide by zero below and a negative count would silently
    # produce an empty list, so reject both explicitly.
    if number_of_intervals < 1:
        raise ValueError("number_of_intervals must be at least 1")

    time_format = "%Y-%m-%d %H:%M:%S"
    span_start = datetime.datetime.strptime(start_time, time_format)
    span_end = datetime.datetime.strptime(end_time, time_format)

    # Length of a single interval in seconds: the whole span minus the
    # (number_of_intervals - 1) gaps, shared equally between the intervals.
    d = (span_end - span_start).total_seconds() / number_of_intervals + gap_between_intervals_s * (1 / number_of_intervals - 1)

    intervals = []
    for i in range(number_of_intervals):
        # Interval i starts after i full intervals and i gaps.
        interval_start = span_start + datetime.timedelta(seconds=i * d + i * gap_between_intervals_s)
        interval_end = span_start + datetime.timedelta(seconds=(i + 1) * d + i * gap_between_intervals_s)
        intervals.append((interval_start.strftime(time_format), interval_end.strftime(time_format)))
    return intervals


def compute_overlap_time(range1, range2):
    """Return every overlap between the intervals of two ranges.

    Interval bounds are compared directly with ``>``, ``max`` and ``min``,
    so they must be mutually comparable values whose ordering matches
    chronological order.

    Args:
        range1: Iterable of ``(start, end)`` pairs.
        range2: Iterable of ``(start, end)`` pairs.

    Returns:
        A list of ``(low, high)`` pairs, one for each pair of intervals
        (one taken from each range) that share more than a single instant.
        Intervals that merely touch contribute nothing.

    Raises:
        ValueError: If any interval in either range starts after it ends.
    """
    # Materialise both inputs so they can be validated up front and then
    # walked again, even when an iterator was passed in.
    intervals1 = list(range1)
    intervals2 = list(range2)

    # Validate every interval once, regardless of whether the other range
    # is empty, instead of re-checking inside the pairwise loop.
    for start, end in intervals1 + intervals2:
        if start > end:
            raise ValueError("Start time cannot be bigger than end time")

    overlap_time = []
    for start1, end1 in intervals1:
        for start2, end2 in intervals2:
            low = max(start1, start2)
            high = min(end1, end2)
            # Strictly-less-than also excludes intervals that only touch.
            if low < high:
                overlap_time.append((low, high))
    return overlap_time

# Sample ranges: a single interval, and a shorter span cut into two
# intervals with a 60 second gap between them
large = time_range("2010-01-12 10:00:00", "2010-01-12 12:00:00")
short = time_range(
    "2010-01-12 10:30:00",
    "2010-01-12 10:45:00",
    number_of_intervals=2,
    gap_between_intervals_s=60,
)

if __name__ == "__main__":
    # When run as a script, print where the two sample ranges overlap
    sample_overlap = compute_overlap_time(large, short)
    print(sample_overlap)

0 comments on commit 3a71a34

Please sign in to comment.