Skip to content

Commit

Permalink
Merge pull request #3 from CAM-Gerlach/cam-improvements-solutions
Browse files Browse the repository at this point in the history
PR: Improve code correctness, style, performance and format in solutions
  • Loading branch information
juanis2112 authored Sep 11, 2020
2 parents ac0ac71 + 1e0d9af commit dabf4d4
Showing 1 changed file with 59 additions and 58 deletions.
117 changes: 59 additions & 58 deletions workshop_solutions.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,88 @@
# -*- coding: utf-8 -*-
#
# Copyright © Spyder Project Contributors
# Licensed under the terms of the MIT License
"""Workshop main flow."""

# pylint: disable=invalid-name, fixme

# In[1] Importing Libraries and Data

# %% [1] Importing Libraries and Data

# Third-party imports
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import explained_variance_score
from sklearn.model_selection import train_test_split

# Local imports
from utils import (
plot_correlations, plot_color_gradients, aggregate_by_year,
predicted_temperature)
from utils import aggregate_by_year, plot_correlations, plot_color_gradients


# %% [2] Exploring the Data

# Read the data
weather_data = pd.read_csv('data/weatherHistory.csv')

# Each inspection step is wrapped in print() so its output is shown when the
# file is run as a script, not only when a line is evaluated interactively.

# Print length of data
print(len(weather_data))

# Print first three rows of DataFrame
print(weather_data.head(3))

# TODO: Print the last three rows of the DataFrame
print(weather_data.tail(3))

# %% [3] Visualization

# Order rows according to date.
# The CSV is read again so this cell always starts from the raw date strings.
weather_data = pd.read_csv('data/weatherHistory.csv')
# Drop the last six characters of every date string before parsing
# (presumably the UTC-offset suffix -- confirm against the CSV).
weather_data['Formatted Date'] = pd.to_datetime(
    weather_data['Formatted Date'].str[:-6])
weather_data_ordered = weather_data.sort_values(by='Formatted Date')

# Reset index to restore its order.
# reset_index() returns a new DataFrame, so the result has to be assigned back.
weather_data_ordered = weather_data_ordered.reset_index(drop=True)

# Drop categorical columns.
# drop() also returns a new DataFrame; without the assignment the columns stay.
weather_data_ordered = weather_data_ordered.drop(
    columns=['Summary', 'Precip Type', 'Loud Cover', 'Daily Summary'])
# Plot temperature vs. date
weather_data_ordered.plot(
    x='Formatted Date', y='Temperature (C)', color='red', figsize=(15, 8))

# TODO: Plot temperature vs date using only the data from 2006
# Rows are selected by the year of their timestamp rather than by a fixed
# row count, so the selection does not depend on how many rows 2006 has.
weather_data_ordered.loc[
    weather_data_ordered["Formatted Date"].dt.year == 2006, :].plot(
        x='Formatted Date', y='Temperature (C)', color='red')

# -----------------------------------------------------------------------------
# Plot temperature and humidity in the same plot
# (subplots=True draws each listed column on its own axes)
weather_data_ordered.plot(
    subplots=True, x='Formatted Date', y=['Temperature (C)', 'Humidity'],
    figsize=(15, 8))

# TODO: Plot different combinations of the variables, and for different years

# -----------------------------------------------------------------------------

# %% [4] Data summarization and aggregation

# Weather data by year
# (aggregate_by_year is imported from the local utils module)
weather_data_by_year = aggregate_by_year(
    weather_data_ordered, date_column='Formatted Date')

# TODO: Create and use a function to average the weather data by month


# %% [5] Data Analysis and Interpretation

# Plot correlations
plot_correlations(weather_data_ordered, size=15)

# Plot gradient colormaps (called once; the arguments are passed by keyword)
plot_color_gradients(
    cmap_category='Plot gradients convention', cmap_list=['viridis', ])

# Compute correlations
# NOTE(review): since pandas 2.0, DataFrame.corr() defaults to
# numeric_only=False; if non-numeric columns are still present this call may
# raise -- pass numeric_only=True in that case.
weather_correlations = weather_data_ordered.corr()
# Correlation between two individual columns; the value is not stored, so it
# is only visible when this expression is evaluated interactively.
weather_data_ordered['Temperature (C)'].corr(
    weather_data_ordered['Humidity'])
Expand All
@@ -88,35 +91,33 @@ (diff hunk header: unchanged lines before this point are collapsed in this view)
# Contrast them with the weather_correlations dataframe


# %% [6] Data Modeling and Prediction

# Get data subsets for the model: humidity is the single feature and
# temperature the target; 25% of the rows are held out for testing.
x_train, x_test, y_train, y_test = train_test_split(
    weather_data_ordered['Humidity'], weather_data_ordered['Temperature (C)'],
    test_size=0.25)

# Run regression
# reshape(-1, 1) turns each 1-D column into a single-column 2-D array, the
# layout scikit-learn estimators expect for their inputs.
regression = linear_model.LinearRegression()
regression.fit(x_train.values.reshape(-1, 1), y_train.values.reshape(-1, 1))

# Print coefficients
print(regression.intercept_, regression.coef_)  # beta_0, beta_1

# %% [7] Predictive Model Testing and Evaluation

# Plot predicted model with test data
# The fitted model produces the predictions directly; the test feature is
# reshaped to a single-column 2-D array, matching how the model was fitted.
y_predict = regression.predict(x_test.values.reshape(-1, 1))
plt.scatter(x_test, y_test, c='red', label='Observation', s=1)
plt.scatter(x_test, y_predict, c='blue', label='Model')
plt.xlabel('Humidity')
plt.ylabel('Temperature (C)')
plt.legend()
plt.show()

# TODO: Using the model, predict the temperature for a given level of humidity

# Evaluate model numerically
# The score is printed so that it is visible when the file runs as a script.
print(explained_variance_score(y_test, y_predict))

0 comments on commit dabf4d4

Please sign in to comment.