Adds ML Test codes

dhatwalia · Sep 7, 2021 · 36dfb4d · 36dfb4d
1 parent 39b54dc
commit 36dfb4d
Show file tree

Hide file tree

Showing 5 changed files with 2,535 additions and 0 deletions.
diff --git a/ml-tests/README.md b/ml-tests/README.md
@@ -0,0 +1,30 @@
+# Insurance Advisor
+
+Lakehead University COMP-9800 Project Winter - Spring/Summer 2021
+
+## Steps to install
+
+1. Install Anaconda
+
+2. Install xgboost
+
+```
+$ pip install xgboost
+```
+
+## Steps to run
+1. To run the notebook in Anaconda, click “Cells” and then “Run all”.
+
+2. To run the notebook on Google Colab, click “Runtime” and then “Run all”.
+
+3. To run all the models from the .py file:
+
+```
+$ python insurance.py
+```
+
+4. To execute only the Voting Regressor with 2 GBRs from the .py file:
+
+```
+$ python gbr.py
+```
diff --git a/ml-tests/gbr.py b/ml-tests/gbr.py
@@ -0,0 +1,64 @@
+from pandas import *
+from math import ceil
+from sklearn.ensemble import GradientBoostingRegressor, VotingRegressor
+from sklearn.metrics import mean_absolute_error
+from sklearn.model_selection import train_test_split
+import numpy as np
+
+# Load the data
+# NOTE: "insurance.csv" is resolved relative to the current working directory.
+print('Reading the data...')
+data = read_csv("insurance.csv")
+print('Read completed.\n')
+
+# One-hot encoding
+# drop_first=True drops the first level of every categorical column, so each
+# column contributes one indicator fewer than it has levels. Judging by the
+# feature list below, 'region_northeast' is the dropped region baseline.
+print('Preprocessing data...')
+data = get_dummies(data, columns=['sex', 'smoker', 'region'], drop_first=True)
+
+# Format and Split the data
+# x holds the model features, y the regression target.
+x = data[['age', 'bmi', 'children', 'sex_male', 'smoker_yes', 'region_northwest', 'region_southeast', 'region_southwest']]
+y = data['charges']
+
+# Hold out 40% of the rows for testing. The split is seeded (like the
+# regressors) so the reported Mean Absolute Error is reproducible between runs.
+train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.4, random_state=1)
+print('Preprocessing completed.\n')
+
+# Train the model and make predictions
+# max_features is left at its default (consider all features). The former
+# 'auto' value meant the same thing for regressors, but it is deprecated and
+# rejected by newer scikit-learn releases.
+# NOTE(review): both voters are configured identically (same hyper-parameters
+# and seed), so the ensemble presumably predicts the same as a single
+# regressor — confirm whether the two were meant to differ.
+r1 = GradientBoostingRegressor(loss='huber', learning_rate=0.13, alpha=0.7, random_state=1)
+r2 = GradientBoostingRegressor(loss='huber', learning_rate=0.13, alpha=0.7, random_state=1)
+model = VotingRegressor([('gbr1', r1), ('gbr2', r2)])
+model.fit(train_x, train_y)
+
+# Score the fitted ensemble on the held-out rows.
+print('Testing the model...')
+predicted = model.predict(test_x)
+mae = mean_absolute_error(test_y, predicted)
+print('Mean Absolute Error : ',mae)
+print('Testing completed.\n')
+
+# Predict cost for a sample customer
+# The columns must match the training features by name and order. The region
+# indicators are northwest/southeast/southwest; 'northeast' is the baseline
+# level dropped by the one-hot encoding and therefore has no column.
+print('Running for one sample...')
+sample = DataFrame({
+            'age': 26,
+            'bmi': 25.44,
+            'children': 1,
+            'sex_male': 1,
+            'smoker_yes': 0,
+            'region_northwest': 0,
+            'region_southeast': 0,
+            'region_southwest': 1,
+          }, index=[1])
+print('Sample data : ',sample)
+# predict() returns one value per input row; the sample has a single row.
+cost = model.predict(sample)[0]
+print('Predicted cost : ', cost)
+print('Sample run completed.\n')
+
+print('Calculating premium...')
+# Calculate premium
+def compute_monthly_premium(cost, multiplier=1.1):
+    """Return the monthly premium for a predicted annual cost.
+
+    The cost is scaled by ``multiplier``, rounded up to the next whole
+    number, and then divided into 12 monthly instalments.
+
+    Args:
+        cost: Predicted cost to base the premium on.
+        multiplier: Loading factor applied to ``cost`` before rounding.
+            Defaults to 1.1, the value that was previously hard-coded.
+
+    Returns:
+        The rounded-up scaled cost divided by 12.
+    """
+    # NOTE(review): rounding is applied to the yearly amount, so the monthly
+    # figure can still be fractional — confirm this is the intended rounding.
+    annual_premium = ceil(cost * multiplier)
+    return annual_premium / 12
+
+# Report the monthly premium derived from the sample customer's predicted cost.
+print('Monthly Premium : ',compute_monthly_premium(cost))
+print('Premium calculated.\n')
+
+print('Program completed.')
+
+# Repeat the test-set error as the final line of output.
+print('Mean Absolute Error : ',mae)