# Import required libraries
import numpy as np
import csv
import cv2
import sklearn
import time
from sklearn import preprocessing, metrics, cross_validation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
# NOTE: the next import is only valid for scikit-learn version <= 0.17
# for scikit-learn >= 0.18 use:
# from sklearn.model_selection import train_test_split
from sklearn.cross_validation import train_test_split
# Constants
data_path = "data/"
csv_params = []
csv_labels = []
# Reading the content of csv file
with open(data_path + 'heart-statlog.csv') as csv_file:
csv_reader = csv.reader(csv_file)
# Uncomment this line if file contains headers
# next(csv_reader, None)
for each_line in csv_reader:
csv_params.append(each_line[:-1])
csv_labels.append(each_line[-1])
# Converting the data type from list to numpy array
input_params = np.array(csv_params)
input_labels = np.array(csv_labels)
print("Shape of input params: ", input_params.shape) #(rows, columns)
print("Shape of input labels: ", input_labels.shape) #(rows, columns)
Shape of input params: (270, 13)
Shape of input labels: (270,)
- age
- sex
- chest pain type (4 values)
- resting blood pressure
- serum cholestoral in mg/dl
- fasting blood sugar > 120 mg/dl
- resting electrocardiographic results (values 0,1,2)
- maximum heart rate achieved
- exercise induced angina
- oldpeak = ST depression induced by exercise relative to rest
- the slope of the peak exercise ST segment
- number of major vessels (0-3) colored by flourosopy
- thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
- input_label -> present/absent
print("Sample input parameters: ", input_params[0])
print("Corresponding input label: ", input_labels[0])
Sample input parameters: ['70' '1' '4' '130' '322' '0' '2' '109' '0' '2.4' '2' '3' '3']
Corresponding input label: present
# Scaling the input params by using standard scaler
X_scaler = StandardScaler().fit(input_params)
X_scaled = X_scaler.transform(input_params)
print("Scaled Output: ", X_scaled[0])
Scaled Output: [ 1.71209356 0.6894997 0.87092765 -0.07540984 1.40221232 -0.41702883
0.98166365 -1.75920811 -0.7012223 1.18101235 0.67641928 2.47268219
-0.87570581]
C:\Users\AKhil\Anaconda3\envs\carnd-term1\lib\site-packages\sklearn\utils\validation.py:475: DataConversionWarning: Data with input dtype <U3 was converted to float64 by StandardScaler.
warnings.warn(msg, DataConversionWarning)
# Splitting the data-set in train and test data
rand_state = np.random.randint(0, 100)
X_train, X_test, y_train, y_test = train_test_split(
X_scaled, input_labels, test_size=0.2, random_state=rand_state)
print("Length of training data: ", len(X_train), len(y_train))
print("Length of test data: ", len(X_test), len(y_test))
Length of training data: 216 216
Length of test data: 54 54
# Training the SVM model
svc = LinearSVC()
# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a single sample
t=time.time()
0.03 Seconds to train SVC...
Test Accuracy of SVC = 0.8889
# 57,1,4,140,192,0,0,148,0,0.4,2,0,6,absent
# 67,1,4,160,286,0,2,108,1,1.5,2,3,3,present
validation_array = np.array([57,1,4,140,192,0,0,148,0,0.4,2,0,6])
transformed_input = X_scaler.transform(validation_array.reshape(1, -1))
print(svc.predict(transformed_input))
['absent']