Skip to content

Commit

Permalink
Merge pull request #246 from Yc325/Machine_Learning_Minor_Update
Browse files Browse the repository at this point in the history
Machine learning very small update
  • Loading branch information
didymo authored Oct 24, 2022
2 parents c808b78 + b19754b commit 1f1673f
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 11 deletions.
6 changes: 6 additions & 0 deletions src/Model/batchprocessing/BatchProcessMachineLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def __init__(self, progress_callback, interrupt_flag,
self.preprocessing = None
self.run_ml = None
self.X_train = None
self.X_train_for_confusion_matrix = None
self.X_test = None
self.y_train = None
self.y_train_for_confusion_matrix = None
self.y_test = None
self.params = None
self.scaling = None
Expand Down Expand Up @@ -115,6 +117,8 @@ def preprocessing_for_ml(self):
self.preprocessing.scaling
self.machine_learning_options['features'] =\
self.preprocessing.column_names
self.X_train_for_confusion_matrix = self.preprocessing.x_train_for_confusion_matrix
self.y_train_for_confusion_matrix = self.preprocessing.y_train_for_confusion_matrix
self.run_model_accept =\
self.preprocessing.permission

Expand All @@ -128,6 +132,8 @@ def run_model(self):
self.run_ml = MlModeling(
self.X_train,
self.X_test,
self.X_train_for_confusion_matrix,
self.y_train_for_confusion_matrix,
self.y_train,
self.y_test,
self.preprocessing.target,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class MlModeling():
def __init__(self,
train_feature,
test_feature,
train_feature_dataset_for_confusion_matrix,
train_label_dataset_for_confusion_matrix,
train_label,
test_label,
target,
Expand All @@ -38,19 +40,23 @@ def __init__(self,
permission=None):
self.train_feature = train_feature
self.test_feature = test_feature
self.train_feature_dataset_for_confusion_matrix = train_feature_dataset_for_confusion_matrix
self.train_label_dataset_for_confusion_matrix = train_label_dataset_for_confusion_matrix
self.train_label = train_label
self.test_label = test_label
self.target = target
self.type_model = type_model
self.tuning = tuning
self.permission = permission
self.confusion_matrix = None
self.train_dataset_confusion_matrix = None
self.model = None
self.score = None
self.accuracy = {
"accuracy": '',
"model": ''
}
self.model_names = ['RandomForestClassifier', 'MLPClassifier']

"""
Class initializer function.
Expand Down Expand Up @@ -127,6 +133,7 @@ def calculate_balance(self):
return balance

def custom_confusion_matrix(self,
test_label,
predictions):
"""
The function creates a confusion matrix
Expand All @@ -136,10 +143,10 @@ def custom_confusion_matrix(self,
see here: https://towardsdatascience.com
/understanding-confusion-matrix-a9ad42dcfd62
"""
unique_label = np.unique([self.test_label,
unique_label = np.unique([test_label,
predictions])
cmtx = pd.DataFrame(
confusion_matrix(self.test_label,
confusion_matrix(test_label,
predictions,
labels=unique_label),
index=['true:{:}'.format(x) for x in unique_label],
Expand Down Expand Up @@ -256,12 +263,14 @@ def classification_ml_tuned(self):

if mlp_score > random_forest_score:
self.confusion_matrix = self.custom_confusion_matrix(
self.test_label,
mlp_pred)
self.score = mlp_score
self.accuracy['accuracy'] = f'{self.score}'
return mlp_model

self.confusion_matrix = self.custom_confusion_matrix(
self.test_label,
random_forest_pred)
self.score = random_forest_score
self.accuracy['accuracy'] = f'{self.score}'
Expand Down Expand Up @@ -308,12 +317,15 @@ def classification_ml(self):
mlp_score = perfomance(mlp_pred)

if mlp_score > random_forest_score:
self.confusion_matrix = self.custom_confusion_matrix(mlp_pred)
self.confusion_matrix = self.custom_confusion_matrix(
self.test_label,
mlp_pred)
self.score = mlp_score
self.accuracy['accuracy'] = f'{self.score}'
return mlp_cla

self.confusion_matrix = self.custom_confusion_matrix(
self.test_label,
random_forest_pred)
self.score = random_forest_score
self.accuracy['accuracy'] = f'{self.score}'
Expand Down Expand Up @@ -523,15 +535,28 @@ def save_confusion_matrix(self, path):
:param path: path where the file will be saved.
"""
path += f'{self.target}_ML_RiskTable.txt'
headers = ['RISK TABLE', 'ML PERFOMANCE']
with open(path, 'w') as f:
print(f'{headers[0]}\n',
file=f)
df_as_string = self.confusion_matrix.to_string(header=True,
headers = ['TRAIN DATASET RISK TABLE', 'TEST DATASET RISK TABLE', 'ML PERFOMANCE']
if type(self.model).__name__ in self.model_names:
with open(path, 'w') as f:
print(f'{headers[0]}\n',
file=f)
df_as_string_train = self.train_dataset_confusion_matrix.to_string(
header=True,
index=True)
f.write(df_as_string_train)

print(f'\n\n{headers[1]}\n',
file=f)
df_as_string_test = self.confusion_matrix.to_string(header=True,
index=True)
f.write(df_as_string)
print(f'\n\n{headers[1]}\n', file=f)
print(f'{self.score[0]}: {self.score[1]}', file=f)
f.write(df_as_string_test)
print(f'\n\n{headers[2]}\n', file=f)
print(f'{self.score[0]}: {self.score[1]}', file=f)
else:
with open(path, 'w') as f:
print(f'\n\n{headers[2]}\n', file=f)
print(f'Accuracy: {self.score}', file=f)


def run_model(self):
"""
Expand All @@ -552,5 +577,12 @@ def run_model(self):
self.model = self.regression_ml_tuned()
else:
self.model = self.regression_ml()
if type(self.model).__name__ in self.model_names:
train_predictions_for_confusion_matrix = self.model.predict(
self.train_feature_dataset_for_confusion_matrix)

self.train_dataset_confusion_matrix = self.custom_confusion_matrix(
self.train_label_dataset_for_confusion_matrix,
train_predictions_for_confusion_matrix)

self.accuracy['model'] = type(self.model).__name__
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def __init__(self,
self.missing_id = []
self.permission = None
self.permission_ids = None
self.x_train_for_confusion_matrix = None
self.y_train_for_confusion_matrix = None

"""
Class initializer function.
Expand Down Expand Up @@ -472,6 +474,9 @@ def prepare_for_ml(self):
("cat", OneHotEncoder(handle_unknown='ignore'), final_cat)
])

self.x_train_for_confusion_matrix = x_train.copy()
self.y_train_for_confusion_matrix = self.x_train_for_confusion_matrix[self.target]

# Check if label is imbalanced, if so,
# then it does Up sampling on train
if result[0]:
Expand All @@ -482,6 +487,7 @@ def prepare_for_ml(self):
y_test = x_test[self.target]
x_train = full_pipeline.fit_transform(x_train)
x_test = full_pipeline.transform(x_test)
self.x_train_for_confusion_matrix = full_pipeline.transform(self.x_train_for_confusion_matrix)
self.scaling = full_pipeline

return x_train, x_test, y_train, y_test
Expand Down

0 comments on commit 1f1673f

Please sign in to comment.