Add files via upload

xs018 · Aug 14, 2023 · 7b1aca3 · 7b1aca3
1 parent 8c2220d
commit 7b1aca3
Show file tree

Hide file tree

Showing 12 changed files with 232 additions and 0 deletions.
diff --git a/MLP_Regression&Classification_code/HW1-Xiaotong Sun-oct1.pdf b/MLP_Regression&Classification_code/HW1-Xiaotong Sun-oct1.pdf
diff --git a/MLP_Regression&Classification_code/dataset.py b/MLP_Regression&Classification_code/dataset.py
@@ -0,0 +1,2 @@
+import splitfolders
+splitfolders.ratio("/ocean/projects/mch210006p/shared/HW1/Classification", output="/ocean/projects/mch210006p/xs018/HW1/Classification", seed=1337, ratio=(.8, .1, .1), group_prefix=None) # default values
diff --git a/MLP_Regression&Classification_code/hw1_1_1.py b/MLP_Regression&Classification_code/hw1_1_1.py
@@ -0,0 +1,68 @@
+import pandas as pd
+import numpy as np
+from scipy.stats import zscore
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.callbacks import EarlyStopping
+from sklearn import metrics
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import time 
+
+tf.random.set_seed(1234)
+def create_mlp(dim, regress=False):
+    model = Sequential()
+    model.add(Dense(256, input_dim=dim, activation="relu"))
+    model.add(Dense(512, activation="relu"))
+    model.add(Dense(128, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+    if regress:
+        model.add(Dense(1, activation="linear"))
+    return model
+
+def main():
+    file_path = '/ocean/projects/mch210006p/shared/HW1/Regression/boiling-32_temp_heat_flux.txt'
+    data = pd.read_csv(file_path, delimiter = "\t", names=['Temperature(C)', 'Heat flux(W/cm2)'], header=0)
+    x = zscore(data['Temperature(C)'])
+    y = data['Heat flux(W/cm2)'].values
+    start=time.time()
+    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
+    model = create_mlp(1, True)
+    print(model.summary())
+    # rmse = tf.keras.metrics.RootMeanSquaredError()
+    model.compile(loss='mean_squared_error', optimizer='adam')
+    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, 
+                            patience=5, verbose=1, mode='auto', restore_best_weights=True)
+    history = model.fit(x_train,y_train, validation_data=(x_test,y_test), callbacks=[monitor], verbose=2, epochs=100)     
+    end=time.time()
+    print(f"Eclapse time: {end-start}s")
+
+    # print(history.history)
+    plt.figure()
+    plt.plot(history.history['loss'], label='training_loss')
+    plt.plot(history.history['val_loss'], label='val_loss')
+    plt.xlabel('epoches')
+    plt.ylabel('loss')
+    plt.savefig('res/hw1_1_1mse.png')
+    # plt.figure()
+    # plt.plot(history.history['rmse'], label='training accuracy')
+    # plt.plot(history.history['val_rmse'], label='val_accuracy')
+    # plt.savefig('res/hw1_1_rmse.png')
+
+    # Predict
+    pred = model.predict(x_test)
+
+    # Measure MSE error. 
+    score_mse = metrics.mean_squared_error(pred, y_test)
+
+    # Measure RMSE error.  RMSE is common for regression.
+    score_rmse = np.sqrt(score_mse)
+
+    print(f"Mean Square Error: {score_mse}")
+    print(f"Rooted Mean Square Error: {score_rmse}")
+
+
+
+if __name__ == '__main__':
+    main()
diff --git a/MLP_Regression&Classification_code/hw1_1_1.sh b/MLP_Regression&Classification_code/hw1_1_1.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+######## --send email ########
+#SBATCH --mail-type=begin
+#SBATCH --mail-type=end
+#SBATCH --mail-user=xs018@uark.edu
+
+######## Job Name: Train_Job ########
+#SBATCH -J HW1_Job
+#SBATCH -o log/HW1_Job.o%j
+#SBATCH -e log/HW1_Job.e%j
+
+#SBATCH -p GPU-shared
+#SBATCH -N 1
+#SBATCH --export=ALL
+#SBATCH --gres=gpu:1
+#SBATCH -t 00:10:00
+
+module load AI/anaconda3-tf2.2020.11
+conda activate /jet/home/xs018/envs
+cd /jet/home/xs018/code
+
+python hw1_1_1.py
diff --git a/MLP_Regression&Classification_code/hw1_1_1mse.png b/MLP_Regression&Classification_code/hw1_1_1mse.png
diff --git a/MLP_Regression&Classification_code/hw1_1_2.py b/MLP_Regression&Classification_code/hw1_1_2.py
@@ -0,0 +1,83 @@
+import pandas as pd
+import numpy as np
+from scipy.stats import zscore
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.callbacks import EarlyStopping
+from sklearn import metrics
+import matplotlib.pyplot as plt
+from sklearn.model_selection import KFold
+from keras import losses
+import statistics
+import time
+import tensorflow as tf
+
+tf.random.set_seed(1234)
+def create_mlp(dim, regress=False):
+    model = Sequential()
+    model.add(Dense(256, input_dim=dim, activation="relu"))
+    model.add(Dense(512, activation="relu"))
+    model.add(Dense(128, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+    if regress:
+        model.add(Dense(1, activation="linear"))
+    return model
+
+def main():
+    file_path = '/ocean/projects/mch210006p/shared/HW1/Regression/boiling-32_temp_heat_flux.txt'
+    data = pd.read_csv(file_path, delimiter = "\t", names=['Temperature(C)', 'Heat flux(W/cm2)'], header=0)
+    x = zscore(data['Temperature(C)'])
+    y = data['Heat flux(W/cm2)'].values
+    # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
+    start=time.time()
+    model = create_mlp(1, True)
+    print(model.summary())
+    model.compile(loss='mean_squared_error', optimizer='adam')
+    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, 
+                            patience=5, verbose=1, mode='auto', restore_best_weights=True)
+
+    best_loss = np.Infinity
+    best_losses_train = None
+    best_losses_validation = None
+    kf = KFold(100, shuffle=True, random_state=42)
+    fold=0
+    for train, test in kf.split(x):
+        x_train = x[train]
+        y_train = y[train]
+        x_test = x[test]
+        y_test = y[test]
+        fold+=1
+        print(f"Fold #{fold}")
+        # print(history.history)
+        history = model.fit(x_train,y_train, validation_data=(x_test,y_test), callbacks=[monitor], verbose=2, epochs=100) 
+        if statistics.mean(history.history['val_loss']) < best_loss: 
+            best_loss =  statistics.mean(history.history['val_loss'])
+            best_losses_train = history.history['loss']
+            best_losses_validation = history.history['val_loss']
+    end=time.time()
+    print(f"Eclapse time: {end-start}s")
+    plt.figure()
+    plt.plot(best_losses_train, label='training_loss')
+    plt.plot(best_losses_validation, label='val_loss')
+    plt.savefig('res/hw1_1_2mse.png')
+    # plt.figure()
+    # plt.plot(history.history['rmse'], label='training accuracy')
+    # plt.plot(history.history['val_rmse'], label='val_accuracy')
+    # plt.savefig('res/hw1_1_rmse.png')
+    # Predict
+    pred = model.predict(x_test)
+
+    # Measure MSE error. 
+    score_mse = metrics.mean_squared_error(pred, y_test)
+
+    # Measure RMSE error.  RMSE is common for regression.
+    score_rmse = np.sqrt(score_mse)
+
+    print(f"Mean Square Error: {score_mse}")
+    print(f"Rooted Mean Square Error: {score_rmse}")
+
+
+
+if __name__ == '__main__':
+    main()
diff --git a/MLP_Regression&Classification_code/hw1_2.sh b/MLP_Regression&Classification_code/hw1_2.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+######## --send email ########
+#SBATCH --mail-type=begin
+#SBATCH --mail-type=end
+#SBATCH --mail-user=xs018@uark.edu
+
+######## Job Name: Train_Job ########
+#SBATCH -J HW1_Job
+#SBATCH -o log/HW1_Job.o%j
+#SBATCH -e log/HW1_Job.e%j
+
+#SBATCH -p GPU-shared
+#SBATCH -N 1
+#SBATCH --export=ALL
+#SBATCH --gres=gpu:1
+#SBATCH -t 02:00:00
+
+module load AI/anaconda3-tf2.2020.11
+conda activate /jet/home/xs018/envs
+cd /jet/home/xs018/code
+
+python hw1_2.py
diff --git a/MLP_Regression&Classification_code/hw1_2ROC.png b/MLP_Regression&Classification_code/hw1_2ROC.png
diff --git a/MLP_Regression&Classification_code/hw1_2_train.png b/MLP_Regression&Classification_code/hw1_2_train.png
diff --git a/MLP_Regression&Classification_code/hw1_2confusion_matrix.png b/MLP_Regression&Classification_code/hw1_2confusion_matrix.png
diff --git a/MLP_Regression&Classification_code/hw1_2confusion_prediction.png b/MLP_Regression&Classification_code/hw1_2confusion_prediction.png
diff --git a/MLP_Regression&Classification_code/visualization.py b/MLP_Regression&Classification_code/visualization.py
@@ -0,0 +1,35 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from sklearn.metrics import confusion_matrix, auc, roc_curve, precision_recall_curve
+from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay, PrecisionRecallDisplay
+
+res = pd.read_csv('result.csv').values
+y_true = res[:, 1]
+y_pred = res[:, 0]
+y_prob = res[:, 2]
+
+cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
+
+tn, fp, fn, tp = cm.ravel() # where 1 is positive, 0 is negative
+print(f"True Negative: {tn}, False Positive: {fp}, False Negative: {fn}, True Postive: {tp}")
+
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['post-CHF(0)', 'pre-CHF(1)'])
+disp.plot()
+plt.savefig('res/hw1_2confusion_matrix.png')
+
+fpr, tpr, thresholds = roc_curve(y_true, y_prob)
+roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
+plt.savefig('res/hw1_2ROC.png')
+
+prec, recall, _  = precision_recall_curve(y_true, y_prob)
+pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot()
+plt.savefig('res/hw1_2confusion_prediction.png')
+
+print(f"Area Under Curve: {auc(fpr, tpr)}")
+print(f"Accuracy: {(tp+tn) / (tn + fp + fn+ tp)}")
+print(f"Precision: {(tp) / ( fp +  tp)}")
+print(f"Recall: {(tp) / ( fn +  tp)}")
+print(f"F1 Score: {tp / (tp + (fn + fp)/2)}")
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		import splitfolders
		splitfolders.ratio("/ocean/projects/mch210006p/shared/HW1/Classification", output="/ocean/projects/mch210006p/xs018/HW1/Classification", seed=1337, ratio=(.8, .1, .1), group_prefix=None) # default values