-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.py
86 lines (80 loc) · 2.52 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""Untitled7.ipynb
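Builds a feature matrix from precomputed amino-acid, dipeptide and atom CSV
files, fits a random forest on the training rows and prints the predicted
labels for the test rows.

Automatically generated by Colaboratory.
Original file is located at:
https://colab.research.google.com/drive/1dkd1OtzLdrjgfqr_2z83qLapdDEiaz7E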
"""
import numpy as np
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
import sklearn.metrics as metrics
import pandas as pd
def makedataset():
    """Load the training feature tables and labels from CSV files.

    Reads four files from the current working directory:

    * ``final_amino_acid_result.csv`` -- 20 feature columns per row
    * ``final_dipeptide_result.csv`` -- 400 feature columns per row
    * ``atom.csv`` -- 5 feature columns per row
    * ``train.csv`` -- the label is taken from the second column

    In every file the first column is skipped (presumably a row
    identifier -- TODO confirm against the CSV producers).

    The number of rows is taken from the files themselves rather than being
    fixed, so training sets of any size are accepted as long as all four
    files agree on it.

    Returns:
        tuple: ``(X, labels)``. ``X`` is a 2-D array with 425 columns
        (20 + 400 + 5, in that order). ``labels`` is a 1-D integer array
        holding ``-1`` for rows whose label equals -1 and ``1`` otherwise.

    Raises:
        ValueError: if a table does not have the expected number of feature
            columns, or the tables and labels disagree on the row count.
    """

    def feature_matrix(path, width):
        # Drop the leading column and insist on the expected feature width;
        # reshape raises ValueError when the column count does not match.
        frame = pd.read_csv(path)
        return frame.iloc[:, 1:].to_numpy().reshape(len(frame), width)

    def is_negative(value):
        # read_csv parses a purely numeric label column as numbers, so a
        # comparison against the string '-1' would never match. Comparing
        # numerically accepts both -1 and '-1'.
        try:
            return float(value) == -1
        except (TypeError, ValueError):
            return False

    X_amino = feature_matrix('final_amino_acid_result.csv', 20)
    X_dip = feature_matrix("final_dipeptide_result.csv", 400)
    X_atom = feature_matrix('atom.csv', 5)

    label_frame = pd.read_csv('train.csv')
    labels = np.array(
        [-1 if is_negative(value) else 1 for value in label_frame.iloc[:, 1]],
        dtype=int,
    )

    # All sources must describe the same samples, row for row.
    row_counts = {
        'final_amino_acid_result.csv': len(X_amino),
        'final_dipeptide_result.csv': len(X_dip),
        'atom.csv': len(X_atom),
        'train.csv': len(labels),
    }
    if len(set(row_counts.values())) != 1:
        raise ValueError(f"training files disagree on row count: {row_counts}")

    X = np.concatenate((X_amino, X_dip, X_atom), axis=1)
    return X, labels
# makedataset()
# Training features and labels, exactly as returned by makedataset().
sequence, labels = makedataset()
# NOTE: a hold-out evaluation used to be sketched here and is disabled:
# shuffle the data, train on the first 1785 rows and score the remainder
# with metrics.accuracy_score.


def _feature_rows(frame, width):
    """Stack the rows of *frame* into an array with *width* columns.

    Each record from ``itertuples()`` starts with the index followed by the
    first CSV column; both are skipped and the rest is kept.
    """
    stacked = np.array([list(record[2:]) for record in frame.itertuples()])
    return stacked.reshape(len(stacked), width)


# Test-set inputs, read from the current working directory.
test = pd.read_csv('test_final.csv')
test_dip = pd.read_csv('dipitest.csv')
test_atom = pd.read_csv('atom_test.csv')
test_data = pd.read_csv('test.csv')  # loaded but not used below

# Same column layout as the training matrix: 20 + 400 + 5 features.
X_t = _feature_rows(test, 20)
X_dt = _feature_rows(test_dip, 400)
X_at = _feature_rows(test_atom, 5)
X_t = np.concatenate([X_t, X_dt, X_at], axis=1)
print(X_t.shape)

avg = 0  # accumulator for the disabled accuracy averaging
for i in range(1):
    # Drop-in alternatives kept for reference: svm.SVC(), MLPClassifier().
    clf = RandomForestClassifier(n_estimators=500).fit(sequence, labels)
    y_pred = clf.predict(X_t)
    print(list(y_pred))