-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
77 lines (57 loc) · 2.49 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from builtins import print
from pandas import read_csv
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from models.linearsvc import linearsvc
from models.multinomialnb import multinomialnb
from models.bernoullinb import bernoullinb
from models.logistic_regression import logistic_regression
from models.perceptron import perceptron
from models.mlpclassifier import mlpclassifier
# numpy.set_printoptions(threshold=sys.maxsize)
if __name__ == "__main__":
    # Entry point: load the pre-processed reviews, turn them into normalized
    # bag-of-words features, and run every classifier on the same
    # train/test split.
    #
    # "output/output.csv" comes from a separate one-off step that is not run
    # here: read files/drugsComTrain_raw.tsv and files/drugsComTest_raw.tsv
    # (tab-delimited), append one to the other, pass the result through
    # pre_processing(), and save it with to_csv("output/output.csv").

    # ------------- Load File Pre processing
    # The label printed below is inherited from that earlier concatenation
    # step; what actually happens here is loading the pre-processed CSV.
    print("\n> Concatenate datasets:")
    out = read_csv("output/output.csv")

    # read_csv parses empty fields as NaN and CountVectorizer rejects NaN
    # documents, so a review that ended up empty is treated as "" instead of
    # aborting the run. Rows are kept, so labels stay aligned.
    reviews = out["clean_review"].fillna("")

    print("\n> Train Test Split:")
    # shuffle=False keeps the file order: the leading 75% of the rows are
    # used for training and the trailing 25% for testing.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        reviews, out["score_rating"], shuffle=False, test_size=0.25
    )

    # ------------- Vectorization
    print("\n> X Train Vectorizing :")
    vectorizer = CountVectorizer()
    # Learn the vocabulary and build the count matrix in a single pass over
    # the training reviews (same result as fit() followed by transform()).
    X_train = vectorizer.fit_transform(X_train_raw)
    print("\n> X Test Vectorizing :")
    # The test reviews are encoded with the vocabulary learned from training.
    X_test = vectorizer.transform(X_test_raw)

    # ------------- Normalize
    # Normalizer rescales each sample (row) on its own and learns nothing in
    # fit(), so one instance serves both splits.
    normalizer = Normalizer()
    print("\n> X_train Normalizing:")
    X_train = normalizer.fit_transform(X_train)
    print("\n> X_test Normalizing:")
    X_test = normalizer.transform(X_test)

    # ------------- Models
    # NOTE(review): the helpers are called with y_test before y_train, as in
    # the original script -- confirm this matches their signatures in models/.
    model_runs = (
        ("LinearSVC", linearsvc),
        ("BernoulliNB", bernoullinb),
        ("MultinomialNB", multinomialnb),
        ("LogisticRegression", logistic_regression),
        ("Perceptron", perceptron),
        ("MLPClassifier", mlpclassifier),
    )
    for label, run_model in model_runs:
        print(f"\n> {label}:")
        run_model(X_train, X_test, y_test, y_train)