ai.py
import json
import string
import random
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
nltk.download("punkt")
nltk.download("wordnet")
nltk.download("omw-1.4")
words = []    # vocabulary of all words that appear in the patterns (for the BoW model)
classes = []  # vocabulary of all tags (for the BoW model)
data_X = []   # each pattern
data_y = []   # tag corresponding to each pattern in data_X

# initializing the lemmatizer to reduce words to their stems
lemmatizer = WordNetLemmatizer()
def initialize():
    global words
    global classes
    global data_X
    global data_y

    # Loading the dataset: intents.json
    data_file = open("./intents.json", encoding="utf8").read()
    data = json.loads(data_file)

    # Iterating over all the intents
    for intent in data["intents"]:
        for pattern in intent["patterns"]:
            tokens = nltk.word_tokenize(pattern)  # tokenize each pattern
            words.extend(tokens)                  # and append the tokens to words
            data_X.append(pattern)                # appending the pattern to data_X
            data_y.append(intent["tag"])          # appending the associated tag to each pattern
        # adding the tag to classes if it is not there already
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

    # lemmatize all the words in the vocab and convert them to lowercase,
    # skipping punctuation tokens (e.g. eats => eat)
    words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]
    # sorting the vocab and classes alphabetically and taking the set to remove duplicates
    words = sorted(set(words))
    classes = sorted(set(classes))

    # Text to numbers
    training = []
    out_empty = [0] * len(classes)
    # creating the bag-of-words (BoW) model
    for idx, doc in enumerate(data_X):
        bow = []
        text = lemmatizer.lemmatize(doc.lower())
        for word in words:
            bow.append(1 if word in text else 0)  # 1 if the vocab word occurs in the lemmatized pattern text
        # mark the index of the class that the current pattern is associated with
        output_row = list(out_empty)
        output_row[classes.index(data_y[idx])] = 1
        # add the one-hot encoded BoW and its class to the training data
        training.append([bow, output_row])

    # shuffle the data and convert it to an array
    random.shuffle(training)
    training = np.array(training, dtype=object)
    # split the features and target labels
    train_X = np.array(list(training[:, 0]))
    train_Y = np.array(list(training[:, 1]))

    # The neural network model
    model = Sequential()
    model.add(Dense(128, input_shape=(len(train_X[0]),), activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(len(train_Y[0]), activation="softmax"))
    adam = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss="categorical_crossentropy",
                  optimizer=adam,
                  metrics=["accuracy"])
    print(model.summary())

    # Training the model
    model.fit(x=train_X, y=train_Y, epochs=15000, verbose=1)
    return model, words, classes, data
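

# Optional helpers (a sketch, not part of the original file): persist the trained
# model so the long training run in initialize() does not have to be repeated on
# every start. These use the standard Keras save/load API; the file name
# "chatbot_model.h5" is an assumption, not something the project defines.
def save_model(model, path="chatbot_model.h5"):
    model.save(path)


def load_saved_model(path="chatbot_model.h5"):
    return tf.keras.models.load_model(path)
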
def pred_class(model, text, vocab, labels):
    # Preprocessing the input
    def bag_of_words(text, vocab):
        def clean_text(text):
            tokens = nltk.word_tokenize(text)
            tokens = [lemmatizer.lemmatize(word) for word in tokens]
            return tokens

        tokens = clean_text(text)
        bow = [0] * len(vocab)
        for w in tokens:
            for idx, word in enumerate(vocab):
                if word == w:
                    bow[idx] = 1
        return np.array(bow)

    bow = bag_of_words(text, vocab)
    result = model.predict(np.array([bow]))[0]  # extracting the class probabilities
    thresh = 0.5
    y_pred = [[indx, res] for indx, res in enumerate(result) if res > thresh]
    y_pred.sort(key=lambda x: x[1], reverse=True)  # sort by probability in decreasing order
    return_list = []
    for r in y_pred:
        return_list.append(labels[r[0]])  # labels (tags), highest probability first
    return return_list
def get_response(intents_list, intents_json):
    # fallback response in case no intent passed the threshold or no matching tag is found
    result = "Yeah... I didn't really understand. I may be a god, but I only have AI capabilities. Try phrasing the question another way."
    if len(intents_list) > 0:
        tag = intents_list[0]
        list_of_intents = intents_json["intents"]
        for i in list_of_intents:
            if i["tag"] == tag:
                result = random.choice(i["responses"])
                break
    return result
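

# A minimal usage sketch (an addition, not part of the original file): train the model
# once, then run a simple console loop that classifies each message and prints a
# matching response. Assumes ./intents.json exists in the format expected above.
if __name__ == "__main__":
    model, words, classes, data = initialize()
    while True:
        message = input("You: ")
        if message.lower() in ("quit", "exit"):
            break
        intents = pred_class(model, message, words, classes)
        print("Bot:", get_response(intents, data))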