chatbot-training.py
# import nltk
import json
import pickle
import numpy as np
import random
# Keras model, layers, and optimizer
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
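# NOTE: word_tokenize and WordNetLemmatizer depend on NLTK data packages; if
# they are not already present, a one-time download is needed, e.g.:
#   import nltk
#   nltk.download('punkt')
#   nltk.download('wordnet')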
wnl = WordNetLemmatizer()
# mystr = "There are my friends. They eat at hotels and restaurants."
# mystr2 = mystr.replace("."," ")
# mylist = [ wnl.lemmatize(val.lower()) for val in mystr2.split() ]
# print(mylist)
# initialization
words = []
docs = []
classes = []
ignore_words = ['?', '!', '.']
# load the intents file (tags and their example patterns)
with open("./intents.json") as f:
    intents = json.load(f)
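# The loop below assumes intents.json looks roughly like this (only "tag" and
# "patterns" are read here; the values shown are illustrative):
#   {"intents": [{"tag": "greeting", "patterns": ["Hi", "Hello", "Good day"]},
#                {"tag": "goodbye",  "patterns": ["Bye", "See you later"]}]}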
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        # tokenize each pattern and remember its (tokens, tag) pair
        word = word_tokenize(pattern)
        words.extend(word)
        docs.append((word, intent["tag"]))
    if intent["tag"] not in classes:
        classes.append(intent["tag"])
# print(words)
words = [wnl.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
print("Lemmatized tokens:", len(words), "\n")
classes = sorted(set(classes))
print("Tag classes:", len(classes), "\n")
print("Combinations of tags and patterns:", len(docs), "\n")
# store the vocabulary and class lists in pickle (.pkl) files
pickle.dump(words,open('words_stored.pkl','wb'))
pickle.dump(classes,open('classes_stored.pkl','wb'))
# generate bag of words i.e. 1 for word present, 0 for absent
# generate output list
training = []
for doc in docs:
    curr_words = [wnl.lemmatize(wo.lower()) for wo in doc[0] if wo not in ignore_words]
    bag = [int(i in curr_words) for i in words]
    output_list = [int(tag == doc[1]) for tag in classes]
    training.append([bag, output_list])
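# Each training row pairs a bag-of-words vector (length len(words)) with a
# one-hot tag vector (length len(classes)), e.g. [[0, 1, 0, ..., 1], [1, 0, 0]]
# for a hypothetical three-tag setup.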
random.shuffle(training)
# build the input (bag-of-words) and output (one-hot tag) arrays directly;
# wrapping the ragged [bag, output_list] pairs in np.array() would raise an
# error on recent NumPy versions
x_train = np.array([item[0] for item in training])
y_train = np.array([item[1] for item in training])
# print("\n=====X-TRAIN=====\n")
# print(x_train)
# print("\n=====Y-TRAIN=====\n")
# print(y_train)
# print("\n==========\n")
print("Training data created.\n")
mymodel = Sequential()
mymodel.add(Dense(128, activation='relu', input_shape=(len(x_train[0]),)))
mymodel.add(Dropout(0.5))
mymodel.add(Dense(64, activation='relu'))
mymodel.add(Dropout(0.5))
mymodel.add(Dense(len(y_train[0]), activation='softmax'))
# note: `decay` is only accepted by older Keras/TF optimizers; on Keras 3,
# drop it or use a learning-rate schedule instead
sgd = SGD(learning_rate=0.02, momentum=0.9, decay=1e-6, nesterov=True)
mymodel.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
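# A quick sanity check of the architecture can be printed with Keras'
# built-in summary, e.g.:
#   mymodel.summary()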
# fitting and saving model
hist = mymodel.fit(x_train, y_train, batch_size=5, epochs=10, verbose=1)
# Model.save only needs the file path; passing the History object as a second
# positional argument would be misinterpreted as the `overwrite` flag
mymodel.save('chatbot_mymodel.h5')
print("Model created and saved!\n")