-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmodel.py
More file actions
251 lines (229 loc) · 12.8 KB
/
model.py
File metadata and controls
251 lines (229 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import logging
from tqdm import tqdm
import tensorflow as tf
# `fuse`/`word_fusion` are attention-fusion layers, `timedrop` is a
# time-distributed dropout wrapper — all project-local (see parts.py).
from parts import fuse, word_fusion, timedrop
# NOTE(review): this configures the *root* logger at import time, which
# affects every module in the process — consider a named module logger.
logging.getLogger().setLevel("DEBUG")
# Shorthand for the TF 1.x bidirectional RNN constructor used throughout.
birnn = tf.nn.bidirectional_dynamic_rnn
def build(*, batchsize, max_p_len, glove_dim,
          cove_dim, max_q_len, sl_att_dim,
          nerpos_dim, tf_dim, reading_rep_dim,
          final_ques_under_dim, sh_att_dim,
          su_att_dim, fully_fused_para_dim,
          selfboost_att_dim, selfboost_rep_dim,
          dropout_proba, is_train, **extras):
    """Construct the FusionNet graph for SQuAD-style span prediction.

    Builds, in order: input placeholders, word-level fusion, two-layer
    "reading" BiLSTMs for paragraph and question, a final question
    understanding layer, high-level (fully-aware) fusion, self-boosted
    fusion, and the start/end answer-span pointers.

    Keyword Args:
        batchsize: fixed batch dimension of every placeholder.
        max_p_len / max_q_len: padded paragraph / question lengths.
        glove_dim / cove_dim: GloVe / CoVe embedding widths.
        nerpos_dim: width of the concatenated NER+POS feature vector.
        tf_dim: width of the normalized term-frequency feature.
        reading_rep_dim, final_ques_under_dim, fully_fused_para_dim,
            selfboost_rep_dim: output widths of the respective BiLSTMs
            (each direction's cell gets dim // 2 so the fw+bw concat
            matches the target width).
        sl_att_dim, sh_att_dim, su_att_dim, selfboost_att_dim: attention
            widths of the corresponding `fuse` layers.
        dropout_proba: dropout parameter handed to `timedrop` while
            training (presumably a keep/drop probability — confirm
            against parts.timedrop).
        is_train: selects the 'Training'/'Testing' scope and whether
            dropout is active.
        **extras: ignored; lets callers pass a superset config dict.

    Returns:
        Tuple (inp_para_glove, inp_ques_glove, inp_para_cove,
        inp_ques_cove, para_nerpos, para_tf, para_em, start_prediction,
        end_prediction, exp_ans_start, exp_ans_end, inp_para_mask,
        inp_ques_mask).
    """
    main_scope = 'Training' if is_train else 'Testing'
    with tf.variable_scope(main_scope):
        # Dropout disabled entirely outside of training.
        drop_p = 1.0 if not is_train else dropout_proba
        # ---------------------reading
        logging.info("Defining inputs")
        # First we define shapes for the inputs we need
        p_g_sh = (batchsize, max_p_len, glove_dim)  # para, glove
        q_g_sh = (batchsize, max_q_len, glove_dim)  # ques, glove
        p_c_sh = (batchsize, max_p_len, cove_dim)  # para, cove
        q_c_sh = (batchsize, max_q_len, cove_dim)  # ques, cove
        p_ner_sh = (batchsize, max_p_len, nerpos_dim)  # para, ner + pos
        p_tf_sh = (batchsize, max_p_len, tf_dim)  # para, normalized term freq
        p_em_sh = (batchsize, max_p_len, 1)  # para, exact word match in q
        p_mask_sh = (batchsize, )  # paragraph lengths
        q_mask_sh = (batchsize, )  # question lengths
        ans_st_exp = (batchsize, max_p_len)  # answer start pointer
        ans_end_exp = (batchsize, max_p_len)  # answer end pointer
        # we generate the placeholders based on the shapes defined
        inp_para_glove = tf.placeholder(shape=p_g_sh, dtype=tf.float32)
        inp_ques_glove = tf.placeholder(shape=q_g_sh, dtype=tf.float32)
        inp_para_cove = tf.placeholder(shape=p_c_sh, dtype=tf.float32)
        inp_ques_cove = tf.placeholder(shape=q_c_sh, dtype=tf.float32)
        inp_para_mask = tf.placeholder(shape=p_mask_sh, dtype=tf.int32)
        inp_ques_mask = tf.placeholder(shape=q_mask_sh, dtype=tf.int32)
        para_nerpos = tf.placeholder(shape=p_ner_sh, dtype=tf.float32)
        para_tf = tf.placeholder(shape=p_tf_sh, dtype=tf.float32)
        para_em = tf.placeholder(shape=p_em_sh, dtype=tf.float32)
        exp_ans_start = tf.placeholder(shape=ans_st_exp, dtype=tf.float32)
        exp_ans_end = tf.placeholder(shape=ans_end_exp, dtype=tf.float32)
        # -------------------embeddings dropout
        para_glove = timedrop(inp_para_glove, drop_p, 'paraGlove')
        para_cove = timedrop(inp_para_cove, drop_p, 'paraCove')
        ques_glove = timedrop(inp_ques_glove, drop_p, 'quesGlove')
        ques_cove = timedrop(inp_ques_cove, drop_p, 'quesCove')
        # ------------------- mask generation
        # NOTE(review): one_hot of a *length* marks only the single
        # position == length, not positions < length — confirm `fuse`/
        # `word_fusion` expect this encoding rather than a sequence mask.
        p_mask = tf.expand_dims(tf.one_hot(inp_para_mask, max_p_len), axis=2)
        q_mask = tf.expand_dims(tf.one_hot(inp_ques_mask, max_q_len), axis=2)
        logging.info("Word level infusion")
        # Word-level fusion of question GloVe into the paragraph.
        para_q_fused_glove = word_fusion(para_glove, ques_glove,
                                         p_mask, q_mask)
        para_w_rep = tf.concat([para_glove, para_cove,
                                para_nerpos, para_tf],
                               axis=2)
        ques_w_rep = tf.concat([ques_glove, ques_cove],
                               axis=2)
        para_enhanced_rep = tf.concat([para_w_rep, para_em,
                                       para_q_fused_glove],
                                      axis=2)
        # ---------------------reading
        logging.info("Building Reading section")
        with tf.variable_scope("Reading"):
            # Low- and high-level BiLSTM passes over the question...
            f_read_q_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_q_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(ques_w_rep, drop_p, 'question_low_inp')
            ques_low_h, _ = birnn(cell_fw=f_read_q_low, cell_bw=b_read_q_low,
                                  inputs=inp, dtype=tf.float32,
                                  scope='ques_low_under',
                                  sequence_length=inp_ques_mask)
            ques_low_h = tf.concat(ques_low_h, axis=2)
            f_read_q_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_q_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(ques_low_h, drop_p, 'question_high_inp')
            ques_high_h, _ = birnn(cell_fw=f_read_q_high,
                                   cell_bw=b_read_q_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='ques_high_under',
                                   sequence_length=inp_ques_mask)
            ques_high_h = tf.concat(ques_high_h, axis=2)
            # ...and the same two passes over the paragraph.
            f_read_p_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_p_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(para_enhanced_rep, drop_p, 'para_low_inp')
            para_low_h, _ = birnn(cell_fw=f_read_p_low,
                                  cell_bw=b_read_p_low,
                                  inputs=inp,
                                  dtype=tf.float32,
                                  scope='para_low_under',
                                  sequence_length=inp_para_mask)
            para_low_h = tf.concat(para_low_h, axis=2)
            f_read_p_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_p_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(para_low_h, drop_p, 'para_high_inp')
            para_high_h, _ = birnn(cell_fw=f_read_p_high,
                                   cell_bw=b_read_p_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='para_high_under',
                                   # BUG FIX: was inp_ques_mask — question
                                   # lengths applied to paragraph sequences.
                                   sequence_length=inp_para_mask)
            para_high_h = tf.concat(para_high_h, axis=2)
        logging.info("Final Question Understanding")
        with tf.variable_scope("final_q_und"):
            f_uq = tf.contrib.rnn.LSTMCell(final_ques_under_dim//2)
            b_uq = tf.contrib.rnn.LSTMCell(final_ques_under_dim//2)
            inp = tf.concat([ques_low_h, ques_high_h], axis=2)
            inp = timedrop(inp, drop_p, 'final_q_und_inp')
            final_q_und, _ = birnn(cell_fw=f_uq,
                                   cell_bw=b_uq,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='final_q_und',
                                   sequence_length=inp_ques_mask)
            final_q_und = tf.concat(final_q_und, axis=2)
        logging.info("Fusion High level")
        with tf.variable_scope("high_level_fusion"):
            # History-of-word vectors: everything known about each token.
            para_HoW = tf.concat([para_glove, para_cove,
                                  para_low_h, para_high_h],
                                 axis=2)
            ques_HoW = tf.concat([ques_glove, ques_cove,
                                  ques_low_h, ques_high_h],
                                 axis=2)
            para_fused_l = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                sl_att_dim,
                                B=ques_low_h,
                                scope='low_level_fusion')
            para_fused_h = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                sh_att_dim,
                                B=ques_high_h,
                                scope='high_level_fusion')
            para_fused_u = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                su_att_dim,
                                B=final_q_und,
                                scope='understanding_fusion')
            inp = tf.concat([para_low_h, para_high_h,
                             para_fused_l, para_fused_h,
                             para_fused_u], axis=2)
            inp = timedrop(inp, drop_p, 'full_fused_para_inp')
            f_vc = tf.contrib.rnn.LSTMCell(fully_fused_para_dim//2)
            b_vc = tf.contrib.rnn.LSTMCell(fully_fused_para_dim//2)
            ff_para, _ = birnn(cell_fw=f_vc, cell_bw=b_vc, inputs=inp,
                               dtype=tf.float32, scope='full_fused_para',
                               sequence_length=inp_para_mask)
            ff_para = tf.concat(ff_para, axis=2)
        logging.info("Self boosting fusion")
        with tf.variable_scope("self_boosting_fusion"):
            para_HoW = tf.concat([para_glove, para_cove,
                                  para_low_h, para_high_h,
                                  para_fused_l, para_fused_h,
                                  para_fused_u, ff_para],
                                 axis=2)
            # Paragraph attends over itself (self-boosted fusion).
            ff_fused_para = fuse(para_HoW, para_HoW,
                                 p_mask, p_mask,
                                 selfboost_att_dim,
                                 B=ff_para,
                                 scope='self_boosted_fusion')
            f_sb = tf.contrib.rnn.LSTMCell(selfboost_rep_dim//2)
            b_sb = tf.contrib.rnn.LSTMCell(selfboost_rep_dim//2)
            inp = tf.concat([ff_para, ff_fused_para], axis=2)
            inp = timedrop(inp, drop_p, 'self_boosting_inp')
            final_para_rep, _ = birnn(cell_fw=f_sb, cell_bw=b_sb, inputs=inp,
                                      dtype=tf.float32, scope='self_boosted',
                                      # FIX: sequence_length was missing
                                      # here, unlike every other paragraph
                                      # RNN in this graph.
                                      sequence_length=inp_para_mask)
            final_para_rep = tf.concat(final_para_rep, axis=2)
        logging.info("Fusion Net construction complete")
        logging.info("SQuAD specific construction begins")
        # now we have U_c, U_q = final_para_rep, final_q_und
        # The rest of the network is for SQuAD
        logging.info("Sumarized question understanding vector")
        with tf.variable_scope("summarized_question"):
            # u_q^T w attention, softmax over time, weighted sum -> one
            # summary vector per batch element.
            w = tf.get_variable("W", shape=(final_ques_under_dim, 1),
                                dtype=tf.float32)
            uq_s = tf.unstack(final_q_und, axis=1)
            attention_weight = []
            for uq in tqdm(uq_s, desc='Question Summary Vector'):
                s = tf.matmul(uq, w)
                attention_weight.append(s)
            attention_weight = tf.nn.softmax(tf.stack(attention_weight,
                                                      axis=1))
            summarized_question = tf.reduce_sum(tf.multiply(final_q_und,
                                                            attention_weight),
                                                axis=1)
        logging.info("Span Start")
        with tf.variable_scope("span_start"):
            # Bilinear attention u_c^T W u_q over paragraph positions.
            w = tf.get_variable("W", shape=(selfboost_rep_dim,
                                            final_ques_under_dim),
                                dtype=tf.float32)
            uc_s = tf.unstack(final_para_rep, axis=1)
            attention_weight = []
            for uc in tqdm(uc_s, desc='StartSpan'):
                s = tf.matmul(uc, w)
                s = tf.reduce_sum(tf.multiply(s, summarized_question), axis=1)
                attention_weight.append(s)
            start_prediction = tf.nn.softmax(tf.stack(attention_weight,
                                                      axis=1))
        logging.info("Span End")
        with tf.variable_scope("span_end"):
            # Encode the start-weighted paragraph with a GRU seeded from the
            # question summary; its final memory conditions the end pointer.
            inp = tf.multiply(tf.expand_dims(start_prediction, axis=2),
                              final_para_rep)
            inp = timedrop(inp, drop_p, 'span_end_ques_encode_inp')
            sum_dim = summarized_question.get_shape().as_list()[-1]
            # BUG FIX: the original took tf.unstack(out, axis=1)[-1]; with
            # sequence_length set, dynamic_rnn zeroes outputs past each
            # sequence's true length, so the last time step is all-zero for
            # any paragraph shorter than max_p_len. The final *state* is the
            # hidden state at each sequence's true length — that is v_q.
            _, vq = tf.nn.dynamic_rnn(tf.contrib.rnn.GRUCell(sum_dim),
                                      inputs=inp, dtype=tf.float32,
                                      initial_state=summarized_question,
                                      scope='span_end_question_encoding',
                                      sequence_length=inp_para_mask)
            vq_dim = vq.get_shape().as_list()[-1]
            w = tf.get_variable("W", shape=(selfboost_rep_dim, vq_dim),
                                dtype=tf.float32)
            uc_s = tf.unstack(final_para_rep, axis=1)
            attention_weight = []
            # FIX: progress label said 'StartSpan' (copy-paste).
            for uc in tqdm(uc_s, desc='EndSpan'):
                s = tf.matmul(uc, w)
                s = tf.reduce_sum(tf.multiply(s, vq), axis=1)
                attention_weight.append(s)
            end_prediction = tf.nn.softmax(tf.stack(attention_weight, axis=1))
    logging.info("Model Creation Complete")
    return (inp_para_glove, inp_ques_glove, inp_para_cove, inp_ques_cove,
            para_nerpos, para_tf, para_em, start_prediction,
            end_prediction, exp_ans_start, exp_ans_end,
            inp_para_mask, inp_ques_mask)