-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmodel.py
More file actions
251 lines (229 loc) · 12.8 KB
/
model.py
File metadata and controls
251 lines (229 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import logging
from tqdm import tqdm
import tensorflow as tf
# `fuse`/`word_fusion` are attention-fusion layers, `timedrop` is a
# time-distributed dropout wrapper — all project-local (see parts.py).
from parts import fuse, word_fusion, timedrop
# NOTE(review): this configures the *root* logger at import time, which
# affects every module in the process — consider a named module logger.
logging.getLogger().setLevel("DEBUG")
# Shorthand for the TF 1.x bidirectional RNN constructor used throughout.
birnn = tf.nn.bidirectional_dynamic_rnn
def build(*, batchsize, max_p_len, glove_dim,
          cove_dim, max_q_len, sl_att_dim,
          nerpos_dim, tf_dim, reading_rep_dim,
          final_ques_under_dim, sh_att_dim,
          su_att_dim, fully_fused_para_dim,
          selfboost_att_dim, selfboost_rep_dim,
          dropout_proba, is_train, **extras):
    """Construct the FusionNet graph for SQuAD-style span prediction.

    Builds, in order: input placeholders, word-level fusion, two-layer
    "reading" BiLSTMs for paragraph and question, a final question
    understanding layer, high-level (fully-aware) fusion, self-boosted
    fusion, and the start/end answer-span pointers.

    Keyword Args:
        batchsize: fixed batch dimension of every placeholder.
        max_p_len / max_q_len: padded paragraph / question lengths.
        glove_dim / cove_dim: GloVe / CoVe embedding widths.
        nerpos_dim: width of the concatenated NER+POS feature vector.
        tf_dim: width of the normalized term-frequency feature.
        reading_rep_dim, final_ques_under_dim, fully_fused_para_dim,
            selfboost_rep_dim: output widths of the respective BiLSTMs
            (each direction's cell gets dim // 2 so the fw+bw concat
            matches the target width).
        sl_att_dim, sh_att_dim, su_att_dim, selfboost_att_dim: attention
            widths of the corresponding `fuse` layers.
        dropout_proba: dropout parameter handed to `timedrop` while
            training (presumably a keep/drop probability — confirm
            against parts.timedrop).
        is_train: selects the 'Training'/'Testing' scope and whether
            dropout is active.
        **extras: ignored; lets callers pass a superset config dict.

    Returns:
        Tuple (inp_para_glove, inp_ques_glove, inp_para_cove,
        inp_ques_cove, para_nerpos, para_tf, para_em, start_prediction,
        end_prediction, exp_ans_start, exp_ans_end, inp_para_mask,
        inp_ques_mask).
    """
    main_scope = 'Training' if is_train else 'Testing'
    with tf.variable_scope(main_scope):
        # Dropout disabled entirely outside of training.
        drop_p = 1.0 if not is_train else dropout_proba
        # ---------------------reading
        logging.info("Defining inputs")
        # First we define shapes for the inputs we need
        p_g_sh = (batchsize, max_p_len, glove_dim)  # para, glove
        q_g_sh = (batchsize, max_q_len, glove_dim)  # ques, glove
        p_c_sh = (batchsize, max_p_len, cove_dim)  # para, cove
        q_c_sh = (batchsize, max_q_len, cove_dim)  # ques, cove
        p_ner_sh = (batchsize, max_p_len, nerpos_dim)  # para, ner + pos
        p_tf_sh = (batchsize, max_p_len, tf_dim)  # para, normalized term freq
        p_em_sh = (batchsize, max_p_len, 1)  # para, exact word match in q
        p_mask_sh = (batchsize, )  # paragraph lengths
        q_mask_sh = (batchsize, )  # question lengths
        ans_st_exp = (batchsize, max_p_len)  # answer start pointer
        ans_end_exp = (batchsize, max_p_len)  # answer end pointer
        # we generate the placeholders based on the shapes defined
        inp_para_glove = tf.placeholder(shape=p_g_sh, dtype=tf.float32)
        inp_ques_glove = tf.placeholder(shape=q_g_sh, dtype=tf.float32)
        inp_para_cove = tf.placeholder(shape=p_c_sh, dtype=tf.float32)
        inp_ques_cove = tf.placeholder(shape=q_c_sh, dtype=tf.float32)
        inp_para_mask = tf.placeholder(shape=p_mask_sh, dtype=tf.int32)
        inp_ques_mask = tf.placeholder(shape=q_mask_sh, dtype=tf.int32)
        para_nerpos = tf.placeholder(shape=p_ner_sh, dtype=tf.float32)
        para_tf = tf.placeholder(shape=p_tf_sh, dtype=tf.float32)
        para_em = tf.placeholder(shape=p_em_sh, dtype=tf.float32)
        exp_ans_start = tf.placeholder(shape=ans_st_exp, dtype=tf.float32)
        exp_ans_end = tf.placeholder(shape=ans_end_exp, dtype=tf.float32)
        # -------------------embeddings dropout
        para_glove = timedrop(inp_para_glove, drop_p, 'paraGlove')
        para_cove = timedrop(inp_para_cove, drop_p, 'paraCove')
        ques_glove = timedrop(inp_ques_glove, drop_p, 'quesGlove')
        ques_cove = timedrop(inp_ques_cove, drop_p, 'quesCove')
        # ------------------- mask generation
        # NOTE(review): one_hot of a *length* marks only the single
        # position == length, not positions < length — confirm `fuse`/
        # `word_fusion` expect this encoding rather than a sequence mask.
        p_mask = tf.expand_dims(tf.one_hot(inp_para_mask, max_p_len), axis=2)
        q_mask = tf.expand_dims(tf.one_hot(inp_ques_mask, max_q_len), axis=2)
        logging.info("Word level infusion")
        # Word-level fusion of question GloVe into the paragraph.
        para_q_fused_glove = word_fusion(para_glove, ques_glove,
                                         p_mask, q_mask)
        para_w_rep = tf.concat([para_glove, para_cove,
                                para_nerpos, para_tf],
                               axis=2)
        ques_w_rep = tf.concat([ques_glove, ques_cove],
                               axis=2)
        para_enhanced_rep = tf.concat([para_w_rep, para_em,
                                       para_q_fused_glove],
                                      axis=2)
        # ---------------------reading
        logging.info("Building Reading section")
        with tf.variable_scope("Reading"):
            # Low- and high-level BiLSTM passes over the question...
            f_read_q_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_q_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(ques_w_rep, drop_p, 'question_low_inp')
            ques_low_h, _ = birnn(cell_fw=f_read_q_low, cell_bw=b_read_q_low,
                                  inputs=inp, dtype=tf.float32,
                                  scope='ques_low_under',
                                  sequence_length=inp_ques_mask)
            ques_low_h = tf.concat(ques_low_h, axis=2)
            f_read_q_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_q_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(ques_low_h, drop_p, 'question_high_inp')
            ques_high_h, _ = birnn(cell_fw=f_read_q_high,
                                   cell_bw=b_read_q_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='ques_high_under',
                                   sequence_length=inp_ques_mask)
            ques_high_h = tf.concat(ques_high_h, axis=2)
            # ...and the same two passes over the paragraph.
            f_read_p_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_p_low = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(para_enhanced_rep, drop_p, 'para_low_inp')
            para_low_h, _ = birnn(cell_fw=f_read_p_low,
                                  cell_bw=b_read_p_low,
                                  inputs=inp,
                                  dtype=tf.float32,
                                  scope='para_low_under',
                                  sequence_length=inp_para_mask)
            para_low_h = tf.concat(para_low_h, axis=2)
            f_read_p_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            b_read_p_high = tf.contrib.rnn.LSTMCell(reading_rep_dim//2)
            inp = timedrop(para_low_h, drop_p, 'para_high_inp')
            para_high_h, _ = birnn(cell_fw=f_read_p_high,
                                   cell_bw=b_read_p_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='para_high_under',
                                   # BUG FIX: was inp_ques_mask — question
                                   # lengths applied to paragraph sequences.
                                   sequence_length=inp_para_mask)
            para_high_h = tf.concat(para_high_h, axis=2)
        logging.info("Final Question Understanding")
        with tf.variable_scope("final_q_und"):
            f_uq = tf.contrib.rnn.LSTMCell(final_ques_under_dim//2)
            b_uq = tf.contrib.rnn.LSTMCell(final_ques_under_dim//2)
            inp = tf.concat([ques_low_h, ques_high_h], axis=2)
            inp = timedrop(inp, drop_p, 'final_q_und_inp')
            final_q_und, _ = birnn(cell_fw=f_uq,
                                   cell_bw=b_uq,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='final_q_und',
                                   sequence_length=inp_ques_mask)
            final_q_und = tf.concat(final_q_und, axis=2)
        logging.info("Fusion High level")
        with tf.variable_scope("high_level_fusion"):
            # History-of-word vectors: everything known about each token.
            para_HoW = tf.concat([para_glove, para_cove,
                                  para_low_h, para_high_h],
                                 axis=2)
            ques_HoW = tf.concat([ques_glove, ques_cove,
                                  ques_low_h, ques_high_h],
                                 axis=2)
            para_fused_l = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                sl_att_dim,
                                B=ques_low_h,
                                scope='low_level_fusion')
            para_fused_h = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                sh_att_dim,
                                B=ques_high_h,
                                scope='high_level_fusion')
            para_fused_u = fuse(para_HoW, ques_HoW,
                                p_mask, q_mask,
                                su_att_dim,
                                B=final_q_und,
                                scope='understanding_fusion')
            inp = tf.concat([para_low_h, para_high_h,
                             para_fused_l, para_fused_h,
                             para_fused_u], axis=2)
            inp = timedrop(inp, drop_p, 'full_fused_para_inp')
            f_vc = tf.contrib.rnn.LSTMCell(fully_fused_para_dim//2)
            b_vc = tf.contrib.rnn.LSTMCell(fully_fused_para_dim//2)
            ff_para, _ = birnn(cell_fw=f_vc, cell_bw=b_vc, inputs=inp,
                               dtype=tf.float32, scope='full_fused_para',
                               sequence_length=inp_para_mask)
            ff_para = tf.concat(ff_para, axis=2)
        logging.info("Self boosting fusion")
        with tf.variable_scope("self_boosting_fusion"):
            para_HoW = tf.concat([para_glove, para_cove,
                                  para_low_h, para_high_h,
                                  para_fused_l, para_fused_h,
                                  para_fused_u, ff_para],
                                 axis=2)
            # Paragraph attends over itself (self-boosted fusion).
            ff_fused_para = fuse(para_HoW, para_HoW,
                                 p_mask, p_mask,
                                 selfboost_att_dim,
                                 B=ff_para,
                                 scope='self_boosted_fusion')
            f_sb = tf.contrib.rnn.LSTMCell(selfboost_rep_dim//2)
            b_sb = tf.contrib.rnn.LSTMCell(selfboost_rep_dim//2)
            inp = tf.concat([ff_para, ff_fused_para], axis=2)
            inp = timedrop(inp, drop_p, 'self_boosting_inp')
            final_para_rep, _ = birnn(cell_fw=f_sb, cell_bw=b_sb, inputs=inp,
                                      dtype=tf.float32, scope='self_boosted',
                                      # FIX: sequence_length was missing
                                      # here, unlike every other paragraph
                                      # RNN in this graph.
                                      sequence_length=inp_para_mask)
            final_para_rep = tf.concat(final_para_rep, axis=2)
        logging.info("Fusion Net construction complete")
        logging.info("SQuAD specific construction begins")
        # now we have U_c, U_q = final_para_rep, final_q_und
        # The rest of the network is for SQuAD
        logging.info("Sumarized question understanding vector")
        with tf.variable_scope("summarized_question"):
            # u_q^T w attention, softmax over time, weighted sum -> one
            # summary vector per batch element.
            w = tf.get_variable("W", shape=(final_ques_under_dim, 1),
                                dtype=tf.float32)
            uq_s = tf.unstack(final_q_und, axis=1)
            attention_weight = []
            for uq in tqdm(uq_s, desc='Question Summary Vector'):
                s = tf.matmul(uq, w)
                attention_weight.append(s)
            attention_weight = tf.nn.softmax(tf.stack(attention_weight,
                                                      axis=1))
            summarized_question = tf.reduce_sum(tf.multiply(final_q_und,
                                                            attention_weight),
                                                axis=1)
        logging.info("Span Start")
        with tf.variable_scope("span_start"):
            # Bilinear attention u_c^T W u_q over paragraph positions.
            w = tf.get_variable("W", shape=(selfboost_rep_dim,
                                            final_ques_under_dim),
                                dtype=tf.float32)
            uc_s = tf.unstack(final_para_rep, axis=1)
            attention_weight = []
            for uc in tqdm(uc_s, desc='StartSpan'):
                s = tf.matmul(uc, w)
                s = tf.reduce_sum(tf.multiply(s, summarized_question), axis=1)
                attention_weight.append(s)
            start_prediction = tf.nn.softmax(tf.stack(attention_weight,
                                                      axis=1))
        logging.info("Span End")
        with tf.variable_scope("span_end"):
            # Encode the start-weighted paragraph with a GRU seeded from the
            # question summary; its final memory conditions the end pointer.
            inp = tf.multiply(tf.expand_dims(start_prediction, axis=2),
                              final_para_rep)
            inp = timedrop(inp, drop_p, 'span_end_ques_encode_inp')
            sum_dim = summarized_question.get_shape().as_list()[-1]
            # BUG FIX: the original took tf.unstack(out, axis=1)[-1]; with
            # sequence_length set, dynamic_rnn zeroes outputs past each
            # sequence's true length, so the last time step is all-zero for
            # any paragraph shorter than max_p_len. The final *state* is the
            # hidden state at each sequence's true length — that is v_q.
            _, vq = tf.nn.dynamic_rnn(tf.contrib.rnn.GRUCell(sum_dim),
                                      inputs=inp, dtype=tf.float32,
                                      initial_state=summarized_question,
                                      scope='span_end_question_encoding',
                                      sequence_length=inp_para_mask)
            vq_dim = vq.get_shape().as_list()[-1]
            w = tf.get_variable("W", shape=(selfboost_rep_dim, vq_dim),
                                dtype=tf.float32)
            uc_s = tf.unstack(final_para_rep, axis=1)
            attention_weight = []
            # FIX: progress label said 'StartSpan' (copy-paste).
            for uc in tqdm(uc_s, desc='EndSpan'):
                s = tf.matmul(uc, w)
                s = tf.reduce_sum(tf.multiply(s, vq), axis=1)
                attention_weight.append(s)
            end_prediction = tf.nn.softmax(tf.stack(attention_weight, axis=1))
    logging.info("Model Creation Complete")
    return (inp_para_glove, inp_ques_glove, inp_para_cove, inp_ques_cove,
            para_nerpos, para_tf, para_em, start_prediction,
            end_prediction, exp_ans_start, exp_ans_end,
            inp_para_mask, inp_ques_mask)