-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdecoder.py
More file actions
40 lines (31 loc) · 1.46 KB
/
decoder.py
File metadata and controls
40 lines (31 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import tensorflow as tf
def decoder(feature_vector_size, max_length, vocab_size,
            units=256, dropout_rate=0.4, recurrent_dropout_rate=0.5):
    """
    Create the decoder model for image-caption generation.

    The model merges two branches: a dense projection of a precomputed
    image feature vector, and a two-layer LSTM encoding of the partial
    caption sequence. Their concatenation is passed through a dense layer
    and a softmax over the vocabulary to predict the next word.

    Args:
        feature_vector_size (int): Size of the image feature vector
            (e.g. 100352 for the flattened output of ResNet50).
        max_length (int): Max length of a caption sequence (inputs are
            always padded/truncated to this length).
        vocab_size (int): Number of words in the tokenizer.
        units (int): Width of the embedding, LSTM, and dense layers.
            Defaults to 256 (the original hard-coded value).
        dropout_rate (float): Dropout applied to the image features and
            the embedded sequence. Defaults to 0.4.
        recurrent_dropout_rate (float): Dropout applied between the two
            LSTM layers. Defaults to 0.5.

    Returns:
        tf.keras.Model: Compiled decoder model. Expects inputs
            [image_features, padded_sequence] and outputs a softmax
            distribution of shape (batch, vocab_size); compiled with
            categorical cross-entropy, so targets must be one-hot.
    """
    # Image-feature branch: regularize then project to the merge width.
    inputs1 = tf.keras.layers.Input(shape=(feature_vector_size,))
    fe1 = tf.keras.layers.Dropout(dropout_rate)(inputs1)
    fe2 = tf.keras.layers.Dense(units, activation='relu')(fe1)

    # Sequence branch: mask_zero=True makes padding token 0 ignored by
    # the downstream LSTMs.
    inputs2 = tf.keras.layers.Input(shape=(max_length,))
    se1 = tf.keras.layers.Embedding(vocab_size, units, mask_zero=True)(inputs2)
    se2 = tf.keras.layers.Dropout(dropout_rate)(se1)

    # Stacked LSTMs: the first returns the full sequence so the second
    # can consume it; the second returns only the final state vector.
    se3 = tf.keras.layers.LSTM(units, return_sequences=True)(se2)
    se4 = tf.keras.layers.Dropout(recurrent_dropout_rate)(se3)
    se5 = tf.keras.layers.LSTM(units, return_sequences=False)(se4)

    # Merge branches and predict the next word over the vocabulary.
    decoder1 = tf.keras.layers.Concatenate()([fe2, se5])
    decoder2 = tf.keras.layers.Dense(units, activation='relu')(decoder1)
    outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(decoder2)

    model = tf.keras.models.Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model