Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 45 additions & 3 deletions Deep Learning/template_p01.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ def softmax(vector):
return: np.array of shape (n, m)
Matrix where softmax is computed for every row independently
'''
nice_vector = vector - vector.max()
# 数值稳定处理:减去行最大值,并用 np.newaxis 保持维度
nice_vector = vector - np.max(vector, axis=1)[:, np.newaxis]

exp_vector = np.exp(nice_vector)
# 求分母:沿着 axis=1 求和,并用 np.newaxis 保持维度
exp_denominator = np.sum(exp_vector, axis=1)[:, np.newaxis]

softmax_ = exp_vector / exp_denominator
return softmax_

Expand All @@ -22,7 +26,18 @@ def multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult
return: np.array of shape (n_features_enc, 1)
Final attention vector
'''
# your code here
# 1. 计算 Attention Scores (e_i = s^T W_mult h_i)
# 步骤 1a: 计算 W_mult @ s
temp_dec_proj = np.dot(W_mult, decoder_hidden_state)

# 步骤 1b: 计算 (temp_dec_proj)^T @ h
attention_scores = np.dot(temp_dec_proj.T, encoder_hidden_states)

# 2. 计算 Attention Weights (Softmax)
attention_weights = softmax(attention_scores)

# 3. 计算 Context Vector (加权求和)
attention_vector = np.dot(encoder_hidden_states, attention_weights.T)

return attention_vector

Expand All @@ -37,6 +52,33 @@ def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add
return: np.array of shape (n_features_enc, 1)
Final attention vector
'''
# your code here
# 1. 投影 Q 和 K 到注意力空间 (n_features_int)

# 1a. 解码器状态投影 (W_add_dec @ s)
# dec_proj 形状: (n_features_int, 1)
dec_proj = np.dot(W_add_dec, decoder_hidden_state)

# 1b. 编码器状态投影 (W_add_enc @ h)
# enc_proj 形状: (n_features_int, n_states)
enc_proj = np.dot(W_add_enc, encoder_hidden_states)

# 2. 相加并应用激活函数 (tanh)
# H_sum = W_add_enc h + W_add_dec s
# dec_proj (n_features_int, 1) 会自动广播到 enc_proj (n_features_int, n_states)
Sum_Proj = enc_proj + dec_proj
H_tanh = np.tanh(Sum_Proj)

# 3. 计算 Attention Scores (e_i = v_add^T @ H_tanh)
# attention_scores 形状: (1, n_states)
attention_scores = np.dot(v_add.T, H_tanh)

# 4. 计算 Attention Weights (Softmax)
# attention_weights 形状: (1, n_states)
attention_weights = softmax(attention_scores)

# 5. 计算 Context Vector (加权求和)
# C = V @ alpha^T
# attention_vector 形状: (n_features_enc, 1)
attention_vector = np.dot(encoder_hidden_states, attention_weights.T)

return attention_vector