46 changes: 44 additions & 2 deletions Deep Learning/template_p01.py
@@ -22,10 +22,23 @@ def multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult
    return: np.array of shape (n_features_enc, 1)
        Final attention vector
    '''
    # 1. Compute the attention scores: e_i = (W_mult @ s)^T h_i
    # Step 1a: project the decoder state, W_mult @ s
    temp_dec_proj = np.dot(W_mult, decoder_hidden_state)

    # Step 1b: score every encoder state at once, (W_mult @ s)^T @ h
    attention_scores = np.dot(temp_dec_proj.T, encoder_hidden_states)

    # 2. Normalize the scores into attention weights with softmax
    attention_weights = softmax(attention_scores)

    # 3. Context vector: weighted sum of the encoder states
    attention_vector = np.dot(encoder_hidden_states, attention_weights.T)

    return attention_vector
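As a quick shape sanity check for `multiplicative_attention`, here is a minimal sketch. The sizes are made up, a stand-in `softmax` helper is included so the snippet runs on its own (the template presumably defines its own earlier in `template_p01.py`), and note that the two matrix products above only line up if `W_mult` has shape `(n_features_enc, n_features_dec)`.

```python
import numpy as np

def softmax(x):
    # Stand-in helper: numerically stable softmax over the last axis.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

# Made-up dimensions: 3 decoder features, 4 encoder features, 5 encoder states.
decoder_hidden_state = np.random.randn(3, 1)
encoder_hidden_states = np.random.randn(4, 5)
W_mult = np.random.randn(4, 3)  # (n_features_enc, n_features_dec)

context = multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult)
print(context.shape)  # -> (4, 1), i.e. (n_features_enc, 1)
```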

def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add_enc, W_add_dec):
    '''
    decoder_hidden_state: np.array of shape (n_features_dec, 1)
@@ -37,6 +50,35 @@ def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add
    return: np.array of shape (n_features_enc, 1)
        Final attention vector
    '''
    # 1. Project the decoder state (query) and encoder states (keys) into the
    #    shared attention space of size n_features_int.

    # 1a. Decoder-state projection: W_add_dec @ s
    #     dec_proj shape: (n_features_int, 1)
    dec_proj = np.dot(W_add_dec, decoder_hidden_state)

    # 1b. Encoder-states projection: W_add_enc @ h
    #     enc_proj shape: (n_features_int, n_states)
    enc_proj = np.dot(W_add_enc, encoder_hidden_states)

    # 2. Add the projections and apply tanh: tanh(W_add_enc h + W_add_dec s);
    #    dec_proj (n_features_int, 1) broadcasts across the n_states columns
    #    of enc_proj (n_features_int, n_states).
    sum_proj = enc_proj + dec_proj
    h_tanh = np.tanh(sum_proj)

    # 3. Attention scores: e_i = v_add^T @ tanh(...)
    #    attention_scores shape: (1, n_states)
    attention_scores = np.dot(v_add.T, h_tanh)

    # 4. Normalize the scores into attention weights with softmax
    #    attention_weights shape: (1, n_states)
    attention_weights = softmax(attention_scores)

    # 5. Context vector: weighted sum of the encoder states
    #    attention_vector shape: (n_features_enc, 1)
    attention_vector = np.dot(encoder_hidden_states, attention_weights.T)

    return attention_vector
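And a matching sketch for `additive_attention`, continuing with the `numpy` import and `softmax` stand-in from the previous snippet. The dimensions are again made up, with `n_features_int` as the size of the shared attention space.

```python
n_features_dec, n_features_enc, n_features_int, n_states = 3, 4, 6, 5

decoder_hidden_state = np.random.randn(n_features_dec, 1)
encoder_hidden_states = np.random.randn(n_features_enc, n_states)
v_add = np.random.randn(n_features_int, 1)
W_add_enc = np.random.randn(n_features_int, n_features_enc)
W_add_dec = np.random.randn(n_features_int, n_features_dec)

context = additive_attention(decoder_hidden_state, encoder_hidden_states,
                             v_add, W_add_enc, W_add_dec)
print(context.shape)  # -> (4, 1), i.e. (n_features_enc, 1)
```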