Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions Deep Learning/template_p01.py
Original file line number Diff line number Diff line change
def multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult):
    '''
    Luong-style (multiplicative) attention: score(s, h_i) = s^T W_mult h_i.

    decoder_hidden_state: np.array of shape (n_features_dec, 1)
    encoder_hidden_states: np.array of shape (n_features_enc, n_states)
    W_mult: np.array of shape (n_features_dec, n_features_enc)

    return: np.array of shape (n_features_enc, 1)
        Final attention vector: the softmax-weighted sum of encoder states.
    '''
    # Project every encoder state at once: W_mult @ h_i for all i.
    transformed_encoder = W_mult.dot(encoder_hidden_states)  # (n_features_dec, n_states)
    # Dot each projected state with the decoder state to get raw scores.
    attention_scores = decoder_hidden_state.T.dot(transformed_encoder)  # (1, n_states)
    # Normalize scores into weights that sum to 1.
    attention_weights = softmax(attention_scores)  # (1, n_states)
    # Weighted sum of encoder states -> final attention vector.
    attention_vector = attention_weights.dot(encoder_hidden_states.T).T  # (n_features_enc, 1)
    return attention_vector

def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add_enc, W_add_dec):
    '''
    Bahdanau-style (additive) attention:
    score(s, h_i) = v_add^T tanh(W_add_enc h_i + W_add_dec s).

    decoder_hidden_state: np.array of shape (n_features_dec, 1)
    encoder_hidden_states: np.array of shape (n_features_enc, n_states)
    v_add: np.array of shape (n_features_int, 1)
    W_add_enc: np.array of shape (n_features_int, n_features_enc)
    W_add_dec: np.array of shape (n_features_int, n_features_dec)

    return: np.array of shape (n_features_enc, 1)
        Final attention vector: the softmax-weighted sum of encoder states.
    '''
    # Compute all scores in one shot instead of looping over states:
    # W_add_dec @ s has shape (n_features_int, 1) and broadcasts across
    # the (n_features_int, n_states) projection of the encoder states.
    hidden = np.tanh(W_add_enc.dot(encoder_hidden_states) + W_add_dec.dot(decoder_hidden_state))
    attention_scores = v_add.T.dot(hidden)  # (1, n_states)

    # Normalize scores into weights that sum to 1.
    attention_weights = softmax(attention_scores)

    # Weighted sum of encoder states -> final attention vector.
    attention_vector = attention_weights.dot(encoder_hidden_states.T).T  # (n_features_enc, 1)

    return attention_vector