Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 35 additions & 5 deletions Deep Learning/template_p01.py
Original file line number Diff line number Diff line change
def multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult):
    '''
    Multiplicative (Luong) attention.

    decoder_hidden_state: np.array of shape (n_features_dec, 1)
    encoder_hidden_states: np.array of shape (n_features_enc, n_states)
    W_mult: np.array of shape (n_features_dec, n_features_enc)

    return: np.array of shape (n_features_enc, 1)
        Final attention vector (weighted sum of encoder states).

    raises ValueError: if decoder_hidden_state and W_mult have
        incompatible leading dimensions.
    '''
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, so validation must not rely on them.
    if decoder_hidden_state.shape[0] != W_mult.shape[0]:
        raise ValueError("Size mismatch between decoder_hidden_state and W_mult")

    # Scores for each encoder state: s^T W E
    # (equivalent to the two-step (W^T s)^T E form) -> (1, n_states)
    attention_scores = decoder_hidden_state.T @ W_mult @ encoder_hidden_states

    # Normalize scores into attention weights over encoder states.
    attention_weights = softmax(attention_scores)  # (1, n_states)

    # Weighted sum of encoder states -> (n_features_enc, 1)
    return encoder_hidden_states @ attention_weights.T

def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add_enc, W_add_dec):
    '''
    Additive (Bahdanau) attention.

    decoder_hidden_state: np.array of shape (n_features_dec, 1)
    encoder_hidden_states: np.array of shape (n_features_enc, n_states)
    v_add: np.array of shape (n_features_int, 1)
    W_add_enc: np.array of shape (n_features_int, n_features_enc)
    W_add_dec: np.array of shape (n_features_int, n_features_dec)

    return: np.array of shape (n_features_enc, 1)
        Final attention vector (weighted sum of encoder states).
    '''
    # Project encoder and decoder states into the shared intermediate space.
    encoder_transformed = W_add_enc @ encoder_hidden_states  # (n_features_int, n_states)
    decoder_transformed = W_add_dec @ decoder_hidden_state   # (n_features_int, 1)

    # BUG FIX: tanh must wrap the summed projections BEFORE the v_add
    # projection — e = v^T tanh(W_enc E + W_dec s) — not be applied to the
    # already-projected scores as before (which squashed scores into
    # [-1, 1] and flattened the softmax distribution).
    # NumPy broadcasting adds the (n_features_int, 1) decoder column to
    # every encoder column, so no np.tile is needed.
    scores = v_add.T @ np.tanh(encoder_transformed + decoder_transformed)  # (1, n_states)

    # Normalize scores into attention weights over encoder states.
    attention_weights = softmax(scores.T)  # (n_states, 1)

    # Weighted sum of encoder states -> (n_features_enc, 1)
    return encoder_hidden_states @ attention_weights