diff --git a/Deep Learning/template_p01.py b/Deep Learning/template_p01.py
index 190fb4e..2e25f69 100644
--- a/Deep Learning/template_p01.py
+++ b/Deep Learning/template_p01.py
@@ -18,11 +18,24 @@ def multiplicative_attention(decoder_hidden_state, encoder_hidden_states, W_mult
     decoder_hidden_state: np.array of shape (n_features_dec, 1)
     encoder_hidden_states: np.array of shape (n_features_enc, n_states)
     W_mult: np.array of shape (n_features_dec, n_features_enc)
-    
+
     return: np.array of shape (n_features_enc, 1)
         Final attention vector
     '''
-    # your code here
+    # Decoder state must be compatible with the first dimension of W_mult.
+    assert decoder_hidden_state.shape[0] == W_mult.shape[0], "Size mismatch between decoder_hidden_state and W_mult"
+
+    # Project the decoder state: W^T d -> (n_features_enc, 1).
+    attention_scores = W_mult.T @ decoder_hidden_state
+
+    # Dot with every encoder state: d^T W E -> one score per state.
+    attention_weights = np.dot(attention_scores.T, encoder_hidden_states)  # (1, n_states)
+
+    # Normalize the scores into attention weights.
+    attention_weights = softmax(attention_weights)  # (1, n_states)
+
+    # Weighted sum of encoder hidden states.
+    attention_vector = encoder_hidden_states @ attention_weights.T  # (n_features_enc, 1)
 
     return attention_vector
 
@@ -33,10 +46,27 @@ def additive_attention(decoder_hidden_state, encoder_hidden_states, v_add, W_add
     v_add: np.array of shape (n_features_int, 1)
     W_add_enc: np.array of shape (n_features_int, n_features_enc)
     W_add_dec: np.array of shape (n_features_int, n_features_dec)
-    
+
     return: np.array of shape (n_features_enc, 1)
         Final attention vector
     '''
-    # your code here
-    
-
+    # Linear projections of encoder states and decoder state.
+    encoder_transformed = W_add_enc @ encoder_hidden_states  # (n_features_int, n_states)
+    decoder_transformed = W_add_dec @ decoder_hidden_state  # (n_features_int, 1)
+
+    # Broadcast the decoder projection across all encoder positions.
+    decoder_transformed_expanded = np.tile(decoder_transformed, (1, encoder_hidden_states.shape[1]))  # (n_features_int, n_states)
+
+    # FIX: tanh goes BEFORE the projection onto v_add: e = v^T tanh(W_enc E + W_dec d).
+    hidden = np.tanh(encoder_transformed + decoder_transformed_expanded)  # (n_features_int, n_states)
+
+    # One attention score per encoder state.
+    scores = v_add.T @ hidden  # (1, n_states)
+
+    # FIX: softmax over the row, matching multiplicative_attention's usage
+    # (assumes the file's softmax normalizes each row — verify against its definition).
+    attention_weights = softmax(scores)  # (1, n_states)
+
+    # Weighted sum of encoder hidden states.
+    attention_vector = encoder_hidden_states @ attention_weights.T  # (n_features_enc, 1)
+
+    return attention_vector