Merge pull request #75 from PaddlePaddle/revert-74-reborn

Revert "bug fix: apply dropout to logits before softmax"
2021-01-07 15:23:50 +08:00 · 2021-01-07 15:23:50 +08:00 · 61ac117df5
parent 91c54575fe e88cbace1c
commit 61ac117df5
1 changed file with 1 addition and 1 deletion
--- a/parakeet/modules/attention.py
+++ b/parakeet/modules/attention.py
@@ -61,8 +61,8 @@ def scaled_dot_product_attention(q,
    if mask is not None:
        scaled_logit += paddle.scale((1.0 - mask), -1e9)  # hard coded here

-    scaled_logit = F.dropout(scaled_logit, dropout, training=training)
    attn_weights = F.softmax(scaled_logit, axis=-1)
+    attn_weights = F.dropout(attn_weights, dropout, training=training)
    out = paddle.matmul(attn_weights, v)
    return out, attn_weights