Merge pull request #74 from iclementine/reborn

bug fix: apply dropout to logits before softmax
This commit is contained in:
Feiyu Chan 2020-12-31 16:55:21 +08:00 committed by GitHub
commit 91c54575fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 1 addition and 1 deletion

View File

@@ -61,8 +61,8 @@ def scaled_dot_product_attention(q,
if mask is not None:
scaled_logit += paddle.scale((1.0 - mask), -1e9) # hard coded here
scaled_logit = F.dropout(scaled_logit, dropout, training=training)
attn_weights = F.softmax(scaled_logit, axis=-1)
attn_weights = F.dropout(attn_weights, dropout, training=training)
out = paddle.matmul(attn_weights, v)
return out, attn_weights