import paddle
from paddle.fluid.layers import sequence_mask


def id_mask(input, padding_index=0, dtype="bool"):
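    """Mark non-padding positions in an id sequence: 1 (True) wherever input != padding_index."""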
    return paddle.cast(input != padding_index, dtype)


def feature_mask(input, axis, dtype="bool"):
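    """Mark non-empty frames: 1 (True) wherever the features along `axis` are not all zero."""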
    feature_sum = paddle.sum(paddle.abs(input), axis)
    return paddle.cast(feature_sum != 0, dtype)


def combine_mask(padding_mask, no_future_mask):
    """
    Combine the padding mask and no future mask for transformer decoder.
    Padding mask is used to mask padding positions, and no future mask is used
    to prevent the decoder from seeing future information.

    The two masks are combined elementwise, so the padding mask should be
    unsqueezed beforehand (e.g. to shape(batch_size, time_steps, 1)) so that
    it broadcasts against the square no future mask.

    Args:
        padding_mask (Tensor): shape(batch_size, time_steps, 1), dtype: bool, float32 or float64, decoder padding mask.
        no_future_mask (Tensor): shape(time_steps, time_steps), dtype: bool, float32 or float64, no future mask.

    Returns:
        Tensor: shape(batch_size, time_steps, time_steps), combined mask.
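
    Example (illustrative; `ids` and `time_steps` are assumed to be defined):
        >>> padding_mask = id_mask(ids).unsqueeze(-1)         # (batch_size, time_steps, 1)
        >>> no_future = future_mask(time_steps)               # (time_steps, time_steps)
        >>> combined = combine_mask(padding_mask, no_future)  # (batch_size, time_steps, time_steps)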
    """
    # TODO: to support boolean mask by using logical_and?
    if padding_mask.dtype == paddle.fluid.core.VarDesc.VarType.BOOL:
        return paddle.logical_and(padding_mask, no_future_mask)
    else:
        return padding_mask * no_future_mask


def future_mask(time_steps, dtype="bool"):
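    """Lower triangular (time_steps, time_steps) mask: 1 (True) where j <= i, blocking attention to future steps."""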
    mask = paddle.tril(paddle.ones([time_steps, time_steps]))
    return paddle.cast(mask, dtype)
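

if __name__ == "__main__":
    # Illustrative usage sketch, not part of the original module: build the
    # combined decoder mask for a toy batch. Values and shapes are assumed.
    ids = paddle.to_tensor([[5, 7, 2, 0, 0],
                            [3, 1, 0, 0, 0]])  # 0 is the padding index
    padding_mask = id_mask(ids)                # (2, 5), True at non-padding positions
    no_future = future_mask(ids.shape[1])      # (5, 5), lower triangular
    # Unsqueeze so the padding mask broadcasts against the square mask.
    combined = combine_mask(padding_mask.unsqueeze(-1), no_future)
    print(combined.shape)  # expected: [2, 5, 5]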