switch back to standard implementation of positional encoding

This commit is contained in:
chenfeiyu 2020-12-03 14:54:32 +08:00
parent 404add2caa
commit 6edc7d8474
1 changed file with 9 additions and 39 deletions

View File

@ -1,7 +1,9 @@
import math import math
import numpy as np
import paddle import paddle
from paddle.nn import functional as F from paddle.nn import functional as F
def positional_encoding(start_index, length, size, dtype=None): def positional_encoding(start_index, length, size, dtype=None):
""" """
Generate standard positional encoding. Generate standard positional encoding.
@ -9,9 +11,6 @@ def positional_encoding(start_index, length, size, dtype=None):
pe(pos, 2i) = sin(pos / 10000 ** (2i / size)) pe(pos, 2i) = sin(pos / 10000 ** (2i / size))
pe(pos, 2i+1) = cos(pos / 10000 ** (2i / size)) pe(pos, 2i+1) = cos(pos / 10000 ** (2i / size))
This implementation deviates from the standard implementation in that the
sin/cos channels are not interleaved.
Args: Args:
start_index (int): the start index. start_index (int): the start index.
length (int): the length of the positional encoding. length (int): the length of the positional encoding.
@ -23,40 +22,11 @@ def positional_encoding(start_index, length, size, dtype=None):
if (size % 2 != 0): if (size % 2 != 0):
raise ValueError("size should be divisible by 2") raise ValueError("size should be divisible by 2")
dtype = dtype or paddle.get_default_dtype() dtype = dtype or paddle.get_default_dtype()
channel = paddle.arange(0, size, 2, dtype=dtype) channel = np.arange(0, size, 2)
index = paddle.arange(start_index, start_index + length, 1, dtype=dtype) index = np.arange(start_index, start_index + length, 1)
p = paddle.unsqueeze(index, -1) / (10000 ** (channel / float(size))) p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1) encodings = np.zeros([length, size])
return encodings encodings[:, 0::2] = np.sin(p)
encodings[:, 1::2] = np.cos(p)
def scalable_positional_encoding(start_index, length, size, omega): encodings = paddle.to_tensor(encodings)
"""
A scalable positional encoding, which extends the standard positional
encoding by adding positioning rate (denoted as omega).
pe(pos, 2i) = sin(omega * pos / 10000 ** (2i / size))
pe(pos, 2i+1) = cos(omega * pos / 10000 ** (2i / size))
This implementation deviates from the standard implementation in that the
sin/cos channels are not interleaved.
Args:
start_index (int): the start index.
length (int): the length of the positional encoding.
size (int): positional encoding dimension.
omega (Tensor): shape(batch_size, ), positional rates.
Returns:
encodings: shape(batch_size, length, size), position embedding, the
data type is the same as omega.
"""
dtype = omega.dtype
index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
channel = paddle.arange(0, size, 2, dtype=dtype)
p = paddle.unsqueeze(omega, [1, 2]) \
* paddle.unsqueeze(index, [1]) \
/ (10000 ** (channel / float(size)))
encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
return encodings return encodings