switch back to standard implementation of positional encoding
parent 404add2caa, commit 6edc7d8474
@@ -1,7 +1,9 @@
 import math
+
+import numpy as np
 import paddle
 from paddle.nn import functional as F
 
 
 def positional_encoding(start_index, length, size, dtype=None):
     """
     Generate standard positional encoding.
@@ -9,9 +11,6 @@ def positional_encoding(start_index, length, size, dtype=None):
     pe(pos, 2i) = sin(pos / 10000 ** (2i / size))
     pe(pos, 2i+1) = cos(pos / 10000 ** (2i / size))
 
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
     Args:
         start_index (int): the start index.
         length (int): the length of the positional encoding.
@@ -23,40 +22,11 @@ def positional_encoding(start_index, length, size, dtype=None):
     if (size % 2 != 0):
         raise ValueError("size should be divisible by 2")
     dtype = dtype or paddle.get_default_dtype()
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    p = paddle.unsqueeze(index, -1) / (10000 ** (channel / float(size)))
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
-    return encodings
-
-def scalable_positional_encoding(start_index, length, size, omega):
-    """
-    A scalable positional encoding, which extends the standard positional
-    encoding by adding positioning rate (denoted as omega).
-
-    pe(pos, 2i) = sin(omega * pos / 10000 ** (2i / size))
-    pe(pos, 2i+1) = cos(omega * pos / 10000 ** (2i / size))
-
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
-    Args:
-        start_index (int): the start index.
-        length (int): the length of the positional encoding.
-        size (int): positional encoding dimension.
-        omgea (Tensor): shape(batch_size, ), positional rates.
-
-    Returns:
-        encodings: shape(batch_size, length, size), position embedding, the
-        data type is the same as omega.
-    """
-    dtype = omega.dtype
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-
-    p = paddle.unsqueeze(omega, [1, 2]) \
-        * paddle.unsqueeze(index, [1]) \
-        / (10000 ** (channel / float(size)))
-
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
+    channel = np.arange(0, size, 2)
+    index = np.arange(start_index, start_index + length, 1)
+    p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
+    encodings = np.zeros([length, size])
+    encodings[:, 0::2] = np.sin(p)
+    encodings[:, 1::2] = np.cos(p)
+    encodings = paddle.to_tensor(encodings)
     return encodings
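For context, a minimal sketch of how the new interleaved encoding behaves. The function body is copied from the diff above (dtype handling elided for brevity); the shape check, the value checks, and the permutation comparison at the end are illustrative assumptions based on the pe(pos, 2i) formulas in the docstring, not part of the commit.

import numpy as np
import paddle

# Copy of the new implementation from the diff above.
def positional_encoding(start_index, length, size):
    if (size % 2 != 0):
        raise ValueError("size should be divisible by 2")
    channel = np.arange(0, size, 2)
    index = np.arange(start_index, start_index + length, 1)
    p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
    encodings = np.zeros([length, size])
    encodings[:, 0::2] = np.sin(p)  # even channels carry sin
    encodings[:, 1::2] = np.cos(p)  # odd channels carry cos: the standard interleaved layout
    return paddle.to_tensor(encodings)

pe = positional_encoding(0, 100, 16)
print(pe.shape)  # [100, 16]

# For channel pair i = 0 the divisor is 10000 ** 0 = 1, so per the docstring
# formulas pe[pos, 0] == sin(pos) and pe[pos, 1] == cos(pos).
pos = np.arange(100)
assert np.allclose(pe.numpy()[:, 0], np.sin(pos))
assert np.allclose(pe.numpy()[:, 1], np.cos(pos))

# The removed paddle version concatenated all sin channels before all cos
# channels; that layout equals the interleaved one under a fixed permutation.
perm = np.concatenate([np.arange(0, 16, 2), np.arange(1, 16, 2)])
old_layout = pe.numpy()[:, perm]  # == paddle.concat([sin(p), cos(p)], axis=-1)

In other words, the two layouts hold the same values in a different channel order, which is why the "sin/cos channels are not interleaved" caveat could be dropped from the docstring once the interleaved version was restored.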