switch back to standard implementation of positional encoding
parent 404add2caa, commit 6edc7d8474
@@ -1,7 +1,9 @@
 import math
+
+import numpy as np
 import paddle
 from paddle.nn import functional as F
 
 
 def positional_encoding(start_index, length, size, dtype=None):
     """
     Generate standard positional encoding.
@@ -9,9 +11,6 @@ def positional_encoding(start_index, length, size, dtype=None):
     pe(pos, 2i) = sin(pos / 10000 ** (2i / size))
     pe(pos, 2i+1) = cos(pos / 10000 ** (2i / size))
 
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
     Args:
         start_index (int): the start index.
         length (int): the length of the positional encoding.
@@ -23,40 +22,11 @@ def positional_encoding(start_index, length, size, dtype=None):
     if (size % 2 != 0):
         raise ValueError("size should be divisible by 2")
     dtype = dtype or paddle.get_default_dtype()
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    p = paddle.unsqueeze(index, -1) / (10000 ** (channel / float(size)))
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
-    return encodings
-
-def scalable_positional_encoding(start_index, length, size, omega):
-    """
-    A scalable positional encoding, which extends the standard positional
-    encoding by adding positioning rate (denoted as omega).
-
-    pe(pos, 2i) = sin(omega * pos / 10000 ** (2i / size))
-    pe(pos, 2i+1) = cos(omega * pos / 10000 ** (2i / size))
-
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
-    Args:
-        start_index (int): the start index.
-        length (int): the length of the positional encoding.
-        size (int): positional encoding dimension.
-        omgea (Tensor): shape(batch_size, ), positional rates.
-
-    Returns:
-        encodings: shape(batch_size, length, size), position embedding, the
-        data type is the same as omega.
-    """
-    dtype = omega.dtype
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-
-    p = paddle.unsqueeze(omega, [1, 2]) \
-        * paddle.unsqueeze(index, [1]) \
-        / (10000 ** (channel / float(size)))
-
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
+    channel = np.arange(0, size, 2)
+    index = np.arange(start_index, start_index + length, 1)
+    p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
+    encodings = np.zeros([length, size])
+    encodings[:, 0::2] = np.sin(p)
+    encodings[:, 1::2] = np.cos(p)
+    encodings = paddle.to_tensor(encodings)
     return encodings
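For context, a minimal sketch of how the new interleaved encoding behaves. The function body is copied from the diff above (dtype handling elided for brevity); the shape check, the value checks, and the permutation comparison at the end are illustrative assumptions based on the pe(pos, 2i) formulas in the docstring, not part of the commit.

import numpy as np
import paddle

# Copy of the new implementation from the diff above.
def positional_encoding(start_index, length, size):
    if (size % 2 != 0):
        raise ValueError("size should be divisible by 2")
    channel = np.arange(0, size, 2)
    index = np.arange(start_index, start_index + length, 1)
    p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
    encodings = np.zeros([length, size])
    encodings[:, 0::2] = np.sin(p)  # even channels carry sin
    encodings[:, 1::2] = np.cos(p)  # odd channels carry cos: the standard interleaved layout
    return paddle.to_tensor(encodings)

pe = positional_encoding(0, 100, 16)
print(pe.shape)  # [100, 16]

# For channel pair i = 0 the divisor is 10000 ** 0 = 1, so per the docstring
# formulas pe[pos, 0] == sin(pos) and pe[pos, 1] == cos(pos).
pos = np.arange(100)
assert np.allclose(pe.numpy()[:, 0], np.sin(pos))
assert np.allclose(pe.numpy()[:, 1], np.cos(pos))

# The removed paddle version concatenated all sin channels before all cos
# channels; that layout equals the interleaved one under a fixed permutation.
perm = np.concatenate([np.arange(0, 16, 2), np.arange(1, 16, 2)])
old_layout = pe.numpy()[:, perm]  # == paddle.concat([sin(p), cos(p)], axis=-1)

In other words, the two layouts hold the same values in a different channel order, which is why the "sin/cos channels are not interleaved" caveat could be dropped from the docstring once the interleaved version was restored.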