From 6edc7d8474fa9ed557e4c2c097e18b3aac254c8f Mon Sep 17 00:00:00 2001
From: chenfeiyu
Date: Thu, 3 Dec 2020 14:54:32 +0800
Subject: [PATCH] switch back to standard implementation of positional encoding

---
 parakeet/modules/positional_encoding.py | 48 +++++--------
 1 file changed, 9 insertions(+), 39 deletions(-)

diff --git a/parakeet/modules/positional_encoding.py b/parakeet/modules/positional_encoding.py
index f7e9c18..5d862ff 100644
--- a/parakeet/modules/positional_encoding.py
+++ b/parakeet/modules/positional_encoding.py
@@ -1,7 +1,9 @@
 import math
+import numpy as np
 import paddle
 from paddle.nn import functional as F
 
+
 def positional_encoding(start_index, length, size, dtype=None):
     """
     Generate standard positional encoding.
@@ -9,9 +11,6 @@ def positional_encoding(start_index, length, size, dtype=None):
     pe(pos, 2i) = sin(pos / 10000 ** (2i / size))
     pe(pos, 2i+1) = cos(pos / 10000 ** (2i / size))
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
     Args:
         start_index (int): the start index.
         length (int): the length of the positional encoding.
         size (int): positional encoding dimension.
@@ -23,40 +22,11 @@
     if (size % 2 != 0):
         raise ValueError("size should be divisible by 2")
     dtype = dtype or paddle.get_default_dtype()
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    p = paddle.unsqueeze(index, -1) / (10000 ** (channel / float(size)))
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
-    return encodings
-
-def scalable_positional_encoding(start_index, length, size, omega):
-    """
-    A scalable positional encoding, which extends the standard positional
-    encoding by adding positioning rate (denoted as omega).
-
-    pe(pos, 2i) = sin(omega * pos / 10000 ** (2i / size))
-    pe(pos, 2i+1) = cos(omega * pos / 10000 ** (2i / size))
-
-    This implementation deviates from the standard implementation in that the
-    sin/cos channels are not interleaved.
-
-    Args:
-        start_index (int): the start index.
-        length (int): the length of the positional encoding.
-        size (int): positional encoding dimension.
-        omgea (Tensor): shape(batch_size, ), positional rates.
-
-    Returns:
-        encodings: shape(batch_size, length, size), position embedding, the
-            data type is the same as omega.
-    """
-    dtype = omega.dtype
-    index = paddle.arange(start_index, start_index + length, 1, dtype=dtype)
-    channel = paddle.arange(0, size, 2, dtype=dtype)
-
-    p = paddle.unsqueeze(omega, [1, 2]) \
-        * paddle.unsqueeze(index, [1]) \
-        / (10000 ** (channel / float(size)))
-
-    encodings = paddle.concat([paddle.sin(p), paddle.cos(p)], axis=-1)
+    channel = np.arange(0, size, 2)
+    index = np.arange(start_index, start_index + length, 1)
+    p = np.expand_dims(index, -1) / (10000 ** (channel / float(size)))
+    encodings = np.zeros([length, size])
+    encodings[:, 0::2] = np.sin(p)
+    encodings[:, 1::2] = np.cos(p)
+    encodings = paddle.to_tensor(encodings, dtype=dtype)
     return encodings
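
Reviewer note: below is a minimal sketch for sanity-checking the interleaved layout this patch restores; it is illustrative only and not part of the commit, and it assumes a Parakeet checkout with the patch applied plus Paddle >= 2.0. Even-numbered channels should carry the sin terms and odd-numbered channels the cos terms:

    import numpy as np
    from parakeet.modules.positional_encoding import positional_encoding

    # shape (length, size): 4 positions, 8 channels
    pe = positional_encoding(start_index=0, length=4, size=8)

    # spot-check position 1 against pe(pos, 2i) = sin(pos / 10000 ** (2i / size))
    pos, size = 1, 8
    angles = pos / np.power(10000.0, np.arange(0, size, 2) / size)
    np.testing.assert_allclose(pe.numpy()[pos, 0::2], np.sin(angles), rtol=1e-5)
    np.testing.assert_allclose(pe.numpy()[pos, 1::2], np.cos(angles), rtol=1e-5)

Note that the interleaved layout is a fixed permutation of the channels of the old concatenated layout, so checkpoints trained against one layout are not directly compatible with the other.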