1. fix format errors and typos
parent d78a8b4e1e
commit 310366bb54
@@ -24,7 +24,7 @@ def scaled_dot_product_attention(q,
 mask=None,
 dropout=0.0,
 training=True):
-"""Scaled dot product attention with masking.
+r"""Scaled dot product attention with masking.

 Assume that q, k, v all have the same leading dimensions (denoted as * in
 descriptions below). Dropout is applied to attention weights before
@@ -33,24 +33,24 @@ def scaled_dot_product_attention(q,
 Parameters
 -----------

-q : Tensor [shape=(*, T_q, d)]
+q : Tensor [shape=(\*, T_q, d)]
 the query tensor.

-k : Tensor [shape=(*, T_k, d)]
+k : Tensor [shape=(\*, T_k, d)]
 the key tensor.

-v : Tensor [shape=(*, T_k, d_v)]
+v : Tensor [shape=(\*, T_k, d_v)]
 the value tensor.

-mask : Tensor, [shape=(*, T_q, T_k) or broadcastable shape], optional
+mask : Tensor, [shape=(\*, T_q, T_k) or broadcastable shape], optional
 the mask tensor, zeros correspond to paddings. Defaults to None.

 Returns
 ----------
-out : Tensor [shape=(*, T_q, d_v)]
+out : Tensor [shape=(\*, T_q, d_v)]
 the context vector.

-attn_weights : Tensor [shape=(*, T_q, T_k)]
+attn_weights : Tensor [shape=(\*, T_q, T_k)]
 the attention weights.
 """
 d = q.shape[-1] # we only support imperative execution
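For readers skimming the docstring changes above, here is a minimal sketch of the computation the docstring describes, using only core paddle ops and the documented shape conventions; it is not the module's actual code, and the float 0/1 mask convention is an assumption.

import paddle
import paddle.nn.functional as F

# Sketch of scaled dot-product attention with the documented shapes:
# q: (*, T_q, d), k: (*, T_k, d), v: (*, T_k, d_v); zeros in `mask` mark padding.
def sdpa_sketch(q, k, v, mask=None):
    d = q.shape[-1]
    scores = paddle.matmul(q, k, transpose_y=True) / d**0.5   # (*, T_q, T_k)
    if mask is not None:                                      # assumed float 0/1 mask
        scores = scores + (1.0 - mask) * -1e9                 # suppress padded keys
    attn_weights = F.softmax(scores, axis=-1)                 # (*, T_q, T_k)
    out = paddle.matmul(attn_weights, v)                      # (*, T_q, d_v)
    return out, attn_weights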
@@ -208,16 +208,16 @@ class MultiheadAttention(nn.Layer):

 k_dim : int, optional
 Feature size of the key of each scaled dot product attention. If not
-provided, it is set to `model_dim / num_heads`. Defaults to None.
+provided, it is set to ``model_dim / num_heads``. Defaults to None.

 v_dim : int, optional
 Feature size of the key of each scaled dot product attention. If not
-provided, it is set to `model_dim / num_heads`. Defaults to None.
+provided, it is set to ``model_dim / num_heads``. Defaults to None.

 Raises
 ---------
 ValueError
-if `model_dim` is not divisible by `num_heads`.
+If ``model_dim`` is not divisible by ``num_heads``.
 """
 def __init__(self,
 model_dim: int,
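A tiny illustration of the default and the ValueError documented above (a sketch of the documented behaviour, not the constructor itself):

# k_dim / v_dim default to model_dim / num_heads; a ValueError is raised
# when model_dim is not divisible by num_heads.
model_dim, num_heads = 512, 8
if model_dim % num_heads != 0:
    raise ValueError("model_dim must be divisible by num_heads")
k_dim = v_dim = model_dim // num_heads  # 64 in this example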
@@ -151,7 +151,7 @@ class STFT(nn.Layer):
 Returns
 ------------
 Tensor [shape=(B, C, 1, T)]
-The power spectrum. (C = 1 + `n_fft` // 2)
+The power spectrum.
 """
 real, imag = self(x)
 power = real**2 + imag**2
@@ -168,7 +168,7 @@ class STFT(nn.Layer):
 Returns
 ------------
 Tensor [shape=(B, C, 1, T)]
-The magnitude of the spectrum. (C = 1 + `n_fft` // 2)
+The magnitude of the spectrum.
 """
 power = self.power(x)
 magnitude = paddle.sqrt(power)
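The two STFT helpers touched here are related by one elementwise identity. A short sketch with plain paddle ops; shapes follow the docstring, and C = 257 corresponds to the removed "1 + n_fft // 2" note with an assumed n_fft of 512.

import paddle

# real/imag stand in for the STFT outputs, each of shape (B, C, 1, T)
real = paddle.randn([2, 257, 1, 100])
imag = paddle.randn([2, 257, 1, 100])

power = real**2 + imag**2        # power spectrum, (B, C, 1, T)
magnitude = paddle.sqrt(power)   # magnitude spectrum, same shape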
@@ -6,18 +6,18 @@ def shuffle_dim(x, axis, perm=None):

 Parameters
 ----------
 x : Tensor
 The input tensor.

 axis : int
 The axis to shuffle.

 perm : List[int], ndarray, optional
-The order to reorder the tensor along the `axis`-th dimension.
+The order to reorder the tensor along the ``axis``-th dimension.

 It is a permutation of ``[0, d)``, where d is the size of the
 ``axis``-th dimension of the input tensor. If not provided,
 a random permutation is used. Defaults to None.

 Returns
 ---------
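A sketch of what the documented ``perm`` argument means, written with core paddle ops rather than the function itself:

import paddle

x = paddle.randn([4, 6, 8])
axis = 1
d = x.shape[axis]                                    # size of the shuffled axis
perm = paddle.randperm(d)                            # a permutation of [0, d)
shuffled = paddle.index_select(x, perm, axis=axis)   # reorder x along `axis`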
@@ -18,8 +18,8 @@ def weighted_mean(input, weight):
 -----------
 input : Tensor
 The input tensor.
-weight : Tensor [broadcastable shape with the input]
-The weight tensor.
+weight : Tensor
+The weight tensor with broadcastable shape with the input.

 Returns
 ----------
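For reference, the usual weighted-mean formula with a broadcastable weight; this is an assumption about the intended semantics, not necessarily this function's exact reduction:

import paddle

input = paddle.randn([4, 10])
weight = paddle.ones([4, 1])                     # broadcastable against `input`
w = paddle.broadcast_to(weight, input.shape)     # expand weight to input's shape
mean = paddle.sum(input * w) / paddle.sum(w)     # sum(x * w) / sum(w)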
@@ -54,7 +54,7 @@ def feature_mask(input, axis, dtype="bool"):
 Returns
 -------
 Tensor
-The geenrated mask with `spatial` shape as mentioned above.
+The geenrated mask with ``spatial`` shape as mentioned above.

 It has one less dimension than ``input`` does.
 """
@@ -103,7 +103,7 @@ def future_mask(time_steps, dtype="bool"):
 time_steps : int
 Decoder time steps.
 dtype : str, optional
-The data type of the generate mask, by default "bool"
+The data type of the generate mask, by default "bool".

 Returns
 -------
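A decoder future mask is conventionally lower triangular; the sketch below assumes that convention and the documented default dtype:

import paddle

time_steps = 5
# True where a query position may attend (current and past steps), False for future steps.
mask = paddle.cast(paddle.tril(paddle.ones([time_steps, time_steps])), "bool")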
@@ -43,16 +43,16 @@ class PositionwiseFFN(nn.Layer):
 self.hidden_szie = hidden_size

 def forward(self, x):
-"""Forward pass of positionwise feed forward network.
+r"""Forward pass of positionwise feed forward network.

 Parameters
 ----------
-x : Tensor [shape=(*, input_size)]
+x : Tensor [shape=(\*, input_size)]
 The input tensor, where ``\*`` means arbitary shape.

 Returns
 -------
-Tensor [shape=(*, input_size)]
+Tensor [shape=(\*, input_size)]
 The output tensor.
 """
 l1 = self.dropout(F.relu(self.linear1(x)))
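The line at the end of this hunk is the first half of the usual two-layer position-wise FFN; a self-contained sketch follows (layer names other than linear1 and dropout, and the dropout placement after the first layer only, are assumptions):

import paddle.nn as nn
import paddle.nn.functional as F

class PositionwiseFFNSketch(nn.Layer):
    """Minimal position-wise FFN: Linear -> ReLU -> Dropout -> Linear."""
    def __init__(self, input_size, hidden_size, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, input_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        l1 = self.dropout(F.relu(self.linear1(x)))  # (*, hidden_size)
        return self.linear2(l1)                     # (*, input_size)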
@@ -104,8 +104,9 @@ class TransformerEncoderLayer(nn.Layer):
 x : Tensor [shape=(batch_size, time_steps, d_model)]
 The input.

-mask : Tensor [shape=(batch_size, time_steps, time_steps) or broadcastable shape]
-The padding mask.
+mask : Tensor
+The padding mask. The shape is (batch_size, time_steps,
+time_steps) or broadcastable shape.

 Returns
 -------
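A sketch of building a padding mask with the broadcastable shape described above (usage illustration only; the length values are made up):

import numpy as np
import paddle

batch_size, time_steps = 2, 6
lengths = [6, 4]                                   # valid (non-padded) steps per example
np_mask = np.zeros([batch_size, 1, time_steps], dtype="float32")
for i, n in enumerate(lengths):
    np_mask[i, 0, :n] = 1.0                        # 1 for real frames, 0 for padding
mask = paddle.to_tensor(np_mask)                   # broadcasts against (B, T, T) in attention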