fix format errors and typos

parent d78a8b4e1e
commit 310366bb54
@@ -24,7 +24,7 @@ def scaled_dot_product_attention(q,
                                  mask=None,
                                  dropout=0.0,
                                  training=True):
-    """Scaled dot product attention with masking.
+    r"""Scaled dot product attention with masking.

     Assume that q, k, v all have the same leading dimensions (denoted as * in
     descriptions below). Dropout is applied to attention weights before
@@ -33,24 +33,24 @@ def scaled_dot_product_attention(q,
     Parameters
     -----------

-    q : Tensor [shape=(*, T_q, d)]
+    q : Tensor [shape=(\*, T_q, d)]
         the query tensor.

-    k : Tensor [shape=(*, T_k, d)]
+    k : Tensor [shape=(\*, T_k, d)]
         the key tensor.

-    v : Tensor [shape=(*, T_k, d_v)]
+    v : Tensor [shape=(\*, T_k, d_v)]
         the value tensor.

-    mask : Tensor, [shape=(*, T_q, T_k) or broadcastable shape], optional
+    mask : Tensor, [shape=(\*, T_q, T_k) or broadcastable shape], optional
         the mask tensor, zeros correspond to paddings. Defaults to None.

     Returns
     ----------
-    out : Tensor [shape=(*, T_q, d_v)]
+    out : Tensor [shape=(\*, T_q, d_v)]
         the context vector.

-    attn_weights : Tensor [shape=(*, T_q, T_k)]
+    attn_weights : Tensor [shape=(\*, T_q, T_k)]
         the attention weights.
     """
     d = q.shape[-1] # we only support imperative execution
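For context, the escaped ``\*`` keeps the asterisk literal when Sphinx renders the docstring, which is also why the docstring gains the ``r`` prefix. Below is a minimal sketch of the documented contract, not the repository's implementation; the masking arithmetic and dropout mode are assumptions:

    import math
    import paddle
    import paddle.nn.functional as F

    def sdpa_sketch(q, k, v, mask=None, dropout=0.0, training=True):
        d = q.shape[-1]                                  # feature size of q and k
        scores = paddle.matmul(q, k, transpose_y=True) / math.sqrt(d)
        if mask is not None:
            # zeros in the mask mark paddings; suppress them before the softmax
            scores = scores + (1.0 - mask.astype(scores.dtype)) * -1e9
        attn_weights = F.softmax(scores, axis=-1)        # (*, T_q, T_k)
        attn_weights = F.dropout(attn_weights, dropout, training=training)
        out = paddle.matmul(attn_weights, v)             # (*, T_q, d_v)
        return out, attn_weights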
@@ -208,16 +208,16 @@ class MultiheadAttention(nn.Layer):

     k_dim : int, optional
         Feature size of the key of each scaled dot product attention. If not
-        provided, it is set to `model_dim / num_heads`. Defaults to None.
+        provided, it is set to ``model_dim / num_heads``. Defaults to None.

     v_dim : int, optional
         Feature size of the key of each scaled dot product attention. If not
-        provided, it is set to `model_dim / num_heads`. Defaults to None.
+        provided, it is set to ``model_dim / num_heads``. Defaults to None.

     Raises
     ---------
     ValueError
-        if `model_dim` is not divisible by `num_heads`.
+        If ``model_dim`` is not divisible by ``num_heads``.
     """
     def __init__(self,
                  model_dim: int,
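A small illustration of the documented defaults and the ``ValueError``; the values are hypothetical and ``MultiheadAttention`` itself is not constructed here:

    model_dim, num_heads = 512, 8
    if model_dim % num_heads != 0:
        raise ValueError("model_dim must be divisible by num_heads")
    k_dim = model_dim // num_heads   # default per-head feature size: 64
    v_dim = model_dim // num_heads   # same default for the value projection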
@@ -151,7 +151,7 @@ class STFT(nn.Layer):
         Returns
         ------------
         Tensor [shape=(B, C, 1, T)]
-            The power spectrum. (C = 1 + `n_fft` // 2)
+            The power spectrum.
         """
         real, imag = self(x)
         power = real**2 + imag**2
@@ -168,7 +168,7 @@ class STFT(nn.Layer):
         Returns
         ------------
         Tensor [shape=(B, C, 1, T)]
-            The magnitude of the spectrum. (C = 1 + `n_fft` // 2)
+            The magnitude of the spectrum.
         """
         power = self.power(x)
         magnitude = paddle.sqrt(power)
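The two STFT hunks share one shape contract; a sketch of how power and magnitude relate under it, with random stand-ins for the real and imaginary STFT outputs:

    import paddle

    n_fft = 1024
    C = 1 + n_fft // 2                    # channel count from the old docstring: 513
    real = paddle.randn([4, C, 1, 100])   # stand-in for the real part, (B, C, 1, T)
    imag = paddle.randn([4, C, 1, 100])   # stand-in for the imaginary part
    power = real**2 + imag**2             # (B, C, 1, T)
    magnitude = paddle.sqrt(power)        # (B, C, 1, T)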
@@ -6,18 +6,18 @@ def shuffle_dim(x, axis, perm=None):

     Parameters
     ----------
-    x : Tensor
-        The input tensor.
-
-    axis : int
-        The axis to shuffle.
-
-    perm : List[int], ndarray, optional
-        The order to reorder the tensor along the `axis`-th dimension.
-
-        It is a permutation of ``[0, d)``, where d is the size of the
-        ``axis``-th dimension of the input tensor. If not provided,
-        a random permutation is used. Defaults to None.
+    x : Tensor
+        The input tensor.
+
+    axis : int
+        The axis to shuffle.
+
+    perm : List[int], ndarray, optional
+        The order to reorder the tensor along the ``axis``-th dimension.
+
+        It is a permutation of ``[0, d)``, where d is the size of the
+        ``axis``-th dimension of the input tensor. If not provided,
+        a random permutation is used. Defaults to None.

     Returns
     ---------
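One way to realize the documented ``perm`` semantics; a sketch, not necessarily how ``shuffle_dim`` is implemented:

    import numpy as np
    import paddle

    x = paddle.arange(12).reshape([3, 4])
    axis = 1
    d = x.shape[axis]                     # size of the axis to shuffle
    perm = np.random.permutation(d)       # a permutation of [0, d)
    shuffled = paddle.index_select(x, paddle.to_tensor(perm), axis=axis)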
@@ -18,8 +18,8 @@ def weighted_mean(input, weight):
     -----------
     input : Tensor
         The input tensor.
-    weight : Tensor [broadcastable shape with the input]
-        The weight tensor.
+    weight : Tensor
+        The weight tensor with broadcastable shape with the input.

     Returns
     ----------
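A sketch of the documented behavior, assuming the reduction runs over all elements; a padding mask is a typical weight:

    import paddle

    x = paddle.randn([4, 10])
    w = paddle.cast(paddle.rand([4, 10]) > 0.2, x.dtype)   # hypothetical validity mask
    weighted = paddle.sum(x * w) / paddle.sum(w)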
@@ -54,7 +54,7 @@ def feature_mask(input, axis, dtype="bool"):
     Returns
     -------
     Tensor
-        The geenrated mask with `spatial` shape as mentioned above.
+        The generated mask with ``spatial`` shape as mentioned above.

         It has one less dimension than ``input`` does.
     """
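A hypothetical equivalent of what the docstring describes, assuming the feature dimension is the last axis and all-zero feature vectors mark paddings:

    import paddle

    x = paddle.randn([2, 5, 8])           # (B, T, C) features
    x[:, 3:, :] = 0.0                     # pretend the tail frames are paddings
    mask = paddle.any(x != 0, axis=-1)    # "spatial" mask, (B, T): one less dimension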
@@ -103,7 +103,7 @@ def future_mask(time_steps, dtype="bool"):
     time_steps : int
         Decoder time steps.
     dtype : str, optional
-        The data type of the generate mask, by default "bool"
+        The data type of the generated mask, by default "bool".

     Returns
     -------
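The future mask is the usual lower-triangular causal mask; a minimal sketch:

    import paddle

    time_steps = 4
    # position i may only attend to positions j <= i
    mask = paddle.tril(paddle.ones([time_steps, time_steps])).astype("bool")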
@@ -43,16 +43,16 @@ class PositionwiseFFN(nn.Layer):
         self.hidden_szie = hidden_size

     def forward(self, x):
-        """Forward pass of positionwise feed forward network.
+        r"""Forward pass of positionwise feed forward network.

         Parameters
         ----------
-        x : Tensor [shape=(*, input_size)]
+        x : Tensor [shape=(\*, input_size)]
             The input tensor, where ``\*`` means arbitary shape.

         Returns
         -------
-        Tensor [shape=(*, input_size)]
+        Tensor [shape=(\*, input_size)]
             The output tensor.
         """
         l1 = self.dropout(F.relu(self.linear1(x)))
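A self-contained sketch consistent with the documented shapes; only the ``l1 = ...`` line is taken from the diff, and ``linear2`` and the layer sizes are assumptions:

    import paddle.nn as nn
    import paddle.nn.functional as F

    class FFNSketch(nn.Layer):
        """Positionwise FFN: applied independently at every position."""
        def __init__(self, input_size, hidden_size, dropout=0.1):
            super().__init__()
            self.linear1 = nn.Linear(input_size, hidden_size)
            self.linear2 = nn.Linear(hidden_size, input_size)   # assumed projection back
            self.dropout = nn.Dropout(dropout)

        def forward(self, x):                       # x: (*, input_size)
            l1 = self.dropout(F.relu(self.linear1(x)))
            return self.linear2(l1)                 # (*, input_size)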
@@ -104,8 +104,9 @@ class TransformerEncoderLayer(nn.Layer):
         x : Tensor [shape=(batch_size, time_steps, d_model)]
             The input.

-        mask : Tensor [shape=(batch_size, time_steps, time_steps) or broadcastable shape]
-            The padding mask.
+        mask : Tensor
+            The padding mask. The shape is (batch_size, time_steps,
+            time_steps) or broadcastable shape.

         Returns
         -------
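A sketch of building a padding mask with a broadcastable shape for the encoder layer; ``lengths`` is a hypothetical tensor of valid lengths per batch item:

    import paddle

    lengths = paddle.to_tensor([5, 3])                      # valid time steps per item
    time_steps = 5
    positions = paddle.arange(time_steps).unsqueeze(0)      # (1, time_steps)
    mask = (positions < lengths.unsqueeze(1)).unsqueeze(1)  # (batch_size, 1, time_steps)
    # broadcastable against (batch_size, time_steps, time_steps) attention scores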