1. fix format errors and typos
parent d78a8b4e1e
commit 310366bb54
@@ -24,7 +24,7 @@ def scaled_dot_product_attention(q,
 mask=None,
 dropout=0.0,
 training=True):
-"""Scaled dot product attention with masking.
+r"""Scaled dot product attention with masking.

 Assume that q, k, v all have the same leading dimensions (denoted as * in
 descriptions below). Dropout is applied to attention weights before
@@ -33,24 +33,24 @@ def scaled_dot_product_attention(q,
 Parameters
 -----------

-q : Tensor [shape=(*, T_q, d)]
+q : Tensor [shape=(\*, T_q, d)]
 the query tensor.

-k : Tensor [shape=(*, T_k, d)]
+k : Tensor [shape=(\*, T_k, d)]
 the key tensor.

-v : Tensor [shape=(*, T_k, d_v)]
+v : Tensor [shape=(\*, T_k, d_v)]
 the value tensor.

-mask : Tensor, [shape=(*, T_q, T_k) or broadcastable shape], optional
+mask : Tensor, [shape=(\*, T_q, T_k) or broadcastable shape], optional
 the mask tensor, zeros correspond to paddings. Defaults to None.

 Returns
 ----------
-out : Tensor [shape=(*, T_q, d_v)]
+out : Tensor [shape=(\*, T_q, d_v)]
 the context vector.

-attn_weights : Tensor [shape=(*, T_q, T_k)]
+attn_weights : Tensor [shape=(\*, T_q, T_k)]
 the attention weights.
 """
 d = q.shape[-1] # we only support imperative execution
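For readers skimming the docstring changes above, here is a minimal sketch of the computation the docstring describes, using only core paddle ops and the documented shape conventions; it is not the module's actual code, and the float 0/1 mask convention is an assumption.

import paddle
import paddle.nn.functional as F

# Sketch of scaled dot-product attention with the documented shapes:
# q: (*, T_q, d), k: (*, T_k, d), v: (*, T_k, d_v); zeros in `mask` mark padding.
def sdpa_sketch(q, k, v, mask=None):
    d = q.shape[-1]
    scores = paddle.matmul(q, k, transpose_y=True) / d**0.5   # (*, T_q, T_k)
    if mask is not None:                                      # assumed float 0/1 mask
        scores = scores + (1.0 - mask) * -1e9                 # suppress padded keys
    attn_weights = F.softmax(scores, axis=-1)                 # (*, T_q, T_k)
    out = paddle.matmul(attn_weights, v)                      # (*, T_q, d_v)
    return out, attn_weights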
@@ -208,16 +208,16 @@ class MultiheadAttention(nn.Layer):

 k_dim : int, optional
 Feature size of the key of each scaled dot product attention. If not
-provided, it is set to `model_dim / num_heads`. Defaults to None.
+provided, it is set to ``model_dim / num_heads``. Defaults to None.

 v_dim : int, optional
 Feature size of the key of each scaled dot product attention. If not
-provided, it is set to `model_dim / num_heads`. Defaults to None.
+provided, it is set to ``model_dim / num_heads``. Defaults to None.

 Raises
 ---------
 ValueError
-if `model_dim` is not divisible by `num_heads`.
+If ``model_dim`` is not divisible by ``num_heads``.
 """
 def __init__(self,
 model_dim: int,
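A tiny illustration of the default and the ValueError documented above (a sketch of the documented behaviour, not the constructor itself):

# k_dim / v_dim default to model_dim / num_heads; a ValueError is raised
# when model_dim is not divisible by num_heads.
model_dim, num_heads = 512, 8
if model_dim % num_heads != 0:
    raise ValueError("model_dim must be divisible by num_heads")
k_dim = v_dim = model_dim // num_heads  # 64 in this example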
@@ -151,7 +151,7 @@ class STFT(nn.Layer):
 Returns
 ------------
 Tensor [shape=(B, C, 1, T)]
-The power spectrum. (C = 1 + `n_fft` // 2)
+The power spectrum.
 """
 real, imag = self(x)
 power = real**2 + imag**2
@@ -168,7 +168,7 @@ class STFT(nn.Layer):
 Returns
 ------------
 Tensor [shape=(B, C, 1, T)]
-The magnitude of the spectrum. (C = 1 + `n_fft` // 2)
+The magnitude of the spectrum.
 """
 power = self.power(x)
 magnitude = paddle.sqrt(power)
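The two STFT helpers touched here are related by one elementwise identity. A short sketch with plain paddle ops; shapes follow the docstring, and C = 257 corresponds to the removed "1 + n_fft // 2" note with an assumed n_fft of 512.

import paddle

# real/imag stand in for the STFT outputs, each of shape (B, C, 1, T)
real = paddle.randn([2, 257, 1, 100])
imag = paddle.randn([2, 257, 1, 100])

power = real**2 + imag**2        # power spectrum, (B, C, 1, T)
magnitude = paddle.sqrt(power)   # magnitude spectrum, same shape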
@@ -6,18 +6,18 @@ def shuffle_dim(x, axis, perm=None):

 Parameters
 ----------
 x : Tensor
 The input tensor.

 axis : int
 The axis to shuffle.

 perm : List[int], ndarray, optional
-The order to reorder the tensor along the `axis`-th dimension.
+The order to reorder the tensor along the ``axis``-th dimension.

 It is a permutation of ``[0, d)``, where d is the size of the
 ``axis``-th dimension of the input tensor. If not provided,
 a random permutation is used. Defaults to None.

 Returns
 ---------
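A sketch of what the documented ``perm`` argument means, written with core paddle ops rather than the function itself:

import paddle

x = paddle.randn([4, 6, 8])
axis = 1
d = x.shape[axis]                                    # size of the shuffled axis
perm = paddle.randperm(d)                            # a permutation of [0, d)
shuffled = paddle.index_select(x, perm, axis=axis)   # reorder x along `axis`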
@@ -18,8 +18,8 @@ def weighted_mean(input, weight):
 -----------
 input : Tensor
 The input tensor.
-weight : Tensor [broadcastable shape with the input]
-The weight tensor.
+weight : Tensor
+The weight tensor with broadcastable shape with the input.

 Returns
 ----------
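For reference, the usual weighted-mean formula with a broadcastable weight; this is an assumption about the intended semantics, not necessarily this function's exact reduction:

import paddle

input = paddle.randn([4, 10])
weight = paddle.ones([4, 1])                     # broadcastable against `input`
w = paddle.broadcast_to(weight, input.shape)     # expand weight to input's shape
mean = paddle.sum(input * w) / paddle.sum(w)     # sum(x * w) / sum(w)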
@@ -54,7 +54,7 @@ def feature_mask(input, axis, dtype="bool"):
 Returns
 -------
 Tensor
-The geenrated mask with `spatial` shape as mentioned above.
+The geenrated mask with ``spatial`` shape as mentioned above.

 It has one less dimension than ``input`` does.
 """
@@ -103,7 +103,7 @@ def future_mask(time_steps, dtype="bool"):
 time_steps : int
 Decoder time steps.
 dtype : str, optional
-The data type of the generate mask, by default "bool"
+The data type of the generate mask, by default "bool".

 Returns
 -------
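A decoder future mask is conventionally lower triangular; the sketch below assumes that convention and the documented default dtype:

import paddle

time_steps = 5
# True where a query position may attend (current and past steps), False for future steps.
mask = paddle.cast(paddle.tril(paddle.ones([time_steps, time_steps])), "bool")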
@@ -43,16 +43,16 @@ class PositionwiseFFN(nn.Layer):
 self.hidden_szie = hidden_size

 def forward(self, x):
-"""Forward pass of positionwise feed forward network.
+r"""Forward pass of positionwise feed forward network.

 Parameters
 ----------
-x : Tensor [shape=(*, input_size)]
+x : Tensor [shape=(\*, input_size)]
 The input tensor, where ``\*`` means arbitary shape.

 Returns
 -------
-Tensor [shape=(*, input_size)]
+Tensor [shape=(\*, input_size)]
 The output tensor.
 """
 l1 = self.dropout(F.relu(self.linear1(x)))
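The line at the end of this hunk is the first half of the usual two-layer position-wise FFN; a self-contained sketch follows (layer names other than linear1 and dropout, and the dropout placement after the first layer only, are assumptions):

import paddle.nn as nn
import paddle.nn.functional as F

class PositionwiseFFNSketch(nn.Layer):
    """Minimal position-wise FFN: Linear -> ReLU -> Dropout -> Linear."""
    def __init__(self, input_size, hidden_size, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, input_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        l1 = self.dropout(F.relu(self.linear1(x)))  # (*, hidden_size)
        return self.linear2(l1)                     # (*, input_size)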
@@ -104,8 +104,9 @@ class TransformerEncoderLayer(nn.Layer):
 x : Tensor [shape=(batch_size, time_steps, d_model)]
 The input.

-mask : Tensor [shape=(batch_size, time_steps, time_steps) or broadcastable shape]
-The padding mask.
+mask : Tensor
+The padding mask. The shape is (batch_size, time_steps,
+time_steps) or broadcastable shape.

 Returns
 -------
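A sketch of building a padding mask with the broadcastable shape described above (usage illustration only; the length values are made up):

import numpy as np
import paddle

batch_size, time_steps = 2, 6
lengths = [6, 4]                                   # valid (non-padded) steps per example
np_mask = np.zeros([batch_size, 1, time_steps], dtype="float32")
for i, n in enumerate(lengths):
    np_mask[i, 0, :n] = 1.0                        # 1 for real frames, 0 for padding
mask = paddle.to_tensor(np_mask)                   # broadcasts against (B, T, T) in attention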