Adjust the directory structure

This commit is contained in:
lifuchen 2020-02-11 08:57:30 +00:00 committed by chenfeiyu
parent fc84ca2d4b
commit 6908ec520c
17 changed files with 115 additions and 439 deletions

View File

@@ -35,7 +35,7 @@ epochs: 10000
lr: 0.001
save_step: 500
use_gpu: True
use_data_parallel: True
use_data_parallel: False
data_path: ../../dataset/LJSpeech-1.1
transtts_path: ../TransformerTTS/checkpoint/

View File

@@ -4,7 +4,7 @@ import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.ffn import PositionwiseFeedForward
class FFTBlock(dg.Layer):
def __init__(self, d_model, d_inner, n_head, d_k, d_v, filter_size, padding, dropout=0.2):

View File

@@ -4,7 +4,7 @@ import parakeet.models.fastspeech.utils
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
from parakeet.modules.layers import Conv, Linear
from parakeet.modules.customized import Conv1D
class LengthRegulator(dg.Layer):
def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
@@ -82,20 +82,31 @@ class DurationPredictor(dg.Layer):
self.filter_size = filter_size
self.dropout = dropout
self.conv1 = Conv(in_channels = self.input_size,
k = math.sqrt(1 / self.input_size)
self.conv1 = Conv1D(in_channels = self.input_size,
out_channels = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format='NTC')
self.conv2 = Conv(in_channels = self.out_channels,
k = math.sqrt(1 / self.out_channels)
self.conv2 = Conv1D(in_channels = self.out_channels,
out_channels = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format='NTC')
self.layer_norm1 = dg.LayerNorm(self.out_channels)
self.layer_norm2 = dg.LayerNorm(self.out_channels)
self.linear = Linear(self.out_channels, 1)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
k = math.sqrt(1 / self.out_channels)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear = dg.Linear(self.out_channels, 1, param_attr = self.weight,
bias_attr = self.bias)
def forward(self, encoder_output):
"""

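Across these hunks the commit replaces the custom Conv/Linear wrappers with parakeet.modules.customized.Conv1D and plain dg.Linear, spelling the initialization out at each call site: Xavier-initialized weights, and a bias drawn uniformly from [-k, k] with k = sqrt(1/fan_in). A minimal sketch of that pattern, assuming Paddle 1.x fluid dygraph; the helper name and sizes are illustrative, not from the repo:

import math
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

def linear_with_commit_init(in_features, out_features):
    # Xavier for the weight, Uniform(-k, k) with k = sqrt(1/fan_in) for the
    # bias -- the same pattern this commit inlines into DurationPredictor.
    k = math.sqrt(1 / in_features)
    return dg.Linear(
        in_features, out_features,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer()),
        bias_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k)))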
View File

@@ -1,9 +1,6 @@
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.FFTBlock import FFTBlock
class Decoder(dg.Layer):

View File

@@ -1,9 +1,6 @@
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.FFTBlock import FFTBlock
class Encoder(dg.Layer):

View File

@@ -1,10 +1,8 @@
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.utils import *
from parakeet.models.transformerTTS.post_convnet import PostConvNet
from parakeet.models.fastspeech.LengthRegulator import LengthRegulator
from parakeet.models.fastspeech.encoder import Encoder
from parakeet.models.fastspeech.decoder import Decoder
@@ -39,7 +37,13 @@ class FastSpeech(dg.Layer):
fft_conv1d_kernel=cfg.fft_conv1d_filter,
fft_conv1d_padding=cfg.fft_conv1d_padding,
dropout=0.1)
self.mel_linear = Linear(cfg.fs_hidden_size, cfg.audio.num_mels * cfg.audio.outputs_per_step)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
k = math.sqrt(1 / cfg.fs_hidden_size)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.mel_linear = dg.Linear(cfg.fs_hidden_size,
cfg.audio.num_mels * cfg.audio.outputs_per_step,
param_attr = self.weight,
bias_attr = self.bias,)
self.postnet = PostConvNet(n_mels=cfg.audio.num_mels,
num_hidden=512,
filter_size=5,

View File

@@ -3,8 +3,8 @@ from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Pool1D, Linear
from parakeet.modules.dynamicGRU import DynamicGRU
from parakeet.modules.customized import Pool1D, Conv1D
from parakeet.modules.dynamic_gru import DynamicGRU
import numpy as np
class CBHG(dg.Layer):
@@ -23,16 +23,22 @@ class CBHG(dg.Layer):
self.hidden_size = hidden_size
self.projection_size = projection_size
self.conv_list = []
self.conv_list.append(Conv(in_channels = projection_size,
k = math.sqrt(1 / projection_size)
self.conv_list.append(Conv1D(in_channels = projection_size,
out_channels = hidden_size,
filter_size = 1,
padding = int(np.floor(1/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT"))
k = math.sqrt(1 / hidden_size)
for i in range(2,K+1):
self.conv_list.append(Conv(in_channels = hidden_size,
self.conv_list.append(Conv1D(in_channels = hidden_size,
out_channels = hidden_size,
filter_size = i,
padding = int(np.floor(i/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT"))
for i, layer in enumerate(self.conv_list):
@@ -48,16 +54,22 @@ class CBHG(dg.Layer):
conv_outdim = hidden_size * K
self.conv_projection_1 = Conv(in_channels = conv_outdim,
k = math.sqrt(1 / conv_outdim)
self.conv_projection_1 = Conv1D(in_channels = conv_outdim,
out_channels = hidden_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT")
self.conv_projection_2 = Conv(in_channels = hidden_size,
k = math.sqrt(1 / hidden_size)
self.conv_projection_2 = Conv1D(in_channels = hidden_size,
out_channels = projection_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT")
self.batchnorm_proj_1 = dg.BatchNorm(hidden_size,
@@ -73,8 +85,13 @@ class CBHG(dg.Layer):
h_0 = np.zeros((batch_size, hidden_size // 2), dtype="float32")
h_0 = dg.to_variable(h_0)
self.fc_forward1 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_reverse1 = Linear(hidden_size, hidden_size // 2 * 3)
k = math.sqrt(1 / hidden_size)
self.fc_forward1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward1 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
@@ -84,8 +101,12 @@ class CBHG(dg.Layer):
origin_mode=True,
h_0 = h_0)
self.fc_forward2 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_reverse2 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_forward2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward2 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
@@ -145,10 +166,14 @@ class Highwaynet(dg.Layer):
self.gates = []
self.linears = []
k = math.sqrt(1 / num_units)
for i in range(num_layers):
self.linears.append(Linear(num_units, num_units))
self.gates.append(Linear(num_units, num_units))
self.linears.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
self.gates.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
for i, (linear, gate) in enumerate(zip(self.linears,self.gates)):
self.add_sublayer("linears_{}".format(i), linear)

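The hidden_size // 2 * 3 width of the fc_forward/fc_reverse projections above comes from dg.GRUUnit, which DynamicGRU wraps: the unit consumes an input three times its hidden width (update gate, reset gate, candidate state). A hedged usage sketch, assuming the renamed module parakeet.modules.dynamic_gru keeps the signature of the dynamicGRU.py deleted later in this commit; batch and length are illustrative:

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.modules.dynamic_gru import DynamicGRU

with dg.guard():
    hidden = 256
    h_0 = dg.to_variable(np.zeros((4, hidden // 2), dtype="float32"))
    fc_forward = dg.Linear(hidden, hidden // 2 * 3)  # 3x width for GRUUnit
    gru_forward = DynamicGRU(size=hidden // 2, is_reverse=False,
                             origin_mode=True, h_0=h_0)
    x = dg.to_variable(np.random.randn(4, 20, hidden).astype("float32"))
    out = gru_forward(fc_forward(x))  # (4, 20, hidden // 2)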
View File

@@ -1,12 +1,12 @@
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.prenet import PreNet
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformerTTS.prenet import PreNet
from parakeet.models.transformerTTS.post_convnet import PostConvNet
class Decoder(dg.Layer):
def __init__(self, num_hidden, config, num_head=4):
super(Decoder, self).__init__()
@@ -24,7 +24,10 @@ class Decoder(dg.Layer):
hidden_size = num_hidden * 2,
output_size = num_hidden,
dropout_rate=0.2)
self.linear = Linear(num_hidden, num_hidden)
k = math.sqrt(1 / num_hidden)
self.linear = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.selfattn_layers = [MultiheadAttention(num_hidden, num_hidden//num_head, num_hidden//num_head) for _ in range(3)]
for i, layer in enumerate(self.selfattn_layers):
@@ -35,8 +38,12 @@ class Decoder(dg.Layer):
self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden*num_head, filter_size=1) for _ in range(3)]
for i, layer in enumerate(self.ffns):
self.add_sublayer("ffns_{}".format(i), layer)
self.mel_linear = Linear(num_hidden, config.audio.num_mels * config.audio.outputs_per_step)
self.stop_linear = Linear(num_hidden, 1)
self.mel_linear = dg.Linear(num_hidden, config.audio.num_mels * config.audio.outputs_per_step,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.stop_linear = dg.Linear(num_hidden, 1,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.postconvnet = PostConvNet(config.audio.num_mels, config.hidden_size,
filter_size = 5, padding = 4, num_conv=5,

View File

@@ -1,9 +1,8 @@
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformerTTS.encoderprenet import EncoderPrenet
class Encoder(dg.Layer):

View File

@@ -3,7 +3,7 @@ from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Linear
from parakeet.modules.customized import Conv1D
import numpy as np
@@ -16,17 +16,23 @@ class EncoderPrenet(dg.Layer):
self.embedding = dg.Embedding( size = [len(symbols), embedding_size],
padding_idx = None)
self.conv_list = []
self.conv_list.append(Conv(in_channels = embedding_size,
k = math.sqrt(1 / embedding_size)
self.conv_list.append(Conv1D(in_channels = embedding_size,
out_channels = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn,
data_format = "NCT"))
k = math.sqrt(1 / num_hidden)
for _ in range(2):
self.conv_list.append(Conv(in_channels = num_hidden,
self.conv_list.append(Conv1D(in_channels = num_hidden,
out_channels = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn,
data_format = "NCT"))
@@ -39,7 +45,10 @@ class EncoderPrenet(dg.Layer):
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
self.projection = Linear(num_hidden, num_hidden)
k = math.sqrt(1 / num_hidden)
self.projection = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
def forward(self, x):
x = self.embedding(x) # (batch_size, seq_len, embedding_size)

View File

@@ -1,6 +1,6 @@
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.customized import Conv1D
from parakeet.modules.utils import *
from parakeet.models.transformerTTS.CBHG import CBHG

View File

@@ -1,52 +0,0 @@
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
class DynamicGRU(dg.Layer):
def __init__(self,
size,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
h_0=None,
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
self.gru_unit = dg.GRUUnit(
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
"""
Dynamic GRU block.
Args:
inputs (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result computed by the GRU.
"""
hidden = self.h_0
res = []
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = layers.concat(res, axis=1)
return res

View File

@@ -1,52 +0,0 @@
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import math
from parakeet.modules.layers import Conv
class PositionwiseFeedForward(dg.Layer):
''' A two-feed-forward-layer module '''
def __init__(self, d_in, num_hidden, filter_size, padding=0, use_cudnn=True, dropout=0.1):
super(PositionwiseFeedForward, self).__init__()
self.num_hidden = num_hidden
self.use_cudnn = use_cudnn
self.dropout = dropout
self.w_1 = Conv(in_channels = d_in,
out_channels = num_hidden,
filter_size = filter_size,
padding=padding,
use_cudnn = use_cudnn,
data_format = "NTC")
self.w_2 = Conv(in_channels = num_hidden,
out_channels = d_in,
filter_size = filter_size,
padding=padding,
use_cudnn = use_cudnn,
data_format = "NTC")
self.layer_norm = dg.LayerNorm(d_in)
def forward(self, input):
"""
Feed Forward Network.
Args:
input (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result after FFN.
"""
# FFN network
x = self.w_2(layers.relu(self.w_1(input)))
# dropout
x = layers.dropout(x, self.dropout)
# residual connection
x = x + input
#layer normalization
output = self.layer_norm(x)
return output

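The module deleted above now lives at parakeet.modules.ffn, matching the new imports in FFTBlock and the TransformerTTS decoder. A hedged usage sketch, assuming the moved class keeps the signature shown in the deleted file; the sizes are illustrative:

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.modules.ffn import PositionwiseFeedForward

with dg.guard():
    # conv -> relu -> conv, then dropout, residual add, and LayerNorm
    ffn = PositionwiseFeedForward(d_in=256, num_hidden=1024, filter_size=1)
    x = dg.to_variable(np.random.randn(4, 20, 256).astype("float32"))
    y = ffn(x)  # (4, 20, 256), same shape as the input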
View File

@@ -1,177 +0,0 @@
import math
import numpy as np
import paddle
from paddle import fluid
import paddle.fluid.dygraph as dg
class Linear(dg.Layer):
def __init__(self, in_features, out_features, is_bias=True, dtype="float32"):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.dtype = dtype
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.bias = is_bias
if is_bias is not False:
k = math.sqrt(1 / in_features)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear = dg.Linear(in_features, out_features, param_attr = self.weight,
bias_attr = self.bias,)
def forward(self, x):
x = self.linear(x)
return x
class Conv(dg.Layer):
def __init__(self, in_channels, out_channels, filter_size=1,
padding=0, dilation=1, stride=1, use_cudnn=True,
data_format="NCT", is_bias=True):
super(Conv, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.filter_size = filter_size
self.padding = padding
self.dilation = dilation
self.stride = stride
self.use_cudnn = use_cudnn
self.data_format = data_format
self.is_bias = is_bias
self.weight_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer())
self.bias_attr = None
if is_bias is not False:
k = math.sqrt(1 / in_channels)
self.bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k))
self.conv = Conv1D( in_channels = in_channels,
out_channels = out_channels,
filter_size = filter_size,
padding = padding,
dilation = dilation,
stride = stride,
param_attr = self.weight_attr,
bias_attr = self.bias_attr,
use_cudnn = use_cudnn,
data_format = data_format)
def forward(self, x):
x = self.conv(x)
return x
class Conv1D(dg.Layer):
"""
A 1-D convolution block implemented with Conv2D. For simplicity, and to
ensure the output has the same length as the input, it does not allow
stride > 1.
"""
def __init__(self,
in_channels,
out_channels,
filter_size=3,
padding=0,
dilation=1,
stride=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
data_format='NCT',
dtype="float32"):
super(Conv1D, self).__init__(dtype=dtype)
self.padding = padding
self.in_channels = in_channels
self.num_filters = out_channels
self.filter_size = filter_size
self.stride = stride
self.dilation = dilation
self.padding = padding
self.act = act
self.data_format = data_format
self.conv = dg.Conv2D(
num_channels=in_channels,
num_filters=out_channels,
filter_size=(1, filter_size),
stride=(1, stride),
dilation=(1, dilation),
padding=(0, padding),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
def forward(self, x):
"""
Args:
x (Variable): Shape(B, C_in, T) in NCT layout, or Shape(B, T, C_in)
in NTC, the input, where C_in means input channels.
Returns:
x (Variable): the outputs in the same layout, where C_out means
output channels (num_filters).
"""
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
x = fluid.layers.unsqueeze(x, [2])
x = self.conv(x)
x = fluid.layers.squeeze(x, [2])
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
return x
class Pool1D(dg.Layer):
"""
A Pool 1D block implemented with Pool2D.
"""
def __init__(self,
pool_size=-1,
pool_type='max',
pool_stride=1,
pool_padding=0,
global_pooling=False,
use_cudnn=True,
ceil_mode=False,
exclusive=True,
data_format='NCT'):
super(Pool1D, self).__init__()
self.pool_size = pool_size
self.pool_type = pool_type
self.pool_stride = pool_stride
self.pool_padding = pool_padding
self.global_pooling = global_pooling
self.use_cudnn = use_cudnn
self.ceil_mode = ceil_mode
self.exclusive = exclusive
self.data_format = data_format
self.pool2d = dg.Pool2D([1,pool_size], pool_type = pool_type,
pool_stride = [1,pool_stride], pool_padding = [0, pool_padding],
global_pooling = global_pooling, use_cudnn = use_cudnn,
ceil_mode = ceil_mode, exclusive = exclusive)
def forward(self, x):
"""
Args:
x (Variable): Shape(B, C, T) in NCT layout, or Shape(B, T, C) in NTC,
the input.
Returns:
x (Variable): the pooled outputs in the same layout; pooling leaves
the channel count unchanged.
"""
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
x = fluid.layers.unsqueeze(x, [2])
x = self.pool2d(x)
x = fluid.layers.squeeze(x, [2])
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
return x

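The deleted Conv1D (now imported from parakeet.modules.customized) implements 1-D convolution with dg.Conv2D by treating time as the width of a height-1 image. A standalone sketch of that unsqueeze/conv/squeeze trick, with illustrative shapes:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg

with dg.guard():
    conv2d = dg.Conv2D(num_channels=80, num_filters=256,
                       filter_size=(1, 5), padding=(0, 2))
    x = dg.to_variable(np.random.randn(4, 80, 100).astype("float32"))  # NCT
    x = fluid.layers.unsqueeze(x, axes=[2])  # (4, 80, 1, 100)
    y = conv2d(x)                            # (4, 256, 1, 100)
    y = fluid.layers.squeeze(y, axes=[2])    # back to (4, 256, 100)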
View File

@@ -1,8 +1,28 @@
import math
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
from parakeet.modules.layers import Linear
class Linear(dg.Layer):
def __init__(self, in_features, out_features, is_bias=True, dtype="float32"):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.dtype = dtype
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.bias = is_bias
if is_bias is not False:
k = math.sqrt(1 / in_features)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear = dg.Linear(in_features, out_features, param_attr = self.weight,
bias_attr = self.bias,)
def forward(self, x):
x = self.linear(x)
return x
class ScaledDotProductAttention(dg.Layer):
def __init__(self, d_key):

View File

@@ -1,80 +0,0 @@
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv
class PostConvNet(dg.Layer):
def __init__(self,
n_mels=80,
num_hidden=512,
filter_size=5,
padding=0,
num_conv=5,
outputs_per_step=1,
use_cudnn=True,
dropout=0.1,
batchnorm_last=False):
super(PostConvNet, self).__init__()
self.dropout = dropout
self.num_conv = num_conv
self.batchnorm_last = batchnorm_last
self.conv_list = []
self.conv_list.append(Conv(in_channels = n_mels * outputs_per_step,
out_channels = num_hidden,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT"))
for _ in range(1, num_conv-1):
self.conv_list.append(Conv(in_channels = num_hidden,
out_channels = num_hidden,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT") )
self.conv_list.append(Conv(in_channels = num_hidden,
out_channels = n_mels * outputs_per_step,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT"))
for i, layer in enumerate(self.conv_list):
self.add_sublayer("conv_list_{}".format(i), layer)
self.batch_norm_list = [dg.BatchNorm(num_hidden,
data_layout='NCHW') for _ in range(num_conv-1)]
if self.batchnorm_last:
self.batch_norm_list.append(dg.BatchNorm(n_mels * outputs_per_step,
data_layout='NCHW'))
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
def forward(self, input):
"""
Post Conv Net.
Args:
input (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result after postconvnet.
"""
input = layers.transpose(input, [0,2,1])
len = input.shape[-1]
for i in range(self.num_conv-1):
batch_norm = self.batch_norm_list[i]
conv = self.conv_list[i]
input = layers.dropout(layers.tanh(batch_norm(conv(input)[:,:,:len])), self.dropout)
conv = self.conv_list[self.num_conv-1]
input = conv(input)[:,:,:len]
if self.batchnorm_last:
batch_norm = self.batch_norm_list[self.num_conv-1]
input = layers.dropout(batch_norm(input), self.dropout)
output = layers.transpose(input, [0,2,1])
return output

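The PostConvNet deleted above (now at parakeet.models.transformerTTS.post_convnet, per the FastSpeech and Decoder imports) keeps the time axis fixed by padding each convolution and trimming the tail with [:, :, :len]. A minimal sketch of that trim on a bare conv; shapes are illustrative:

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    # filter_size 5 with padding 4 lengthens the output by 4 frames ...
    conv = dg.Conv2D(num_channels=80, num_filters=80,
                     filter_size=(1, 5), padding=(0, 4))
    x = dg.to_variable(np.random.randn(2, 80, 1, 50).astype("float32"))
    T = x.shape[-1]
    y = conv(x)[:, :, :, :T]  # ... so the slice trims it back to T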
View File

@@ -1,32 +0,0 @@
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
from parakeet.modules.layers import Linear
class PreNet(dg.Layer):
def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
"""
:param input_size: dimension of input
:param hidden_size: dimension of hidden unit
:param output_size: dimension of output
"""
super(PreNet, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.dropout_rate = dropout_rate
self.linear1 = Linear(input_size, hidden_size)
self.linear2 = Linear(hidden_size, output_size)
def forward(self, x):
"""
Pre Net before passing through the network.
Args:
x (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
x (Variable), Shape(B, T, C), the result after the prenet.
"""
x = layers.dropout(layers.relu(self.linear1(x)), self.dropout_rate)
x = layers.dropout(layers.relu(self.linear2(x)), self.dropout_rate)
return x
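The PreNet deleted above now lives at parakeet.models.transformerTTS.prenet, matching the Decoder import earlier in this commit. A hedged usage sketch, assuming the moved class keeps this signature; the sizes are illustrative:

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.models.transformerTTS.prenet import PreNet

with dg.guard():
    prenet = PreNet(input_size=80, hidden_size=256, output_size=256)
    mel = dg.to_variable(np.random.randn(4, 20, 80).astype("float32"))
    y = prenet(mel)  # (4, 20, 256): two linear layers with relu and dropout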