diff --git a/parakeet/models/fastspeech/fastspeech.py b/parakeet/models/fastspeech/fastspeech.py
index 5ee2de1..96d5074 100644
--- a/parakeet/models/fastspeech/fastspeech.py
+++ b/parakeet/models/fastspeech/fastspeech.py
@@ -62,7 +62,7 @@ class FastSpeech(dg.Layer):
             dropout=0.1)
         self.weight = fluid.ParamAttr(
             initializer=fluid.initializer.XavierInitializer())
-        k = math.sqrt(1 / cfg['fs_hidden_size'])
+        k = math.sqrt(1.0 / cfg['fs_hidden_size'])
         self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
             low=-k, high=k))
         self.mel_linear = dg.Linear(
diff --git a/parakeet/models/fastspeech/length_regulator.py b/parakeet/models/fastspeech/length_regulator.py
index 6fc6702..478d780 100644
--- a/parakeet/models/fastspeech/length_regulator.py
+++ b/parakeet/models/fastspeech/length_regulator.py
@@ -115,7 +115,7 @@ class DurationPredictor(dg.Layer):
         self.filter_size = filter_size
         self.dropout = dropout
 
-        k = math.sqrt(1 / self.input_size)
+        k = math.sqrt(1.0 / self.input_size)
         self.conv1 = Conv1D(
             num_channels=self.input_size,
             num_filters=self.out_channels,
@@ -126,7 +126,7 @@ class DurationPredictor(dg.Layer):
             bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-k, high=k)))
             #data_format='NTC')
-        k = math.sqrt(1 / self.out_channels)
+        k = math.sqrt(1.0 / self.out_channels)
         self.conv2 = Conv1D(
             num_channels=self.out_channels,
             num_filters=self.out_channels,
@@ -142,7 +142,7 @@ class DurationPredictor(dg.Layer):
 
         self.weight = fluid.ParamAttr(
             initializer=fluid.initializer.XavierInitializer())
-        k = math.sqrt(1 / self.out_channels)
+        k = math.sqrt(1.0 / self.out_channels)
         self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
             low=-k, high=k))
 
diff --git a/parakeet/models/transformer_tts/cbhg.py b/parakeet/models/transformer_tts/cbhg.py
index 5a28ebd..9a330f9 100644
--- a/parakeet/models/transformer_tts/cbhg.py
+++ b/parakeet/models/transformer_tts/cbhg.py
@@ -46,7 +46,7 @@ class CBHG(dg.Layer):
         self.hidden_size = hidden_size
         self.projection_size = projection_size
         self.conv_list = []
-        k = math.sqrt(1 / projection_size)
+        k = math.sqrt(1.0 / projection_size)
         self.conv_list.append(
             Conv1D(
                 num_channels=projection_size,
@@ -58,7 +58,7 @@ class CBHG(dg.Layer):
                 bias_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.Uniform(
                         low=-k, high=k))))
-        k = math.sqrt(1 / hidden_size)
+        k = math.sqrt(1.0 / hidden_size)
         for i in range(2, K + 1):
             self.conv_list.append(
                 Conv1D(
@@ -86,7 +86,7 @@ class CBHG(dg.Layer):
 
         conv_outdim = hidden_size * K
 
-        k = math.sqrt(1 / conv_outdim)
+        k = math.sqrt(1.0 / conv_outdim)
         self.conv_projection_1 = Conv1D(
             num_channels=conv_outdim,
             num_filters=hidden_size,
@@ -97,7 +97,7 @@ class CBHG(dg.Layer):
             bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-k, high=k)))
 
-        k = math.sqrt(1 / hidden_size)
+        k = math.sqrt(1.0 / hidden_size)
         self.conv_projection_2 = Conv1D(
             num_channels=hidden_size,
             num_filters=projection_size,
@@ -121,7 +121,7 @@ class CBHG(dg.Layer):
         h_0 = np.zeros((batch_size, hidden_size // 2), dtype="float32")
         h_0 = dg.to_variable(h_0)
 
-        k = math.sqrt(1 / hidden_size)
+        k = math.sqrt(1.0 / hidden_size)
         self.fc_forward1 = dg.Linear(
             hidden_size,
             hidden_size // 2 * 3,
@@ -240,7 +240,7 @@ class Highwaynet(dg.Layer):
         self.gates = []
         self.linears = []
 
-        k = math.sqrt(1 / num_units)
+        k = math.sqrt(1.0 / num_units)
         for i in range(num_layers):
             self.linears.append(
                 dg.Linear(
diff --git a/parakeet/models/transformer_tts/decoder.py b/parakeet/models/transformer_tts/decoder.py
index 0b47e4f..4275a56 100644
--- a/parakeet/models/transformer_tts/decoder.py
+++ b/parakeet/models/transformer_tts/decoder.py
@@ -55,7 +55,7 @@ class Decoder(dg.Layer):
             hidden_size=num_hidden * 2,
             output_size=num_hidden,
             dropout_rate=0.2)
-        k = math.sqrt(1 / num_hidden)
+        k = math.sqrt(1.0 / num_hidden)
         self.linear = dg.Linear(
             num_hidden,
             num_hidden,
diff --git a/parakeet/models/transformer_tts/encoderprenet.py b/parakeet/models/transformer_tts/encoderprenet.py
index d692cea..a32f5a8 100644
--- a/parakeet/models/transformer_tts/encoderprenet.py
+++ b/parakeet/models/transformer_tts/encoderprenet.py
@@ -39,7 +39,7 @@ class EncoderPrenet(dg.Layer):
             param_attr=fluid.initializer.Normal(
                 loc=0.0, scale=1.0))
         self.conv_list = []
-        k = math.sqrt(1 / embedding_size)
+        k = math.sqrt(1.0 / embedding_size)
         self.conv_list.append(
             Conv1D(
                 num_channels=embedding_size,
@@ -52,7 +52,7 @@ class EncoderPrenet(dg.Layer):
                     initializer=fluid.initializer.Uniform(
                         low=-k, high=k)),
                 use_cudnn=use_cudnn))
-        k = math.sqrt(1 / num_hidden)
+        k = math.sqrt(1.0 / num_hidden)
         for _ in range(2):
             self.conv_list.append(
                 Conv1D(
@@ -78,7 +78,7 @@ class EncoderPrenet(dg.Layer):
         for i, layer in enumerate(self.batch_norm_list):
             self.add_sublayer("batch_norm_list_{}".format(i), layer)
 
-        k = math.sqrt(1 / num_hidden)
+        k = math.sqrt(1.0 / num_hidden)
         self.projection = dg.Linear(
             num_hidden,
             num_hidden,
diff --git a/parakeet/models/transformer_tts/post_convnet.py b/parakeet/models/transformer_tts/post_convnet.py
index da458ed..6ad8e5d 100644
--- a/parakeet/models/transformer_tts/post_convnet.py
+++ b/parakeet/models/transformer_tts/post_convnet.py
@@ -48,7 +48,7 @@ class PostConvNet(dg.Layer):
         self.num_conv = num_conv
         self.batchnorm_last = batchnorm_last
         self.conv_list = []
-        k = math.sqrt(1 / (n_mels * outputs_per_step))
+        k = math.sqrt(1.0 / (n_mels * outputs_per_step))
         self.conv_list.append(
             Conv1D(
                 num_channels=n_mels * outputs_per_step,
@@ -62,7 +62,7 @@ class PostConvNet(dg.Layer):
                         low=-k, high=k)),
                 use_cudnn=use_cudnn))
 
-        k = math.sqrt(1 / num_hidden)
+        k = math.sqrt(1.0 / num_hidden)
         for _ in range(1, num_conv - 1):
             self.conv_list.append(
                 Conv1D(
diff --git a/parakeet/models/transformer_tts/prenet.py b/parakeet/models/transformer_tts/prenet.py
index b033860..eaf4bc8 100644
--- a/parakeet/models/transformer_tts/prenet.py
+++ b/parakeet/models/transformer_tts/prenet.py
@@ -33,7 +33,7 @@ class PreNet(dg.Layer):
         self.output_size = output_size
         self.dropout_rate = dropout_rate
 
-        k = math.sqrt(1 / input_size)
+        k = math.sqrt(1.0 / input_size)
         self.linear1 = dg.Linear(
             input_size,
             hidden_size,
@@ -41,7 +41,7 @@ class PreNet(dg.Layer):
                 initializer=fluid.initializer.XavierInitializer()),
             bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-k, high=k)))
-        k = math.sqrt(1 / hidden_size)
+        k = math.sqrt(1.0 / hidden_size)
         self.linear2 = dg.Linear(
             hidden_size,
             output_size,
diff --git a/parakeet/modules/ffn.py b/parakeet/modules/ffn.py
index 199f668..bf68c1c 100644
--- a/parakeet/modules/ffn.py
+++ b/parakeet/modules/ffn.py
@@ -41,7 +41,7 @@ class PositionwiseFeedForward(dg.Layer):
         self.use_cudnn = use_cudnn
         self.dropout = dropout
 
-        k = math.sqrt(1 / d_in)
+        k = math.sqrt(1.0 / d_in)
         self.w_1 = Conv1D(
             num_channels=d_in,
             num_filters=num_hidden,
@@ -52,7 +52,7 @@ class PositionwiseFeedForward(dg.Layer):
             bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-k, high=k)),
             use_cudnn=use_cudnn)
-        k = math.sqrt(1 / num_hidden)
+        k = math.sqrt(1.0 / num_hidden)
         self.w_2 = Conv1D(
             num_channels=num_hidden,
             num_filters=d_in,
diff --git a/parakeet/modules/multihead_attention.py b/parakeet/modules/multihead_attention.py
index c6907e8..4c350b2 100644
--- a/parakeet/modules/multihead_attention.py
+++ b/parakeet/modules/multihead_attention.py
@@ -33,7 +33,7 @@ class Linear(dg.Layer):
         self.bias = is_bias
 
         if is_bias is not False:
-            k = math.sqrt(1 / in_features)
+            k = math.sqrt(1.0 / in_features)
             self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-k, high=k))
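
The change is mechanical: every bias-initializer bound k = math.sqrt(1 / fan_in) becomes math.sqrt(1.0 / fan_in). Presumably this guards against Python 2 integer division, where 1 / fan_in truncates to 0 for any fan_in > 1, so k becomes 0.0 and each Uniform(low=-k, high=k) bias initializer above silently collapses to all zeros; writing 1.0 / fan_in forces float division on both Python 2 and Python 3 (on Python 3 alone the two spellings are equivalent). A minimal standalone sketch of the failure mode, using an illustrative fan_in of 256 rather than a value taken from the patch:

    import math

    fan_in = 256  # illustrative stand-in for hidden_size / in_features / num_units above

    # Python 2: 1 / 256 == 0, so k_old == 0.0 and Uniform(-k, k) degenerates to all zeros.
    # Python 3: 1 / 256 == 0.00390625, so k_old == k_new == 0.0625.
    k_old = math.sqrt(1 / fan_in)
    k_new = math.sqrt(1.0 / fan_in)
    print(k_old, k_new)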