diff --git a/examples/deepvoice3/train.py b/examples/deepvoice3/train.py index d363e6f..0d5a54b 100644 --- a/examples/deepvoice3/train.py +++ b/examples/deepvoice3/train.py @@ -196,8 +196,8 @@ if __name__ == "__main__": beta1, beta2, epsilon=epsilon, - parameter_list=dv3.parameters()) - gradient_clipper = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.1) + parameter_list=dv3.parameters(), + grad_clip=fluid.clip.GradientClipByGlobalNorm(0.1)) # generation synthesis_config = config["synthesis"] @@ -258,15 +258,19 @@ if __name__ == "__main__": text_lengths, frames) l = losses["loss"] l.backward() + # record learning rate before updating writer.add_scalar("learning_rate", optim._learning_rate.step().numpy(), global_step) - optim.minimize(l, grad_clip=gradient_clipper) + optim.minimize(l) optim.clear_gradients() # ==================all kinds of tedious things================= # record step loss into tensorboard - step_loss = {k: v.numpy()[0] for k, v in losses.items()} + step_loss = { + k: v.numpy()[0] + for k, v in losses.items() if v is not None + } tqdm.tqdm.write("global_step: {}\tloss: {}".format( global_step, step_loss["loss"])) for k, v in step_loss.items(): diff --git a/parakeet/models/deepvoice3/loss.py b/parakeet/models/deepvoice3/loss.py index abf6d73..8c7029d 100644 --- a/parakeet/models/deepvoice3/loss.py +++ b/parakeet/models/deepvoice3/loss.py @@ -262,7 +262,7 @@ class TTSLoss(object): if compute_lin_loss: lin_hyp = lin_hyp[:, :-self.time_shift, :] lin_ref = lin_ref[:, self.time_shift:, :] - lin_mask = lin_mask[:, self.time_shift:, :] + lin_mask = lin_mask[:, self.time_shift:] lin_l1_loss = self.l1_loss( lin_hyp, lin_ref, lin_mask, priority_bin=self.priority_bin) lin_bce_loss = self.binary_divergence(lin_hyp, lin_ref, lin_mask) @@ -273,7 +273,7 @@ class TTSLoss(object): if compute_mel_loss: mel_hyp = mel_hyp[:, :-self.time_shift, :] mel_ref = mel_ref[:, self.time_shift:, :] - mel_mask = mel_mask[:, self.time_shift:, :] + mel_mask = mel_mask[:, self.time_shift:] mel_l1_loss = self.l1_loss(mel_hyp, mel_ref, mel_mask) mel_bce_loss = self.binary_divergence(mel_hyp, mel_ref, mel_mask) # print("=====>", mel_l1_loss.numpy()[0], mel_bce_loss.numpy()[0]) diff --git a/parakeet/models/deepvoice3/position_embedding.py b/parakeet/models/deepvoice3/position_embedding.py index 032feff..e76d2c3 100644 --- a/parakeet/models/deepvoice3/position_embedding.py +++ b/parakeet/models/deepvoice3/position_embedding.py @@ -31,8 +31,10 @@ def compute_position_embedding(radians, speaker_position_rate): """ _, embed_dim = radians.shape batch_size = speaker_position_rate.shape[0] - speaker_position_rate = F.unsqueeze(speaker_position_rate, [1, 2]) - scaled_radians = speaker_position_rate * radians + scaled_radians = F.elementwise_mul( + F.expand(F.unsqueeze(radians, [0]), [batch_size, 1, 1]), + speaker_position_rate, + axis=0) odd_mask = (np.arange(embed_dim) % 2).astype(np.float32) odd_mask = dg.to_variable(odd_mask) diff --git a/parakeet/modules/weight_norm.py b/parakeet/modules/weight_norm.py index 20af6c0..b48a686 100644 --- a/parakeet/modules/weight_norm.py +++ b/parakeet/modules/weight_norm.py @@ -84,13 +84,15 @@ class WeightNormWrapper(dg.Layer): w_v, self.create_parameter( shape=original_weight.shape, dtype=original_weight.dtype)) - F.assign(original_weight, getattr(self, w_v)) + with dg.no_grad(): + F.assign(original_weight, getattr(self, w_v)) delattr(layer, param_name) temp = norm_except(getattr(self, w_v), self.dim, self.power) self.add_parameter( w_g, self.create_parameter( shape=temp.shape, dtype=temp.dtype)) - F.assign(temp, getattr(self, w_g)) + with dg.no_grad(): + F.assign(temp, getattr(self, w_g)) # also set this when setting up setattr(self.layer, self.param_name,