From 5b93de8a2ed16922c0f16b6c5b6f6956a04fdfd2 Mon Sep 17 00:00:00 2001
From: lfchener
Date: Mon, 14 Dec 2020 08:57:08 +0000
Subject: [PATCH 1/4] fix EnglishCharacter frontend, add space in sentence ids

---
 parakeet/frontend/normalizer/normalizer.py |  2 +-
 parakeet/frontend/phonectic.py             | 21 ++++++---------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/parakeet/frontend/normalizer/normalizer.py b/parakeet/frontend/normalizer/normalizer.py
index fe7d9f8..3da6d6f 100644
--- a/parakeet/frontend/normalizer/normalizer.py
+++ b/parakeet/frontend/normalizer/normalizer.py
@@ -29,4 +29,4 @@ def normalize(sentence):
     sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence)
     sentence = sentence.replace("i.e.", "that is")
     sentence = sentence.replace("e.g.", "for example")
-    return sentence.split()
+    return sentence
diff --git a/parakeet/frontend/phonectic.py b/parakeet/frontend/phonectic.py
index 2b41db5..6f0de1d 100644
--- a/parakeet/frontend/phonectic.py
+++ b/parakeet/frontend/phonectic.py
@@ -79,23 +79,14 @@ class EnglishCharacter(Phonetics):
         self.vocab = Vocab(self.graphemes + self.punctuations)
 
     def phoneticize(self, sentence):
-        start = self.vocab.start_symbol
-        end = self.vocab.end_symbol
-
-        words = ([] if start is None else [start]) \
-            + normalize(sentence) \
-            + ([] if end is None else [end])
+        words = normalize(sentence)
         return words
 
-    def numericalize(self, words):
-        ids = []
-        for word in words:
-            if word in self.vocab.stoi:
-                ids.append(self.vocab.lookup(word))
-                continue
-            for char in word:
-                if char in self.vocab.stoi:
-                    ids.append(self.vocab.lookup(char))
+    def numericalize(self, sentence):
+        ids = [
+            self.vocab.lookup(item) for item in sentence
+            if item in self.vocab.stoi
+        ]
         return ids
 
     def reverse(self, ids):

From c864612dc3067c0896c2a278ef3979bf2100a8a2 Mon Sep 17 00:00:00 2001
From: lfchener
Date: Tue, 15 Dec 2020 09:07:40 +0000
Subject: [PATCH 2/4] plot spectrogram

---
 parakeet/utils/display.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/parakeet/utils/display.py b/parakeet/utils/display.py
index bd94789..b552a94 100644
--- a/parakeet/utils/display.py
+++ b/parakeet/utils/display.py
@@ -19,8 +19,11 @@ import matplotlib.pylab as plt
 from matplotlib import cm, pyplot
 
 __all__ = [
-    "pack_attention_images", "add_attention_plots", "plot_alignment",
-    "min_max_normalize"
+    "pack_attention_images",
+    "add_attention_plots",
+    "plot_alignment",
+    "min_max_normalize",
+    "add_spectrogram_plots",
 ]
 
 
@@ -48,6 +51,13 @@ def pack_attention_images(attention_weights, rotate=False):
     return img
 
 
+def save_figure_to_numpy(fig):
+    # save it to a numpy array.
+    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
+    return data
+
+
 def plot_alignment(alignment, title=None):
     fig, ax = plt.subplots(figsize=(6, 4))
     im = ax.imshow(
@@ -61,8 +71,7 @@ def plot_alignment(alignment, title=None):
     plt.tight_layout()
 
     fig.canvas.draw()
-    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
-    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
+    data = save_figure_to_numpy(fig)
     plt.close()
     return data
 
@@ -83,5 +92,20 @@ def add_multi_attention_plots(writer, tag, attention_weights, global_step):
             dataformats="HWC")
 
 
+def add_spectrogram_plots(writer, tag, spec, global_step):
+    spec = spec.numpy()
+    fig, ax = plt.subplots(figsize=(12, 3))
+    im = ax.imshow(spec, aspect="auto", origin="lower", interpolation='none')
+    plt.colorbar(im, ax=ax)
+    plt.xlabel("Frames")
+    plt.ylabel("Channels")
+    plt.tight_layout()
+
+    fig.canvas.draw()
+    data = save_figure_to_numpy(fig)
+    plt.close()
+    writer.add_image(tag, data, global_step, dataformats="HWC")
+
+
 def min_max_normalize(v):
     return (v - v.min()) / (v.max() - v.min())

From a5c81c75d5ed69607447b675ef9d004504a935b2 Mon Sep 17 00:00:00 2001
From: lfchener
Date: Tue, 15 Dec 2020 11:27:11 +0000
Subject: [PATCH 3/4] fix add_spectrogram_plots

---
 parakeet/utils/display.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parakeet/utils/display.py b/parakeet/utils/display.py
index b552a94..6c13931 100644
--- a/parakeet/utils/display.py
+++ b/parakeet/utils/display.py
@@ -93,7 +93,7 @@ def add_multi_attention_plots(writer, tag, attention_weights, global_step):
 
 
 def add_spectrogram_plots(writer, tag, spec, global_step):
-    spec = spec.numpy()
+    spec = spec.numpy().T
     fig, ax = plt.subplots(figsize=(12, 3))
     im = ax.imshow(spec, aspect="auto", origin="lower", interpolation='none')
     plt.colorbar(im, ax=ax)

From 6420da619704b1107b78f65a67e474c464618d93 Mon Sep 17 00:00:00 2001
From: lfchener
Date: Thu, 17 Dec 2020 02:56:45 +0000
Subject: [PATCH 4/4] fix some bugs

---
 parakeet/models/tacotron2.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/parakeet/models/tacotron2.py b/parakeet/models/tacotron2.py
index 599e6a2..9949e1d 100644
--- a/parakeet/models/tacotron2.py
+++ b/parakeet/models/tacotron2.py
@@ -238,10 +238,7 @@ class Tacotron2Decoder(nn.Layer):
         querys = paddle.concat(
             [
                 paddle.zeros(
-                    shape=[
-                        querys.shape[0], 1,
-                        querys.shape[-1] * self.reduction_factor
-                    ],
+                    shape=[querys.shape[0], 1, querys.shape[-1]],
                     dtype=querys.dtype), querys
             ],
             axis=1)
@@ -266,7 +263,7 @@ class Tacotron2Decoder(nn.Layer):
         return mel_outputs, stop_logits, alignments
 
     def infer(self, key, stop_threshold=0.5, max_decoder_steps=1000):
-        decoder_input = paddle.zeros(
+        query = paddle.zeros(
             shape=[key.shape[0], self.d_mels * self.reduction_factor],
             dtype=key.dtype)  #[B, C]
 
@@ -275,8 +272,8 @@ class Tacotron2Decoder(nn.Layer):
 
         mel_outputs, stop_logits, alignments = [], [], []
         while True:
-            decoder_input = self.prenet(decoder_input)
-            mel_output, stop_logit, alignment = self._decode(decoder_input)
+            query = self.prenet(query)
+            mel_output, stop_logit, alignment = self._decode(query)
 
             mel_outputs += [mel_output]
             stop_logits += [stop_logit]
@@ -288,7 +285,7 @@ class Tacotron2Decoder(nn.Layer):
                 print("Warning! Reached max decoder steps!!!")
                 break
 
-            decoder_input = mel_output
+            query = mel_output
 
         alignments = paddle.stack(alignments, axis=1)
         stop_logits = paddle.concat(stop_logits, axis=1)
@@ -350,7 +347,7 @@ class Tacotron2(nn.Layer):
             attention_kernel_size, p_prenet_dropout, p_attention_dropout,
             p_decoder_dropout)
         self.postnet = DecoderPostNet(
-            d_mels=d_mels,
+            d_mels=d_mels * reduction_factor,
            d_hidden=d_postnet,
             kernel_size=postnet_kernel_size,
             padding=int((postnet_kernel_size - 1) / 2),