diff --git a/examples/parallelwave_gan/baker/synthesize.py b/examples/parallelwave_gan/baker/synthesize.py
new file mode 100644
index 0000000..4c4e754
--- /dev/null
+++ b/examples/parallelwave_gan/baker/synthesize.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import logging
+import argparse
+from pathlib import Path
+
+import yaml
+import jsonlines
+import paddle
+import numpy as np
+import soundfile as sf
+from paddle import distributed as dist
+
+from parakeet.datasets.data_table import DataTable
+from parakeet.models.parallel_wavegan import PWGGenerator
+
+from config import get_cfg_default
+
+parser = argparse.ArgumentParser(
+    description="synthesize with parallel wavegan.")
+parser.add_argument(
+    "--config", type=str, help="config file to overwrite default config")
+parser.add_argument("--params", type=str, help="generator parameter file")
+parser.add_argument("--test-metadata", type=str, help="test metadata")
+parser.add_argument("--output-dir", type=str, help="output dir")
+parser.add_argument("--verbose", type=int, default=1, help="verbose")
+
+args = parser.parse_args()
+config = get_cfg_default()
+if args.config:
+    config.merge_from_file(args.config)
+
+print("========Args========")
+print(yaml.safe_dump(vars(args)))
+print("========Config========")
+print(config)
+print(
+    f"master sees the world size: {dist.get_world_size()}, from pid: {os.getpid()}"
+)
+
+generator = PWGGenerator(**config["generator_params"])
+state_dict = paddle.load(args.params)
+generator.set_state_dict(state_dict)
+
+generator.remove_weight_norm()
+generator.eval()
+with jsonlines.open(args.test_metadata, 'r') as reader:
+    metadata = list(reader)
+
+test_dataset = DataTable(
+    metadata,
+    fields=['utt_id', 'feats'],
+    converters={
+        'utt_id': None,
+        'feats': np.load,
+    })
+output_dir = Path(args.output_dir)
+output_dir.mkdir(parents=True, exist_ok=True)
+
+for example in test_dataset:
+    utt_id = example['utt_id']
+    mel = example['feats']
+    mel = paddle.to_tensor(mel)  # (T, C)
+    wav = generator.inference(c=mel)
+    wav = wav.numpy()
+    print(f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}")
+    sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=24000)
diff --git a/examples/parallelwave_gan/baker/train.py b/examples/parallelwave_gan/baker/train.py
index 2854b66..7232f0b 100644
--- a/examples/parallelwave_gan/baker/train.py
+++ b/examples/parallelwave_gan/baker/train.py
@@ -134,7 +134,8 @@ def train_sp(args, config):
         parameters=generator.parameters(),
         **config["generator_optimizer_params"])
     lr_schedule_d = StepDecay(**config["discriminator_scheduler_params"])
-    gradient_clip_d = nn.ClipGradByGlobalNorm(config["discriminator_grad_norm"])
+    gradient_clip_d = nn.ClipGradByGlobalNorm(config[
+        "discriminator_grad_norm"])
     optimizer_d = Adam(
         learning_rate=lr_schedule_d,
         grad_clip=gradient_clip_d,
@@ -180,7 +181,6 @@ def train_sp(args, config):
         },
         dataloader=dev_dataloader,
         lambda_adv=config.lambda_adv, )
-
     trainer = Trainer(
         updater,
         stop_trigger=(config.train_max_steps, "iteration"),
diff --git a/parakeet/models/parallel_wavegan.py b/parakeet/models/parallel_wavegan.py
index 6f2b44b..ea183ef 100644
--- a/parakeet/models/parallel_wavegan.py
+++ b/parakeet/models/parallel_wavegan.py
@@ -519,7 +519,7 @@ class PWGGenerator(nn.Layer):
         if c is not None:
             c = paddle.transpose(c, [1, 0]).unsqueeze(0)  # pseudo batch
-            c = nn.Pad1D(self.aux_context_window, mode='edge')(c)
+            c = nn.Pad1D(self.aux_context_window, mode='replicate')(c)
         out = self.forward(x, c).squeeze(0).transpose([1, 0])
         return out
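
Note on the last hunk, with a standalone sketch (not part of the diff): Paddle's nn.Pad1D only accepts the modes 'constant', 'reflect', 'replicate' and 'circular', so the numpy-style mode='edge' is not a recognized mode name; 'replicate' gives the intended edge-repeat padding of the auxiliary features around the context window, which appears to be the motivation for the rename. In the sketch below, aux_context_window = 2 is an assumed stand-in for the value normally read from the model config.

# Minimal check that mode='replicate' repeats edge frames as intended.
import numpy as np
import paddle

aux_context_window = 2  # hypothetical value; the real one comes from config
c = paddle.to_tensor(np.arange(6, dtype="float32").reshape([1, 2, 3]))  # (B, C, T)
padded = paddle.nn.Pad1D(aux_context_window, mode='replicate')(c)
print(padded.shape)          # [1, 2, 7]: T grows by 2 * aux_context_window
print(padded.numpy()[0, 0])  # [0. 0. 0. 1. 2. 2. 2.] -- edge frames repeated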