ParakeetRebeccaRosario/parakeet/models/fastspeech/parse.py

94 lines
5.1 KiB
Python
Raw Normal View History

2020-01-03 16:25:17 +08:00
import jsonargparse
def add_config_options_to_parser(parser):
parser.add_argument('--audio.num_mels', type=int, default=80,
help="the number of mel bands when calculating mel spectrograms.")
parser.add_argument('--audio.n_fft', type=int, default=2048,
help="the number of fft components.")
parser.add_argument('--audio.sr', type=int, default=22050,
help="the sampling rate of audio data file.")
parser.add_argument('--audio.preemphasis', type=float, default=0.97,
help="the preemphasis coefficient.")
parser.add_argument('--audio.hop_length', type=float, default=128,
help="the number of samples to advance between frames.")
parser.add_argument('--audio.win_length', type=float, default=1024,
help="the length (width) of the window function.")
parser.add_argument('--audio.power', type=float, default=1.4,
help="the power to raise before griffin-lim.")
parser.add_argument('--audio.min_level_db', type=int, default=-100,
help="the minimum level db.")
parser.add_argument('--audio.ref_level_db', type=int, default=20,
help="the reference level db.")
parser.add_argument('--audio.outputs_per_step', type=int, default=1,
help="the outputs per step.")
parser.add_argument('--embedding_size', type=int, default=256,
help="the dim size of embedding.")
parser.add_argument('--encoder_n_layer', type=int, default=6,
help="the number of FFT Block in encoder.")
parser.add_argument('--encoder_head', type=int, default=2,
help="the attention head number in encoder.")
parser.add_argument('--encoder_conv1d_filter_size', type=int, default=1024,
help="the filter size of conv1d in encoder.")
parser.add_argument('--max_sep_len', type=int, default=2048,
help="the max length of sequence.")
parser.add_argument('--encoder_output_size', type=int, default=256,
help="the output channel size of encoder.")
parser.add_argument('--decoder_n_layer', type=int, default=6,
help="the number of FFT Block in decoder.")
parser.add_argument('--decoder_head', type=int, default=2,
help="the attention head number in decoder.")
parser.add_argument('--decoder_conv1d_filter_size', type=int, default=1024,
help="the filter size of conv1d in decoder.")
parser.add_argument('--decoder_output_size', type=int, default=256,
help="the output channel size of decoder.")
parser.add_argument('--hidden_size', type=int, default=256,
help="the hidden size in model.")
parser.add_argument('--duration_predictor_output_size', type=int, default=256,
help="the output size of duration predictior.")
parser.add_argument('--duration_predictor_filter_size', type=int, default=3,
help="the filter size of conv1d in duration prediction.")
parser.add_argument('--fft_conv1d_filter', type=int, default=3,
help="the filter size of conv1d in fft.")
parser.add_argument('--fft_conv1d_padding', type=int, default=1,
help="the padding size of conv1d in fft.")
parser.add_argument('--dropout', type=float, default=0.1,
help="the dropout in network.")
parser.add_argument('--transformer_head', type=int, default=4,
help="the attention head num of transformerTTS.")
parser.add_argument('--warm_up_step', type=int, default=4000,
help="the warm up step of learning rate.")
parser.add_argument('--grad_clip_thresh', type=float, default=1.0,
help="the threshold of grad clip.")
parser.add_argument('--batch_size', type=int, default=32,
help="batch size for training.")
parser.add_argument('--epochs', type=int, default=10000,
help="the number of epoch for training.")
parser.add_argument('--lr', type=float, default=0.001,
help="the learning rate for training.")
parser.add_argument('--save_step', type=int, default=500,
help="checkpointing interval during training.")
parser.add_argument('--use_gpu', type=bool, default=True,
help="use gpu or not during training.")
parser.add_argument('--use_data_parallel', type=bool, default=False,
help="use data parallel or not during training.")
parser.add_argument('--data_path', type=str, default='./dataset/LJSpeech-1.1',
help="the path of dataset.")
parser.add_argument('--checkpoint_path', type=str, default=None,
help="the path to load checkpoint or pretrain model.")
parser.add_argument('--save_path', type=str, default='./checkpoint',
help="the path to save checkpoint.")
parser.add_argument('--log_dir', type=str, default='./log',
help="the directory to save tensorboard log.")
parser.add_argument('--sample_path', type=str, default='./sample',
help="the directory to save audio sample in synthesis.")
parser.add_argument('--transtts_path', type=str, default='./log',
help="the directory to load pretrain transformerTTS model.")
parser.add_argument('--transformer_step', type=int, default=70000,
help="the step to load transformerTTS model.")
parser.add_argument('-c', '--config', action=jsonargparse.ActionConfigFile)