# Listing metadata from the file viewer: 43 lines, 824 B, YAML
# Hyperparameter / path configuration.
#
# NOTE(review): this file was recovered from a listing in which indentation
# was lost and every entry was separated by stray "|" markers. The nesting of
# the keys below `audio` is reconstructed from the blank-line grouping of the
# original listing -- confirm against the code that reads this file.

# Audio settings.
audio:
  num_mels: 80
  n_fft: 2048
  sr: 22050
  preemphasis: 0.97
  # NOTE(review): hop_length / win_length are presumably expressed in samples
  # (275 / 22050 ~= 12.5 ms, 1102 / 22050 ~= 50 ms) -- TODO confirm.
  hop_length: 275
  win_length: 1102
  power: 1.2
  min_level_db: -100
  ref_level_db: 20
  outputs_per_step: 1

# Encoder / decoder / duration-predictor sizes.
encoder_n_layer: 6
encoder_head: 2
encoder_conv1d_filter_size: 1536
# NOTE(review): key name may have been intended as `max_seq_len`; left
# unchanged because consumers look it up by this exact name.
max_sep_len: 2048
encoder_output_size: 384
embedding_size: 384
decoder_n_layer: 6
decoder_head: 2
decoder_conv1d_filter_size: 1536
decoder_output_size: 384
hidden_size: 384
duration_predictor_output_size: 256
duration_predictor_filter_size: 3
fft_conv1d_filter: 3
fft_conv1d_padding: 1
dropout: 0.1
transformer_head: 4

# Optimisation and run-time settings.
warm_up_step: 4000
grad_clip_thresh: 0.1
batch_size: 32
epochs: 10000
lr: 0.001
save_step: 500
use_gpu: True
use_data_parallel: False

# Paths; relative paths are presumably resolved against the working directory
# of the consuming script -- TODO confirm.
data_path: ../../../dataset/LJSpeech-1.1
transtts_path: ../transformerTTS/checkpoint
transformer_step: 20
# Trailing " |" extraction residue removed from this value.
log_dir: ./log