---
# Model / training configuration.
#
# NOTE(review): this file previously had every key on a single line, which
# is not loadable YAML (a plain scalar cannot contain ": "). The layout below
# is a reconstruction: keys `num_mels` through `outputs_per_step` are assumed
# to belong to the nested `audio` mapping and everything else to the top
# level. Confirm against the code that reads this file.

# Audio / spectrogram settings.
audio:
  num_mels: 80
  n_fft: 2048
  sr: 22050
  preemphasis: 0.97
  hop_length: 275
  win_length: 1102
  power: 1.2
  min_level_db: -100
  ref_level_db: 20
  outputs_per_step: 1

# Encoder settings.
encoder_n_layer: 6
encoder_head: 2
encoder_conv1d_filter_size: 1536
# NOTE(review): possibly meant "max_seq_len"; the key is kept exactly as-is
# because whatever reads this file presumably looks it up by this spelling.
max_sep_len: 2048
encoder_output_size: 384
embedding_size: 384

# Decoder settings.
decoder_n_layer: 6
decoder_head: 2
decoder_conv1d_filter_size: 1536
decoder_output_size: 384
hidden_size: 384

# Duration predictor and FFT-block convolution settings.
duration_predictor_output_size: 256
duration_predictor_filter_size: 3
fft_conv1d_filter: 3
fft_conv1d_padding: 1
dropout: 0.1
transformer_head: 4

# Optimisation / training-loop settings.
warm_up_step: 4000
grad_clip_thresh: 0.1
batch_size: 32
epochs: 10000
lr: 0.001
save_step: 500

# Runtime flags. Canonical lowercase booleans; these load as the same
# boolean values as the previous `True` / `False` spellings.
use_gpu: true
use_data_parallel: false

# Paths and checkpoint selection. Relative paths are presumably resolved
# against the working directory of the consuming script -- verify.
data_path: ../../../dataset/LJSpeech-1.1
transtts_path: ../transformerTTS/checkpoint
transformer_step: 20

log_dir: ./log