Parakeet/examples/fastspeech/config/synthesis.yaml

26 lines
500 B
YAML

audio:
num_mels: 80
n_fft: 2048
sr: 22050
preemphasis: 0.97
hop_length: 275
win_length: 1102
power: 1.2
min_level_db: -100
ref_level_db: 20
outputs_per_step: 1
encoder_n_layer: 6
encoder_head: 2
encoder_conv1d_filter_size: 1536
max_seq_len: 2048
decoder_n_layer: 6
decoder_head: 2
decoder_conv1d_filter_size: 1536
fs_hidden_size: 384
duration_predictor_output_size: 256
duration_predictor_filter_size: 3
fft_conv1d_filter: 3
fft_conv1d_padding: 1
dropout: 0.1
transformer_head: 4