# Parakeet/examples/deepvoice3/configs/ljspeech.yaml (46 lines, 686 B, YAML)

# Flat hyperparameter set for the deepvoice3 example on ljspeech (per the
# file path). Every key is top-level; the headings below only group them
# visually. Notes marked "presumably" are inferred from key names only and
# should be confirmed against the code that reads this file.
#
# data processing
# presumably the probability of feeding a word's pronunciation instead of
# its spelling -- TODO confirm
p_pronunciation: 0.99
sample_rate: 22050 # Hz
# win_length equals n_fft, and hop_length is one quarter of win_length.
# Assuming these are sample counts, at 22050 Hz that is ~46.4 ms per window
# and ~11.6 ms per hop.
n_fft: 1024
win_length: 1024
hop_length: 256
n_mels: 80
# Same value as forward_step below; whether the two must stay in sync is
# not visible here -- verify before changing only one of them.
reduction_factor: 4
# model-s2s
# NOTE(review): with n_speakers set to 1, speaker_dim may be unused --
# confirm against the model code.
n_speakers: 1
speaker_dim: 16
char_dim: 256
encoder_dim: 64
kernel_size: 5
encoder_layers: 7
decoder_layers: 8
# A list, currently with a single entry.
prenet_sizes: [128]
attention_dim: 128
# model-postnet
postnet_layers: 5
postnet_dim: 256
# position embedding
position_weight: 1.0
position_rate: 5.54
# NOTE(review): the next three keys sit under the position-embedding
# heading, but their names do not suggest positional encoding (presumably
# attention-window and regularisation settings) -- confirm the grouping.
forward_step: 4
backward_step: 0
dropout: 0.05
# output-griffinlim
sharpening_factor: 1.4
# optimizer
learning_rate: 0.001
# presumably gradient clipping by element value and by norm -- TODO confirm
# which of the two the trainer actually applies
clip_value: 5.0
clip_norm: 100.0
# training
max_iteration: 1000000
batch_size: 16
# report_interval and save_interval share the value 10000; the unit
# (presumably training iterations) is not stated here -- confirm.
report_interval: 10000
save_interval: 10000
# presumably the number of examples held out for validation -- TODO confirm
valid_size: 5