Parakeet/examples/tacotron2_aishell3/voice_cloning.ipynb

384 lines
1013 KiB
Plaintext
Raw Normal View History

add ge2e and tacotron2_aishell3 example (#107) * hacky thing, add tone support for acoustic model * fix experiments for waveflow and wavenet, only write visual log in rank-0 * use emb add in tacotron2 * 1. remove space from numericalized representation; 2. fix decoder paddign mask's unsqueeze dim. * remove bn in postnet * refactoring code * add an option to normalize volume when loading audio. * add an embedding layer. * 1. change the default min value of LogMagnitude to 1e-5; 2. remove stop logit prediction from tacotron2 model. * WIP: baker * add ge2e * fix lstm speaker encoder * fix lstm speaker encoder * fix speaker encoder and add support for 2 more datasets * simplify visualization code * add a simple strategy to support multispeaker for tacotron. * add vctk example for refactored tacotron * fix indentation * fix class name * fix visualizer * fix root path * fix root path * fix root path * fix typos * fix bugs * fix text log extention name * add example for baker and aishell3 * update experiment and display * format code for tacotron_vctk, add plot_waveform to display * add new trainer * minor fix * add global condition support for tacotron2 * add gst layer * add 2 frontend * fix fmax for example/waveflow * update collate function, data loader not does not convert nested list into numpy array. 
* WIP: add hifigan * WIP:update hifigan * change stft to use conv1d * add audio datasets * change batch_text_id, batch_spec, batch_wav to include valid lengths in the returned value * change wavenet to use on-the-fly prepeocessing * fix typos * resolve conflict * remove imports that are removed * remove files not included in this release * remove imports to deleted modules * move tacotron2_msp * clean code * fix argument order * fix argument name * clean code for data processing * WIP: add README * add more details to thr README, fix some preprocess scripts * add voice cloning notebook * add an optional to alter the loss and model structure of tacotron2, add an alternative config * add plot_multiple_attentions and update visualization code in transformer_tts * format code * remove tacotron2_msp * update tacotron2 from_pretrained, update setup.py * update tacotron2 * update tacotron_aishell3's README * add images for exampels/tacotron2_aishell3's README * update README for examples/ge2e * add STFT back * add extra_config keys into the default config of tacotron * fix typos and docs * update README and doc * update docstrings for tacotron * update doc * update README * add links to downlaod pretrained models * refine READMEs and clean code * add praatio into requirements for running the experiments * format code with pre-commit * simplify text processing code and update notebook
2021-05-13 17:49:50 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import paddle\n",
"from matplotlib import pyplot as plt\n",
"from IPython import display as ipd\n",
"import soundfile as sf\n",
"import librosa.display\n",
"from parakeet.utils import display\n",
"paddle.set_device(\"gpu:0\")\n",
"import sys\n",
"sys.path.append(\"../../\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 加载模型"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"vocab_phones:\n",
" Vocab(size: 68,\n",
"stoi:\n",
"OrderedDict([('<pad>', 0), ('<unk>', 1), ('<s>', 2), ('</s>', 3), ('$', 4), ('%', 5), ('&r', 6), ('a', 7), ('ai', 8), ('an', 9), ('ang', 10), ('ao', 11), ('b', 12), ('c', 13), ('ch', 14), ('d', 15), ('e', 16), ('ea', 17), ('ei', 18), ('en', 19), ('eng', 20), ('er', 21), ('f', 22), ('g', 23), ('h', 24), ('i', 25), ('ia', 26), ('iai', 27), ('ian', 28), ('iang', 29), ('iao', 30), ('ie', 31), ('ien', 32), ('ieng', 33), ('ii', 34), ('iii', 35), ('io', 36), ('iou', 37), ('j', 38), ('k', 39), ('l', 40), ('m', 41), ('n', 42), ('o', 43), ('ou', 44), ('p', 45), ('q', 46), ('r', 47), ('s', 48), ('sh', 49), ('t', 50), ('u', 51), ('ua', 52), ('uai', 53), ('uan', 54), ('uang', 55), ('uei', 56), ('uen', 57), ('ueng', 58), ('uo', 59), ('v', 60), ('van', 61), ('ve', 62), ('ven', 63), ('veng', 64), ('x', 65), ('z', 66), ('zh', 67)]))\n",
"vocab_tones:\n",
" Vocab(size: 10,\n",
"stoi:\n",
"OrderedDict([('<pad>', 0), ('<unk>', 1), ('<s>', 2), ('</s>', 3), ('0', 4), ('1', 5), ('2', 6), ('3', 7), ('4', 8), ('5', 9)]))\n"
]
}
],
"source": [
"from examples.ge2e.audio_processor import SpeakerVerificationPreprocessor\n",
"from parakeet.models.lstm_speaker_encoder import LSTMSpeakerEncoder\n",
"\n",
"# speaker encoder\n",
"p = SpeakerVerificationPreprocessor(\n",
" sampling_rate=16000, \n",
" audio_norm_target_dBFS=-30, \n",
" vad_window_length=30, \n",
" vad_moving_average_width=8, \n",
" vad_max_silence_length=6, \n",
" mel_window_length=25, \n",
" mel_window_step=10, \n",
" n_mels=40, \n",
" partial_n_frames=160, \n",
" min_pad_coverage=0.75, \n",
" partial_overlap_ratio=0.5)\n",
"speaker_encoder = LSTMSpeakerEncoder(n_mels=40, num_layers=3, hidden_size=256, output_size=256)\n",
"speaker_encoder_params_path = \"../../pretrained/ge2e/ge2e_ckpt_0.3/step-3000000.pdparams\"\n",
"speaker_encoder.set_state_dict(paddle.load(speaker_encoder_params_path))\n",
"speaker_encoder.eval()\n",
"\n",
"# synthesizer\n",
"from parakeet.models.tacotron2 import Tacotron2\n",
"from examples.tacotron2_aishell3.chinese_g2p import convert_sentence\n",
"from examples.tacotron2_aishell3.aishell3 import voc_phones, voc_tones\n",
"\n",
"synthesizer = Tacotron2(\n",
" vocab_size=68,\n",
" n_tones=10,\n",
" d_mels= 80,\n",
" d_encoder= 512,\n",
" encoder_conv_layers = 3,\n",
" encoder_kernel_size= 5,\n",
" d_prenet= 256,\n",
" d_attention_rnn= 1024,\n",
" d_decoder_rnn = 1024,\n",
" attention_filters = 32,\n",
" attention_kernel_size = 31,\n",
" d_attention= 128,\n",
" d_postnet = 512,\n",
" postnet_kernel_size = 5,\n",
" postnet_conv_layers = 5,\n",
" reduction_factor = 1,\n",
" p_encoder_dropout = 0.5,\n",
" p_prenet_dropout= 0.5,\n",
" p_attention_dropout= 0.1,\n",
" p_decoder_dropout= 0.1,\n",
" p_postnet_dropout= 0.5,\n",
" d_global_condition=256,\n",
" use_stop_token=False,\n",
")\n",
"params_path = \"../../pretrained/tacotron2_aishell3/tacotron2_aishell3_ckpt_0.3/step-450000.pdparams\"\n",
"synthesizer.set_state_dict(paddle.load(params_path))\n",
"synthesizer.eval()\n",
"\n",
"# vocoder\n",
"from parakeet.models import ConditionalWaveFlow\n",
"vocoder = ConditionalWaveFlow(upsample_factors=[16, 16], n_flows=8, n_layers=8, n_group=16, channels=128, n_mels=80, kernel_size=[3, 3])\n",
"params_path = \"../../pretrained/waveflow/waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams\"\n",
"vocoder.set_state_dict(paddle.load(params_path))\n",
"vocoder.eval()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 生成 speaker encoding"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"首先在当前文件夹下新建文件夹 `ref_audio`,把要作为参考的音频存放在这个文件夹中。格式要求是 wav 格式,音频会被重采样至 16kHz."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <audio controls=\"controls\" >\n",
" <source src=\"data:audio/x-wav;base64,UklGRmaYBQBXQVZFZm10IBAAAAABAAEAgLsAAAB3AQACABAATElTVDoAAABJTkZPSU5BTRcAAADnmb7luqbnp5HmioDlm60y5Y+35qW8AABJU0ZUDgAAAExhdmY1OC40NS4x
" Your browser does not support the audio element.\n",
" </audio>\n",
" "
],
"text/plain": [
"<IPython.lib.display.Audio object>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ref_name = \"女声2.wav\"\n",
"ref_audio_path = f\"./ref_audio/{ref_name}\"\n",
"ipd.Audio(ref_audio_path, normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mel_sequences: (2, 160, 40)\n",
"embed shape: [256]\n"
]
}
],
"source": [
"mel_sequences = p.extract_mel_partials(p.preprocess_wav(ref_audio_path))\n",
"print(\"mel_sequences: \", mel_sequences.shape)\n",
"with paddle.no_grad():\n",
" embed = speaker_encoder.embed_utterance(paddle.to_tensor(mel_sequences))\n",
"print(\"embed shape: \", embed.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 合成频谱"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"由于 AISHELL-3 数据集中使用 `%` 和 `$` 表示韵律词和韵律短语的边界,它们大约对应着较短和较长的停顿,因此在文本中可以使用 `%` 和 `$` 来调节韵律。\n",
"\n",
"值得注意的是,句子的有效字符集仅包含汉字和 `%`, `$`, 因此输入的句子只能包含这些字符。"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['m', 'ei', 'd', 'ang', 'n', 'i', 'j', 've', 'd', 'e', '%', 'x', 'iang', 'iao', 'p', 'i', 'p', 'ieng', 'sh', 'en', 'm', 'e', 'r', 'en', 'd', 'e', 'sh', 'iii', 'h', 'ou', '$', 'n', 'i', 'q', 'ie', 'iao', 'j', 'i', 'zh', 'e', '%', 'zh', 'e', 'g', 'e', 'sh', 'iii', 'j', 'ie', 'sh', 'ang', 'd', 'e', 'r', 'en', '%', 'b', 'ieng', 'f', 'ei', 'd', 'ou', 'j', 'v', 'b', 'ei', 'n', 'i', 'b', 'ieng', 'iou', 'd', 'e', 't', 'iao', 'j', 'ian', '$']\n",
"['0', '3', '0', '1', '0', '3', '0', '2', '0', '5', '0', '0', '3', '4', '0', '1', '0', '2', '0', '2', '0', '5', '0', '2', '0', '5', '0', '2', '0', '4', '0', '0', '3', '0', '4', '4', '0', '4', '0', '5', '0', '0', '4', '0', '4', '0', '4', '0', '4', '0', '4', '0', '5', '0', '2', '0', '0', '4', '0', '1', '0', '1', '0', '4', '0', '4', '0', '3', '0', '3', '3', '0', '5', '0', '2', '0', '4', '0']\n"
]
}
],
"source": [
"sentence = \"每当你觉得%想要批评什么人的时候$你切要记着%这个世界上的人%并非都具备你禀有的条件$\"\n",
"phones, tones = convert_sentence(sentence)\n",
"print(phones)\n",
"print(tones)\n",
"\n",
"phones = np.array([voc_phones.lookup(item) for item in phones], dtype=np.int64)\n",
"tones = np.array([voc_tones.lookup(item) for item in tones], dtype=np.int64)\n",
"\n",
"phones = paddle.to_tensor(phones).unsqueeze(0)\n",
"tones = paddle.to_tensor(tones).unsqueeze(0)\n",
"utterance_embeds = paddle.unsqueeze(embed, 0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 73%|███████▎ | 733/1000 [00:02<00:01, 255.71it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"content exhausted!\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEYCAYAAAB2qXBEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZgdZZn+8e99Tm/pzr6QBAIkSIiAQoAMu4iyowPq8HPAZSIyZhwVdXAB1HHUmXHihrigGAGJjooMikRFAUFUZEvYdwgBTJAkkIXsvZzz/P6o6uSk6eUk3adPdff9ua66uqpOLU+nkzz9vvW+TykiMDMz21m5agdgZmYDmxOJmZn1ihOJmZn1ihOJmZn1ihOJmZn1Sk21AyhHneqjgaZqh2FmQ5WA9gGuw4eR260VgD3q1lCrHEIE20bAPre0jZdWF9QXtz7pDU2xanWh7OPvebD5hog4uS/uXa4BkUgaaOIwHVftMMxsKMrlUU5EWxsAMXMmDV9cTo0KfHPqtUzON5JXjkIUt55y+MnL+uz2q1YXuPuGPco+Pj/5qfF9dvMyDYhEYmY2VAVQpNjjcdXkRGJmVkK1dURrC6qtg5zIjR6FJL585y/Zu7aGuS+1cON/HwMRzLnhVArr10MEaFtP1lPFNX0YUWzX2skiJxIzswxLWiTZrkDiRGJmlmFB0BrlP2yvBicSMxu6SrqjcsOHs/SDr+WYf7iXr06+m1YKFCN4ri1PCzk+cdjpFFasBInhWghRpFBaq7CCdQvdIjEzs50WQMGJxMzMesMtEjOzaku7sHLDhxNbmlFDPRSLsPce6K/LufbhG1lRaOb1N83gifP35623tkIUO3RXrYRcPlkt9t8zi4Dtu9AyyInEzCzjsj3414nEzCzTgvAzEjOz/qb6elRTk5Q1KRTIjRuLcjmor+N9N93CpPzL7FqzmZ+tO5ATmh7ltCmHg3LMyD8IUSS6mgDYj11aWwUUsp1HnEjMzLIsmZCYbU4kZmaZJgr0SSHhinEiMbOBL63Qq7o6KBbJjRtLtLTCli1QKMDYUUShyD/96ha+f/wbKK5eS3HDBlRTy+/bDk9GZ0X6JKIa3VfdCKDori0zM+sNt0jMzGynBdAa2X6ZrROJmQ1Iqq0j1zQM8nkYM4porGfL5OGs2r+Ot87+I8cMf5xd8+sZkSvy/TWHUYgcP3j1VJR/Yes1olBA+fzWl1ZlrVsL2kukuEViZmY7KRAFst0iqVh0kmZIur9kWSfpo5LGSrpJ0lPp1zGVisHMbDAohspeqqFiLZKIeAKYCSApDzwPXAtcANwcEXMlXZBun1+pOMxsAEtrZKmmFtXVUty0CZRjy6mH8NLZG/nygT9nr5rVPNG6C8tbR3Hb2un8dck0Fr11Onc+27CtXlY6qoto29aNBclIrYxP0hgIXVv91V46Dng6Ip4DTgfmp/vnA2/ppxjMzAYgUYhc2Us19NczkjOBn6brEyOi/WnXcmBiZydImgPMAWigseIBmpllUTKzPdvPSCqeSCTVAacBF3b8LCJCUqdTbSJiHjAPYKTGZnw6jpn1pY4jsqivo2WXJuruf4Yv3vs7vvrCCDbM35dLPnAMhZUvlpz5MntzP20dy64Xs9+F1Z2sd231R4vkFODeiFiRbq+QNDkiXpA0GVjZDzGYmQ1IEapal1W5+iO6s9jWrQWwAJidrs8GruuHGMzMBqwiKnuphoq2SCQ1AScA/1Kyey5wtaRzgOeAt1cyBjMbIHJ5ck2NxIw9eeb0EdRuEM0HbuKCg37Hlqhl0bqpfHzSjXzi706jsGo1E/L3UGhr7fAWw8EnGbWV7RZJRRNJRGwExnXYt4pkFJeZmfUgEK2R7bnj2Y7OzMwoVGmiYbmcSMysOiRUU0tuWANqaiTGjmLjXqP469sKDHtaNI8LRt46jJ+/fx+KmzYRxU2cx9EQL4FyxBDo1oKBUSLFicTMLO
OKGR+15URiZpZhQ/5hu5lZO9XXkx8zmsKqNUShwF8/exjnn3kNo/ObmJBfx1Mtk7jqb3/Hrt+dwuh7/kas30hh1WoK7fWypG1dWZG9cu+VEsjPSMzMrHeGfIkUMzPbeRFkfma7E4mZVUyuoYEoFJO6WZN3SdYLRZ59/3S2TGrj6rcdi17eQNsLK5KS7/yN4fE8bRIot/0bC4fACK3OVW/GermcSMzMMixwi8TMzHrJo7bMbMhoL/9eeHkdyud5+sp9+Ooh1zC99iX+vHlvXmodwUkjHuKMmz/AjPffT6HQYfSVckA6SmsIjczqTlC9V+iWy4nEzCzDAlxry8zMekN+sZWZDULtkwPT0VXK51E+R27XSaw7cCJ/vuR7NEcrh3/xUL53wfEUlv2NKBQggj9yKPuwiCidYNjO3VmvELhEipmZ9VLWWyTZTnNmZkNchChGruylJ5JOlvSEpMWSLujk8z0k/UHSfZIelHRqT9d0i8TMepbLo3yeXNMwihs2EsVg6acO42Pv+gUnNC2mUaI1gps2TWXes8dw0q4zAdgldxdtpbWySg3ZCYY7rq/mkUjKA5eQvLl2GbBQ0oKIeLTksM8AV0fEdyXtB1wPTO3uum6RmJllWNCn72w/FFgcEUsiogW4Cji9k1uOTNdHAX/r6aJukZiZZZp2tEUyXtKiku15ETEvXd8NWFry2TLgsA7nfw64UdK5QBNwfE83dCIxsy7lx41NurWGNxL5HNTWkHtxNY3Xis2PtXLNmW/gmiXTKW7clNTKUo6meGbbBdq7tcBdWTspGbW1Qw/bX4qIWb245VnAlRHxNUlHAD+S9JqIKHZ1ghOJmVnG9WGJlOeB3Uu2p6T7Sp0DnAwQEXdIagDGAyu7umhFn5FIGi3pGkmPS3pM0hGSxkq6SdJT6dcxlYzBzGwgay+RUu7Sg4XAdEnTJNUBZwILOhzzV+A4AEn7Ag3Ai91dtNItkm8Av4uIM9KgG4FPATdHxNx06NkFwPkVjsPMepLLkxvWgBrqoRgU16/nxfnj+dyrF7CxWM+mYj0vto3ghKZHOfdjH2bGr+6j2Na6rctKQjmBaonWlmSfu7N6LQJaI99H14o2SR8CbgDywBUR8YikLwCLImIB8DHg+5L+jaRn7T0R3f8gK5ZIJI0CjgHek34DLUCLpNOBY9PD5gO34kRiZtalvizaGBHXkwzpLd332ZL1R4GjduSalWyRTCNpDv1A0oHAPcBHgIkR8UJ6zHJgYmcnS5oDzAFooLGCYZqZZVfStZXtmRqVTCQ1wMHAuRFxl6RvkHRjbRURIanTJlM6XG0ewEiNdfvYrFIkcsOGoalTaJ48gr+eUMdV//gN9q2FE86bxSU3H0lxw0YkEW1t/L7tMBp1N+Tz23ddRRBtbVX7NgazoVwiZRmwLCLuSrevIUksKyRNBki/djkSwMxsqGsf/ttHD9sromKJJCKWA0slzUh3HQc8SjJCYHa6bzZwXaViMDMb+Pq21lYlVHrU1rnAj9MRW0uAs0mS19WSzgGeA95e4RjMrKNcHooFVn7wSC77+MUA3L5pOgvX7cnzN76Gz+x/LMVNmxhRs4hCWxtIhLuxqqaM0idVVdFEEhH3A53NsDyukvc1MxssIqDgV+2amVlvDOVRW2Zm1kvtM9uzzInEbLAreR1ubtQIaGll/fH7csu3vsMBt7+Wzx7x98SWLcSWZorN69mTOyimz0OikL761jPUq2pIPyMxM7Pe2Ynqv/3OicTMLMtCtBX7ptZWpTiRmA02Esrnk24p5VBO5MaNpbjrBJYfPpL8m1axdl0zb54yiz30KG3FtPsql9/aBeaii9nR/obELHMiMTPLOHdtmZnZTvMzEjOrPAnV1JIb3oQah7F5/11ZcUgd+576JN/a85fUSjzcMoLbN07n8geOZMJVY5l+3xoKEWzXY5J2cUV7V5dlhhOJmZntNM8jMTOzXvPDdjPrc/mRI5P3gaQTBtU4jJePnsryw3IUxr
eQX1XkpbnTOPvGNxJtrclIrGKwNw9CFNnaeeVurOwLd22ZmVkv+GG7mZn1mhOJmfUJ1dSQa2xETY1snLk7dS+3sOK
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"outputs = synthesizer.infer(phones, tones=tones, global_condition=utterance_embeds)\n",
"mel_input = paddle.transpose(outputs[\"mel_outputs_postnet\"], [0, 2, 1])\n",
"fig = display.plot_alignment(outputs[\"alignments\"][0].numpy().T)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 合成语音"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"合成的语音会保存在 `syn_audio` 目录下,使用和 reference 相同的文件名。"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"time: 19.793312788009644s\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.collections.PolyCollection at 0x7f09e00f6fd0>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEGCAYAAABmXi5tAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO2dd5QVVbbGv90BkJxzaDICAkKLGJCoA7QDKhgY8zx1zOm9x6DOGEcFdUwzijJiHBV9mBiJoiAgQYIgWQGbnBGQTHef90ff6q6urlynUtf+rcXihrpVu++t+uqcfXYgIQQYhmGYsk9a2AYwDMMwwcCCzzAMkxBY8BmGYRICCz7DMExCYMFnGIZJCBlhG2BE7dq1RVZWVthmMAzDxIolS5bsFULU0XsvsoKflZWFxYsXh20GwzBMrCCiTUbvsUuHYRgmIbDgMwzDJAQWfIZhmITAgs8wDJMQWPAZhmESAgs+wzBMQmDBZxiGSQgs+AzDMAmBBT9gVm8/hKFj5iEvvyBsUxiGSRgs+AEzddVOLNn0K976LjdsUxiGSRgs+AGTX1A4sn9y8pqQLWEYJmmw4AfMT7sOh20CwzAJhQU/QIQQOJHHvnuGYcKBBT9AJq3Ygdk/7QnbDIZhEgoLfoBs2X8sbBMYhkkwLPgB8s683LBNYBgmwbDgB8jOQ8fDNoFhmATDgs8wDJMQWPAZhmESAgs+wzBMQmDBZxiGSQhSBJ+I3iSi3US00uB9IqKXiWg9Ef1IRF1lHJdhGIaxj6wR/tsABpi8PxBA69S/WwCMkXRchmEYxiZSBF8IMRvAfpNNhgB4VxSyAEB1Imog49gMwzCMPYLy4TcCsEX1fGvqtRIQ0S1EtJiIFu/ZwyUIGIZhZBKpRVshxFghRLYQIrtOnTphm8MwDFOmCErwtwFoonreOPUawzAMExBBCf5EANelonV6ADgohNgR0LEZhmEYABkydkJEHwLoDaA2EW0F8AiATAAQQrwGYDKAQQDWAzgK4EYZx2UYhmHsI0XwhRDDLd4XAO6QcSyGYdyxduch/Lj1IK7IbmK9MVMmidSiLcMw/vHs1HUYMeHHsM1gQoQFn2ESgkj9n5fPbTaTCgs+wySEb9buBgAMHTMvZEuYsGDBZ5iEsXzrwbBNYEKCBT8gCtetGYZhwoMFPyBO5LHflGGYcGHBZxiGSQgs+CGy9dejYZvAMEyCYMEPkaMn88M2IbG0eWgKlm05ELYZDBMoLPghQmEbkGBO5hdg1XaOVmGSBQs+k1iIb7lMwmDBDxFivQmVv09fF7YJDBMoLPghMmsdd/UKk31HToZtAsMECgt+QOjlXXHGI8MwQcKCHxDbDx4r9dp/lm8PwRKGYZIKC35ALMn9NWwTGIZJOCz4TOK4d/wPYZvAMKHAgh8QAlw8LSp8voxdaYeOnwrbBCYEWPAD4okv14RtApNg9h4+UeL5kk3sYkwiLPgBcfhEXtgmMAlm58HjYZvARAAW/IRyKr/Ac43+k3kFOBLzG1lBAbvaosyq7QeRNXISvvyR3XAyYMEPmWMhFVBr/dAUPDPNfabp+ws3oc1fpqDzY9MlWhU817/1fdgmhEJcbnQ5L88FANz5AS+0y4AFP2Q27Dkc2rHnbdjn+rMPfbYSAJAXE+EwYs7Pe8M2IRRenPFz2CYwIcCCHzIZ6eEV1HF76B8284Jf3FmxjbO8kwgLviTy8gtcuWfSQqygtv2Au4W8GWt2SbZEPjsOHsPt7y8J2wyGiRRlSvA/WrQ5tGbhf/1iJU5/eKrjz+0+dMJ6I5/Yecid4Id5k7LLsDHzMXnFzrDNsOSjRZtD86cfOBrt4nEyruWnJq/B0ZPxDiyQSZkS/D9/sgIn88NpFv7h91tcfe6t736RbIk1SbgAth0orF20duehkC0xZufB4/jzJyuwfKv/nbfmri+9VnEqP9rrL+t2/eZ5H2Nnb8Tq7YfQ69mZaP
ngZAx8aY4Ey+JLmRJ8xh67PM4q/vHNekmW+M+AF6NzgXd4eCryVAOSLamexpe+Ok/K/s06eP0kQTyDZOrKnfhxi7x1hk37jiK/QGDNjugOAIKgzAn+6Cnxamrx9drdYZvAeGTtzkMlhNyIIyfz8Y3q9/506daix/ke3TpLN/+KnJfnYsbq6K+v2OHWfy/BiE9+9LQPZU1Nb3aTVMqc4L8Zgoskbhw/JTf2Py4x3bJ5+eufMWrKWgx4cQ4++2Gb6bbfpUTnb5P0S2zs99iM5dmphQOd+z9epr9BAn8ixXW5cluyR/VqYif4r8xcjzGzNgR6TCFEmRk5AcBr38r9/kZ+6m0kFjZuR9fPf/VT0Xe5+zdzN9nVbywEAGzef7ToNVItfsvqjXDouP76zAmdGcipkNa7gmJHqpxE1BengyR2gv/stHUYPXVtqVGqjBX9nJfnYMmm/SVee29+Lpo/MBk3vbvY8/6jwr7DxRfA3sMnMHzsAk/7+2Sp+ejWLrN/2mPqh5bBybzSIrfvsPdIqWdNspa1hcsUpq8qjiJa7dG3PH+jeRLdpB93lHrNalYSd2atK3SfxT05UCZSBJ+IBhDROiJaT0Qjdd6/gYj2ENGy1L+b3BxHXQBKKwxeskaBwjj6VdsPYeiY+SWyX0dNWetpv1FE7dMc+ckKzN+4DwePuS+X69X/rHDdm98XpdJ7QRv59PLXxVmlBSGE7V75+nzd1/eqbrwTlmzV3cZP8iIapaN3U3aDMoNatsX/KKigWbX9IP75jfNsac+CT0TpAF4BMBBAewDDiai9zqYfCSG6pP694eZYD322oujx0DElLyKvq+9rdxZHMfT7+7fIGjkJeyym6Qrf/7LfeqOIoiRRLc4N92+QuQ7w2H9Wl3j+/Fc/mW7vt+xt2HPE5yPIYfuBY5i6cmdouSwKSvSSlhN54dSdiiIvzfgZz003P6/1yJBw7O4A1gshNgIAEY0HMATAatNPucDsNPTqVvh6TelombU7D+GIKnv2l71H0Lx2pVLbyfaJh0HI1zjyJRngxl979lNfI3dUjpTja1lo4WoJE23+3DXjFmKj6ubk13ei5tVZ67F531Gc16o2BnSsj8z0NMMZo9NTxCg/8ERePspnpDu0NFqscZlfIsOl0wiAOutoa+o1LUOJ6EcimkBETSQcVyp6J4d2hP/vBZt0P/uNRWhl2CMmOxyVHLnjlHFzi90wXlxEVjkCQc/GrvS4PhIkB46WdOtt2uf/zOSZqeswftEW3PXhD1hkMct0LPjQV/zDBgvbcWLL/mOuPhfUou1/AGQJIToB+ArAO3obEdEtRLSYiBbv2bOn1PtmwurVpZOmc26ofb9ASVEqazw6cVWox//X7I1Fj3/Z615orH6j//m/5a73Xdb4h8YHrA0N/crnyDRtL4X1u80rx749L1f39X2HT+Cvn68s9brRCH9HzJvBeHFtyRD8bQDUI/bGqdeKEELsE0Iow+U3AHTT25EQYqwQIlsIkV2nTh0JptmHdM6O6I/L5WHXFbJ5n75/1Sv7VGLT//lvpUTO6GEVPpkkrEorPDnZ37acHR6ZVuL5w1+YDzpGT9UPoHh6ylq8ZzD71kOJ3okje347gSMnwhX8RQBaE1FzIioH4CoAE9UbEFED1dPBABLV4DUMj44QAqu2H8SW/UdtdaWy60XZ9Zv+6GjHQXdTTCN+PSq3ybY23Jaxxs/zVuYivRLhlP23Gba21xvcxYWznpyBERPc5714FnwhRB6AOwFMQ6GQfyyEWEVEjxPR4NRmdxPRKiJaDuBuADc4PU7WyEleTTXlE52wODcn/PKIhICt2/Ubcl6ei57PzCwR3eSVLw0ShLT+X6/0f/5bqfsbN/cX03PIKHSSKcZthrZeyO99RhnBHjDKdygrrEz1MFCXJz903Nl1J8WHL4SYLIRoI4RoKYR4MvXaw0KIianHDwghOgghOgsh+gghAg9ut1oQ2qjjN1ZnRSpYjUz0PhMGp/KK7f
QSY6/lnfn6U2d1fLvT0dtug1mDTKxKJS/0YTFXXSvHDlt/PWoqqkIIPG3DzeImjv2gwQ1bCThYv/sw2v3VfvnvE3n
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"with paddle.no_grad():\n",
" wav = vocoder.infer(mel_input)\n",
"wav = wav.numpy()[0]\n",
"sf.write(f\"syn_audio/{ref_name}\", wav, samplerate=22050)\n",
"librosa.display.waveplot(wav)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <audio controls=\"controls\" >\n",
" <source src=\"data:audio/wav;base64,UklGRgS6BQBXQVZFZm10IBAAAAABAAEAIlYAAESsAAACABAAZGF0YeC5BQAJAAIAAgD///3//v8AAAIA/v/9//v/9v/y/+//9v/4//v/+P/7//7//f8AAPr/+//4//j/8v/v/+//6v/v/+z/6v/v//H/7v/p/+j/5v/s/+n/7P/z//H/8//0//X/9//4//T/8//z//f/8//1//v/+//8//7/AgD8/wAABwAKAAoADgAJAAsADgANAA0ACwARAAsAEAAQAAoAAwAIAAkACQANAAcACAAKAA8ACwAIAAkABwAKAAQABgAKAAsACQAIAAoACQAMAAsADwAPAA8ADQAOABIACwAJAAoACwANAAUAAgAHAAMABAAFAAoABwAFAAQAAAABAAAAAAACAAMAAgAHAAYAAAACAAAA+v/9/////v////r//v/7//r/AAD2//r//P/7//3/+v/5//3//v/4//r/AAD5//T/+f/4//r//P////v/+f/5/wAAAAD7//3/+f/7//z/AAD///z/+v/8//z/+P/9////+f/6//7/+//6//z/9v/6/wEA+v/+//v/+f8AAPv/+v/1//L/7//v//T/8v/z//v/+v/1//v/8//4//b/9//2//X/+f/w//n/8P/u/+7/7P/r/+v/8P/1//P/8f/0//P/9f/z//H/7//x/+//7f/n/+7/7v/x/+z/6//u/+r/7P/l/+b/6//w/+r/8f/o/+3/8P/r//T/8P/5//T/8P/w//f//f/6//z////6//r/BQACAAcA//8AAAAA/P////b/+P/8/wMACQAIAAkAFQAQAA4ACAAJAAkABwANAAUACAAKAAoACgARAA8AEQAOABAAGAAYABoAEgASAAkABQAKAAsAEwANAAwADAAIAAkADAAPAA0ADQANAAsACAALAAMABAAJAAkACwAIAAUACAALAAkABwAEAAgACwAMAAYACwAKAAcACgAKABAAHgAeABwAGgAbAB8AHAAdABUAFQAYABcAFAARABQAGQAWABEAEwASAA0AEwAQAA0AEQAQABMABwAHAAQA/v8CAP//AwD+//z/+//x//L/8v/x//L/9v/5//P/8P/x/+v/8P/o/+H/5//p/+L/3v/l/+n/7f/1/+//6//2//P/+f/9/wAA///z//H/8//v//D/7f/5//j/+f/8//7/BwAGAA4ADAAQAAsADQABAAEAAAAAAAQADQAOAA0AFgAZAB4AFAAgABcAIgAbAA8ADAAHABUADgARABcAFwAXACEAHgAoACgAIgAgACUAJgAhACQAIgAgABsAGwAYABQADAAZABYADwAWABMAGQANABEAEQAQAA4ABQAPAA0AEAAKAA8AFwAYABoAHAAfABEAFgAQABYAFwAdAB4AFwATABAAEAAPABYAEQAaABYAEAAKAAAABwAFAAMABQAAAAEA//8DAAEA/f8KAAAACAAAAPj/BAAAAAIA+P8AAPz//f/5//j/BQD//wAA/f/+//f/9//z//H/+f/0//n/+f/4//f/9P/v//P/8//1//b/8//w//X/7v/s/+z/5v/w//X/+f/r/+//8P/2//T/8//4//D/9P/t//L/9P/t/+z/7f/n/+X/6f/w/+n/6v/3//r/+f/3//T/7f/u/+z/7P/o/+v/7f/o/+L/5//l/+T/6P/t//P/5P/e/+n/7P/k/+D/7f/y//L/9P/n//P/8//0//X/8v/z/+7/8//v//P/8v/6//X/9f/y/+//8f/q/+z/9P/z//b/8f/w//v/AQACAP3////9/wwABgAIAA8ABgAMAAoADwAOAAkAEwATABQAFgAaABIACAAVABYAHQAXABUAFAAPABUAHwAkAB0AHQAaAB4AFwAIAAMAAgAJABEADQAVABAABgAJAAIADQAIAAsABQAEAAIA8v/w//L//f/z/+7/6f/n/+z//f/1//3/Dg
AJAAkA/f/p/+3/5f/s/+3/6P/0//L/9P/y/+v/5//y/+z/+P/7//f/8f/n/+v/7f/s//b//v/5/wAA9P/1//P/+f8BAAUAAAD7//j/8f/+//X/BAD7/wEAAADt//r/9P/1//X/9f/o/+r/6v/m/+j/3//h/+X/7v/k/+H/6f/n/+f/7f/s/+T/5//q/+7/6P/6/wMAAAAIAAsABAAEAAoABAAQAAwAFgATAA8AGgAVAB4AEwAYABwAHgAiACEAIAAlACYAFwAbABMAFQAYABoAHgAaACAAHQAcABkAGQAVAAcABQD1/+//9//m/+P/3//h/+X/1f/U/8b/yf/P/8T/xf+8/8D/u//A/87/zf/b/9n/1P/b/9T/2v/W/9//7f/n//L/9f/y//z/8v/7/wgAHAAmAB8ALAArAEoAQQBBAE0ARgBPAE8AUwA8ADoAOAAdABoAHAAWAB4AEgAcABQADwAZAAYAEwD///T/9v/g/9z/zP/A/8b/vP/F/77/yf/Z/9P/6f/Z/+j/5v/k//r/4v/r/+3/3v/p/9r/5f/i/97/6//Y/+f/2P/d/9//0f/b/87/2f/g/93/9P/Z/+T/5//f//z/7f/6//n//P/0/+r/8f/p/+P/4v/p/+L/3f/m/+P/5v/t//b/CAARABIABwAFAP3/+//y//z/CgALABQAGgAfAA4AEAANAAYAFwARAP3/8P/d/9f/1P/O/9T/0//Q/9L/xv/C/8f/uf+3/8P/v//H/7n/tP+4/6X/uv+0/7z/yf/J/9D/yP/Q/8j/1v/L/9T/7v/g/wEA/P8AACEACAAxAC0AJgBLADkAUwBLAEUAZgBrAHoAfwB7AJEAkACgAK4AqQCyALkAtQC3AMwAtgC5ALAAnAClAIYAhACCAHIAfwB1AF4AWABDAEIASwA3ADMALgAoAAsABQAAAPX/AgDm/9P/sf+Y/4T/Zv9o/1r/Vf84/xn/Hf/9/uz+8/7h/v3+//7d/uT+xf7D/sn+yv7x/hr/Vv+a/73/5v8OADkAZgCSANAABwEyATsBVAFIAU8BYQFuAbIB3AH1ARMCFgIKAvwB+gERAhACJQIaAv4BxQGHAWYBUQE+AQYBtABnAEIAEQDv/67/S//5/qT+e/4K/ov9aP03/S399vzA/Kz8m/yf/Kb8sfyM/FD8L/wf/Cf8Zfy6/BD9N/1p/bD9Ev5g/qD+E/9v/8n/FwBpAM0ATwHTATkCpQIYA4oD7wNcBNcENgV+BdoFDAZVBqEG9gZfB6EH9QcgCCYIGwgJCPMHwweCBzkH2AZzBv0FaQXjBDMEeAPTAv4BMQFmAKL/BP9A/pH90fwY/Iz77vpQ+pL55/hg+P33r/dh9wH3ufa39tP20vbu9jL3jff991n4sfgB+Xv57vl8+iD7pPsf/IX8DP2o/Ur+Ff9AAEMBRAIZA4gDMQTXBKEFowZwBykIwQgJCVUJngn1CakKNgu+CzMMHQxSDFAMXAydDJIMVwyrC4oKSwn7B6EGhwU4BPsCzwFsACr/5f2t/Kb7jPp3+XH4Hffs9ez09/Ng857yFfK88VfxQPH68MXw4vDi8Pnwj/Hm8b/yyfPH9EX2KPcz+Pz4ivkr+rX6Vvvi+4P8Ff2r/T/+DP/F/x8AlwAEAd0B3gKbAxYFZQZoB8kICAmrCYEKrwrpCx0MegwRDcsMtw2FDiYPQBBcEHwQoxBZD/kOzg1XDLgLEgoaCRMInQbWBa0EYQN/AvYA2P+F/gD9LPzg+tz5evnN+Gb4i/eT9hL2RfW69FP0wvNv8xnzkvJ28inyJfKE8rnyR/Ox8w/0ovQL9YX17fU29n72uvYF94n37/eK+CT5lPkt+l76SPq9+nf7VP0Z/80AEgOUBCcGPwf/B+cInwkeCn8LYwxNDYUOHA9wEFQRSRJnE4cTwBOhE1wS6hHJEIUPWg8ADnsNnwy8C0MLAgrLCFcHQgVXA4wBzv9Z/nP8H/vq+az4oPek9o/1t/QD9BzzVf
Jr8ZfwEPCB71/vde9E75jvze/X7xDw7e8g8EfwPPCy8ALxmvF78kTzm/SX9Rb2AveG93j
" Your browser does not support the audio element.\n",
" </audio>\n",
" "
],
"text/plain": [
"<IPython.lib.display.Audio object>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ipd.Audio(wav, rate=22050)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}