2021-07-19 14:31:52 +08:00
|
|
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
import numpy as np
|
2021-08-04 17:38:08 +08:00
|
|
|
import paddle
|
2021-08-17 15:29:30 +08:00
|
|
|
|
2021-07-19 14:31:52 +08:00
|
|
|
from parakeet.data.batch import batch_sequences
|
|
|
|
|
|
|
|
|
|
|
|
def collate_baker_examples(examples):
    """Collate a list of per-utterance example dicts into one batch dict.

    Args:
        examples: Iterable-and-reiterable collection (e.g. a list) of
            mappings. Each mapping must provide the keys "text", "speech",
            "pitch", "energy", "durations", "text_lengths" and
            "speech_lengths".

    Returns:
        dict: Maps "text", "text_lengths", "durations", "speech",
        "speech_lengths", "pitch" and "energy" to the value returned by
        ``paddle.to_tensor`` for the corresponding batched field.
    """

    def _as_arrays(field, dtype):
        # One NumPy array per example for the requested field.
        return [np.array(sample[field], dtype=dtype) for sample in examples]

    # fields = ["text", "text_lengths", "speech", "speech_lengths",
    #           "durations", "pitch", "energy"]
    text_seqs = _as_arrays("text", np.int64)
    speech_seqs = _as_arrays("speech", np.float32)
    pitch_seqs = _as_arrays("pitch", np.float32)
    energy_seqs = _as_arrays("energy", np.float32)
    duration_seqs = _as_arrays("durations", np.int64)

    # The length fields are gathered as-is; NumPy infers the dtype because
    # none is given explicitly.
    text_lens = np.array([sample["text_lengths"] for sample in examples])
    speech_lens = np.array([sample["speech_lengths"] for sample in examples])

    # Merge each list of per-example arrays into one batch array.
    # NOTE(review): batch_sequences presumably pads to a common length --
    # confirm against parakeet.data.batch.
    text_batch = batch_sequences(text_seqs)
    pitch_batch = batch_sequences(pitch_seqs)
    speech_batch = batch_sequences(speech_seqs)
    duration_batch = batch_sequences(duration_seqs)
    energy_batch = batch_sequences(energy_seqs)

    # convert each batch to paddle.Tensor
    (text_t, pitch_t, speech_t, duration_t, energy_t, text_lens_t,
     speech_lens_t) = map(paddle.to_tensor, (
         text_batch,
         pitch_batch,
         speech_batch,
         duration_batch,
         energy_batch,
         text_lens,
         speech_lens, ))

    return {
        "text": text_t,
        "text_lengths": text_lens_t,
        "durations": duration_t,
        "speech": speech_t,
        "speech_lengths": speech_lens_t,
        "pitch": pitch_t,
        "energy": energy_t
    }
|