ParakeetEricRoss/parakeet/models/deepvoice3/compute_timestamp_ratio.py

72 lines
2.0 KiB
Python

# Part of this code was adapted from https://github.com/r9y9/deepvoice3_pytorch/tree/master/compute_timestamp_ratio.py
# Copyright (c) 2017: Ryuichi Yamamoto.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import io
import numpy as np
# sys.path.append("../")
from hparams import hparams, hparams_debug_string
from data import TextDataSource, MelSpecDataSource
from nnmnkwii.datasets import FileSourceDataset
from tqdm import trange
from parakeet import g2p as frontend
def build_parser():
    """Build the command-line parser for the timestamp-ratio script.

    The parser defines three arguments:

    * ``--hparams``: string of hyper parameter overrides (default ``""``).
    * ``--preset``: required path of a JSON file with preset parameters.
    * ``data_root``: positional path of the dataset.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description="Compute output/input timestamp ratio.")
    parser.add_argument(
        "--hparams", type=str, default="", help="Hyper parameters.")
    parser.add_argument(
        "--preset",
        type=str,
        required=True,
        help="Path of preset parameters (json).")
    parser.add_argument("data_root", type=str, help="path of the dataset.")
    return parser
if __name__ == "__main__":
    # Script entry point: load hyper parameters, walk the dataset once and
    # print the total input length, the total number of output steps and
    # their ratio (output / input).
    parser = build_parser()
    # parse_known_args() is used so unrecognized extra options are ignored
    # instead of aborting the run.
    args, _ = parser.parse_known_args()
    data_root = args.data_root
    preset = args.preset

    # Load preset if specified
    if preset is not None:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding.
        with io.open(preset, encoding="utf-8") as f:
            hparams.parse_json(f.read())
    # Override hyper parameters
    hparams.parse(args.hparams)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if hparams.name != "deepvoice3":
        raise ValueError(
            'hparams.name must be "deepvoice3", got {!r}'.format(hparams.name))

    X = FileSourceDataset(TextDataSource(data_root))
    Mel = FileSourceDataset(MelSpecDataSource(data_root))

    in_sizes = []
    out_sizes = []
    for i in trange(len(X)):
        x, m = X[i], Mel[i]
        if X.file_data_source.multi_speaker:
            # For multi-speaker data the item is indexable and its first
            # element is the one measured here (presumably the text
            # sequence -- confirm against TextDataSource).
            x = x[0]
        # Length along the first axis of each item.
        in_sizes.append(x.shape[0])
        out_sizes.append(m.shape[0])

    input_timestamps = np.sum(in_sizes)
    if input_timestamps == 0:
        # An empty dataset (or only zero-length inputs) would otherwise
        # produce a division by zero and print a meaningless nan/inf ratio.
        sys.exit(
            "No input timestamps found under {!r}; cannot compute the "
            "ratio.".format(data_root))

    # Output steps = total mel length / outputs_per_step / downsample_step.
    output_timestamps = (
        np.sum(out_sizes) / hparams.outputs_per_step / hparams.downsample_step)

    print(input_timestamps, output_timestamps,
          output_timestamps / input_timestamps)

    sys.exit(0)