minor fixes to refine code.

This commit is contained in:
chenfeiyu 2021-07-01 16:14:55 +08:00
parent afe9d4a4f1
commit 3ebed00c96
4 changed files with 23 additions and 16 deletions

View File

@ -25,7 +25,7 @@ class Clip(object):
batch_max_steps=20480,
hop_size=256,
aux_context_window=0, ):
"""Initialize customized collater for PyTorch DataLoader.
"""Initialize customized collater for DataLoader.
Args:
batch_max_steps (int): The maximum length of input signal in batch.

View File

@ -39,7 +39,9 @@ def main():
parser.add_argument(
"--metadata", type=str, help="json file with id and file paths ")
parser.add_argument(
"--field-name", type=str, help="json file with id and file paths ")
"--field-name",
type=str,
help="name of the field to compute statistics for.")
parser.add_argument(
"--config", type=str, help="yaml format configuration file.")
parser.add_argument(

View File

@ -18,9 +18,8 @@ fmax: 7600 # Maximum frequency in mel basis calculation.
# global_gain_scale: 1.0 # Will be multiplied to all of waveform.
trim_silence: false # Whether to trim the start and end of silence.
top_db: 60 # Need to tune carefully if the recording is not good.
trim_frame_length: 2048 # Frame size in trimming.
trim_hop_length: 512 # Hop size in trimming.
# format: "npy" # Feature file format. "npy" or "hdf5" is supported.
trim_frame_length: 2048 # Frame size in trimming (in samples).
trim_hop_length: 512 # Hop size in trimming (in samples).
###########################################################
# GENERATOR NETWORK ARCHITECTURE SETTING #
@ -119,11 +118,11 @@ discriminator_train_start_steps: 100000 # Number of steps to start to train disc
train_max_steps: 400000 # Number of training steps.
save_interval_steps: 5000 # Interval steps to save checkpoint.
eval_interval_steps: 1000 # Interval steps to evaluate the network.
log_interval_steps: 100 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
num_snapshots: 10
seed: 42
num_snapshots: 10 # Maximum number of snapshots to keep while training.
seed: 42 # Random seed for paddle, random, and np.random.

View File

@ -147,8 +147,11 @@ def process_sentence(config: Dict[str, Any],
# adjust time to make num_samples == num_frames * hop_length
num_frames = logmel.shape[1]
y = np.pad(y, (0, config.n_fft), mode="reflect")
y = y[:num_frames * config.hop_length]
if y.size < num_frames * config.hop_length:
y = np.pad(y, (0, num_frames * config.hop_length - y.size),
mode="reflect")
else:
y = y[:num_frames * config.hop_length]
num_sample = y.shape[0]
mel_path = output_dir / (utt_id + "_feats.npy")
@ -241,13 +244,16 @@ def main():
list((root_dir / "PhoneLabeling").rglob("*.interval")))
# split data into 3 sections
train_wav_files = wav_files[:9800]
dev_wav_files = wav_files[9800:9900]
test_wav_files = wav_files[9900:]
num_train = 9800
num_dev = 100
train_alignment_files = alignment_files[:9800]
dev_alignment_files = alignment_files[9800:9900]
test_alignment_files = alignment_files[9900:]
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]
train_alignment_files = alignment_files[:num_train]
dev_alignment_files = alignment_files[num_train:num_train + num_dev]
test_alignment_files = alignment_files[num_train + num_dev:]
train_dump_dir = dumpdir / "train" / "raw"
train_dump_dir.mkdir(parents=True, exist_ok=True)