minor fixes to refine code.
parent afe9d4a4f1
commit 3ebed00c96
@@ -25,7 +25,7 @@ class Clip(object):
             batch_max_steps=20480,
             hop_size=256,
             aux_context_window=0, ):
-        """Initialize customized collater for PyTorch DataLoader.
+        """Initialize customized collater for DataLoader.

         Args:
             batch_max_steps (int): The maximum length of input signal in batch.
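For context on the collater parameters touched by this hunk: batch_max_steps is a budget in waveform samples, hop_size converts it to feature frames, and aux_context_window widens the auxiliary-feature slice. A minimal sketch of that relationship, assuming these roles; the helper name slice_lengths and its arithmetic are illustrative, not the class's actual code.

# Sketch of how the three collater parameters typically interact in a
# vocoder data pipeline (illustrative only, not the repo's implementation).
def slice_lengths(batch_max_steps=20480, hop_size=256, aux_context_window=0):
    # make the sample budget an exact multiple of hop_size
    batch_max_steps -= batch_max_steps % hop_size
    batch_max_frames = batch_max_steps // hop_size
    # frames of auxiliary features needed, including left/right context
    aux_frames = batch_max_frames + 2 * aux_context_window
    return batch_max_steps, batch_max_frames, aux_frames

print(slice_lengths())  # (20480, 80, 80)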
@@ -39,7 +39,9 @@ def main():
     parser.add_argument(
         "--metadata", type=str, help="json file with id and file paths ")
     parser.add_argument(
-        "--field-name", type=str, help="json file with id and file paths ")
+        "--field-name",
+        type=str,
+        help="name of the field to compute statistics for.")
     parser.add_argument(
         "--config", type=str, help="yaml format configuration file.")
     parser.add_argument(
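The reworded --field-name help hints at what the script does: read the metadata file and compute statistics over one field. A rough, hypothetical sketch under the assumption that the metadata is line-delimited JSON whose chosen field stores paths to .npy feature files; the record layout and the mean/std reduction are assumptions, not this repo's exact logic.

# Hypothetical statistics script fed by these arguments.
import argparse
import json

import numpy as np


def main():
    parser = argparse.ArgumentParser(
        description="Compute mean and std of a feature field.")
    parser.add_argument(
        "--metadata", type=str, help="json file with id and file paths ")
    parser.add_argument(
        "--field-name",
        type=str,
        help="name of the field to compute statistics for.")
    args = parser.parse_args()

    # assumed layout: one JSON object per line, field value is a .npy path
    with open(args.metadata) as f:
        records = [json.loads(line) for line in f]

    # stack every frame of the chosen field, then reduce per dimension
    feats = np.concatenate(
        [np.load(record[args.field_name]) for record in records], axis=0)
    print("mean:", feats.mean(axis=0), "std:", feats.std(axis=0))


if __name__ == "__main__":
    main()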
@@ -18,9 +18,8 @@ fmax: 7600 # Maximum frequency in mel basis calculation.
 # global_gain_scale: 1.0 # Will be multiplied to all of waveform.
 trim_silence: false # Whether to trim the start and end of silence.
 top_db: 60 # Need to tune carefully if the recording is not good.
-trim_frame_length: 2048 # Frame size in trimming.
-trim_hop_length: 512 # Hop size in trimming.
-# format: "npy" # Feature file format. "npy" or "hdf5" is supported.
+trim_frame_length: 2048 # Frame size in trimming.(in samples)
+trim_hop_length: 512 # Hop size in trimming.(in samples)

 ###########################################################
 # GENERATOR NETWORK ARCHITECTURE SETTING #
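The two comments clarified here (frame and hop sizes in samples) are the knobs that, together with top_db, would typically be passed to librosa.effects.trim during preprocessing. A minimal sketch assuming that mapping; the input file name and sample rate are placeholders.

# Sketch of how the trimming keys are usually consumed, assuming
# librosa.effects.trim is the underlying call (values are in samples).
import librosa

y, sr = librosa.load("sample.wav", sr=24000)  # placeholder input file
y_trimmed, index = librosa.effects.trim(
    y,
    top_db=60,          # anything quieter than 60 dB below peak counts as silence
    frame_length=2048,  # frame size in trimming (in samples)
    hop_length=512)     # hop size in trimming (in samples)
print(len(y), "->", len(y_trimmed), "kept sample range:", index)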
@@ -119,11 +118,11 @@ discriminator_train_start_steps: 100000 # Number of steps to start to train disc
 train_max_steps: 400000 # Number of training steps.
 save_interval_steps: 5000 # Interval steps to save checkpoint.
 eval_interval_steps: 1000 # Interval steps to evaluate the network.
-log_interval_steps: 100 # Interval steps to record the training log.

 ###########################################################
 # OTHER SETTING #
 ###########################################################
 num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
-num_snapshots: 10
-seed: 42
+num_snapshots: 10 # max number of snapshots to keep while training
+seed: 42 # random seed for paddle, random, and np.random
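For reference, the interval settings in this hunk gate a step-based training loop: a checkpoint every save_interval_steps, an evaluation pass every eval_interval_steps, with num_snapshots bounding how many checkpoints are kept. An illustrative skeleton under those assumptions, not the trainer's real code.

# Step-based loop skeleton; the checkpoint/eval hooks are placeholders.
from collections import deque

train_max_steps = 400000
save_interval_steps = 5000
eval_interval_steps = 1000
num_snapshots = 10

snapshots = deque(maxlen=num_snapshots)  # oldest snapshot names fall off

for step in range(1, train_max_steps + 1):
    # ... one generator/discriminator update would happen here ...
    if step % save_interval_steps == 0:
        snapshots.append(f"snapshot_iter_{step}")  # save + remember checkpoint
    if step % eval_interval_steps == 0:
        pass  # run evaluation on the dev set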
@@ -147,8 +147,11 @@ def process_sentence(config: Dict[str, Any],

     # adjust time to make num_samples == num_frames * hop_length
     num_frames = logmel.shape[1]
-    y = np.pad(y, (0, config.n_fft), mode="reflect")
-    y = y[:num_frames * config.hop_length]
+    if y.size < num_frames * config.hop_length:
+        y = np.pad(y, (0, num_frames * config.hop_length - y.size),
+                   mode="reflect")
+    else:
+        y = y[:num_frames * config.hop_length]
     num_sample = y.shape[0]

     mel_path = output_dir / (utt_id + "_feats.npy")
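The new branch pads or truncates so that the waveform length equals num_frames * hop_length exactly, instead of always reflect-padding by n_fft and then slicing. A standalone check of the resulting invariant; hop_length, num_frames, and the utterance length below are arbitrary.

# Standalone check of the pad-or-trim invariant enforced in the hunk above.
import numpy as np

hop_length = 256
num_frames = 190
y = np.random.randn(48000).astype(np.float32)  # stand-in utterance

target = num_frames * hop_length  # 48640 samples
if y.size < target:
    y = np.pad(y, (0, target - y.size), mode="reflect")  # pad exact deficit
else:
    y = y[:target]  # drop the surplus samples
assert y.size == num_frames * hop_length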
@@ -241,13 +244,16 @@ def main():
         list((root_dir / "PhoneLabeling").rglob("*.interval")))

     # split data into 3 sections
-    train_wav_files = wav_files[:9800]
-    dev_wav_files = wav_files[9800:9900]
-    test_wav_files = wav_files[9900:]
+    num_train = 9800
+    num_dev = 100
+
+    train_wav_files = wav_files[:num_train]
+    dev_wav_files = wav_files[num_train:num_train + num_dev]
+    test_wav_files = wav_files[num_train + num_dev:]

-    train_alignment_files = alignment_files[:9800]
-    dev_alignment_files = alignment_files[9800:9900]
-    test_alignment_files = alignment_files[9900:]
+    train_alignment_files = alignment_files[:num_train]
+    dev_alignment_files = alignment_files[num_train:num_train + num_dev]
+    test_alignment_files = alignment_files[num_train + num_dev:]

     train_dump_dir = dumpdir / "train" / "raw"
     train_dump_dir.mkdir(parents=True, exist_ok=True)
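Replacing the hard-coded slice indices with num_train and num_dev keeps the three splits contiguous and non-overlapping. A quick sanity check of the split logic; the 10000-file corpus size here is illustrative.

# Sanity check that the named split covers every file exactly once.
wav_files = [f"utt_{i:05d}.wav" for i in range(10000)]
num_train = 9800
num_dev = 100

train = wav_files[:num_train]
dev = wav_files[num_train:num_train + num_dev]
test = wav_files[num_train + num_dev:]

assert len(train) + len(dev) + len(test) == len(wav_files)
print(len(train), len(dev), len(test))  # 9800 100 100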