diff --git a/.gitignore b/.gitignore
index 909b4a7..7906666 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,9 @@
+# IDES
+*.wpr
+*.wpu
+*.udb
+*.ann
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/parakeet/data/datasets.py b/parakeet/data/datasets.py
new file mode 100644
index 0000000..023577d
--- /dev/null
+++ b/parakeet/data/datasets.py
@@ -0,0 +1,49 @@
+from paddle.io import Dataset
+
+from os import listdir
+from os.path import splitext, join
+import librosa
+
+
+class AudioFolderDataset(Dataset):
+    """Dataset over the audio files found directly inside one folder.
+
+    Args:
+        path: Directory that is listed (non-recursively) for audio files.
+        sample_rate: Passed to ``librosa.load`` as ``sr`` for every item.
+        extension: File extension to keep, with or without the leading
+            dot (``"wav"`` and ``".wav"`` select the same files).
+    """
+
+    def __init__(self, path, sample_rate, extension="wav"):
+        self.root = path
+        self.sample_rate = sample_rate
+        self.extension = extension
+        # splitext() returns the suffix *with* its leading dot (".wav"),
+        # so a bare "wav" would never match; normalise before comparing.
+        # An empty extension is left alone so it still selects files
+        # that have no suffix.
+        if extension and not extension.startswith("."):
+            suffix = "." + extension
+        else:
+            suffix = extension
+        # listdir() order is arbitrary; sort so that an index always maps
+        # to the same file across runs and platforms.
+        self.file_names = [
+            join(self.root, name)
+            for name in sorted(listdir(self.root))
+            if splitext(name)[-1] == suffix
+        ]
+        self.length = len(self.file_names)
+
+    def __len__(self):
+        """Return the number of matching files found at construction."""
+        return self.length
+
+    def __getitem__(self, i):
+        """Load file ``i`` with librosa and return only the audio array."""
+        file_name = self.file_names[i]
+        # librosa.load returns (audio, sample_rate); only the audio is
+        # returned to the caller, so the rate is discarded.
+        y, _ = librosa.load(file_name, sr=self.sample_rate)
+        return y