From a8b10f50fb48ab67fe02936358ed8879c163216b Mon Sep 17 00:00:00 2001 From: lfchener Date: Fri, 11 Dec 2020 08:31:34 +0000 Subject: [PATCH] fix EnglishCharacter numericalize in phonectic.py --- parakeet/frontend/phonectic.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/parakeet/frontend/phonectic.py b/parakeet/frontend/phonectic.py index 50cf432..2b41db5 100644 --- a/parakeet/frontend/phonectic.py +++ b/parakeet/frontend/phonectic.py @@ -82,16 +82,20 @@ class EnglishCharacter(Phonetics): start = self.vocab.start_symbol end = self.vocab.end_symbol - chars = ([] if start is None else [start]) \ + words = ([] if start is None else [start]) \ + normalize(sentence) \ + ([] if end is None else [end]) - return chars + return words - def numericalize(self, chars): - ids = [ - self.vocab.lookup(item) for item in chars - if item in self.vocab.stoi - ] + def numericalize(self, words): + ids = [] + for word in words: + if word in self.vocab.stoi: + ids.append(self.vocab.lookup(word)) + continue + for char in word: + if char in self.vocab.stoi: + ids.append(self.vocab.lookup(char)) return ids def reverse(self, ids):