fix EnglishCharacter frontend, add space in sentence ids

This commit is contained in:
lfchener 2020-12-14 08:57:08 +00:00
parent 3a19150344
commit 5b93de8a2e
2 changed files with 7 additions and 16 deletions

View File

@@ -29,4 +29,4 @@ def normalize(sentence):
sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence) sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence)
sentence = sentence.replace("i.e.", "that is") sentence = sentence.replace("i.e.", "that is")
sentence = sentence.replace("e.g.", "for example") sentence = sentence.replace("e.g.", "for example")
return sentence.split() return sentence

View File

@@ -79,23 +79,14 @@ class EnglishCharacter(Phonetics):
self.vocab = Vocab(self.graphemes + self.punctuations) self.vocab = Vocab(self.graphemes + self.punctuations)
def phoneticize(self, sentence): def phoneticize(self, sentence):
start = self.vocab.start_symbol words = normalize(sentence)
end = self.vocab.end_symbol
words = ([] if start is None else [start]) \
+ normalize(sentence) \
+ ([] if end is None else [end])
return words return words
def numericalize(self, words): def numericalize(self, sentence):
ids = [] ids = [
for word in words: self.vocab.lookup(item) for item in sentence
if word in self.vocab.stoi: if item in self.vocab.stoi
ids.append(self.vocab.lookup(word)) ]
continue
for char in word:
if char in self.vocab.stoi:
ids.append(self.vocab.lookup(char))
return ids return ids
def reverse(self, ids): def reverse(self, ids):