fix EnglishCharacter frontend, add space in sentence ids
This commit is contained in:
parent
3a19150344
commit
5b93de8a2e
|
@@ -29,4 +29,4 @@ def normalize(sentence):
|
||||||
sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence)
|
sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence)
|
||||||
sentence = sentence.replace("i.e.", "that is")
|
sentence = sentence.replace("i.e.", "that is")
|
||||||
sentence = sentence.replace("e.g.", "for example")
|
sentence = sentence.replace("e.g.", "for example")
|
||||||
return sentence.split()
|
return sentence
|
||||||
|
|
|
@@ -79,23 +79,14 @@ class EnglishCharacter(Phonetics):
|
||||||
self.vocab = Vocab(self.graphemes + self.punctuations)
|
self.vocab = Vocab(self.graphemes + self.punctuations)
|
||||||
|
|
||||||
def phoneticize(self, sentence):
|
def phoneticize(self, sentence):
|
||||||
start = self.vocab.start_symbol
|
words = normalize(sentence)
|
||||||
end = self.vocab.end_symbol
|
|
||||||
|
|
||||||
words = ([] if start is None else [start]) \
|
|
||||||
+ normalize(sentence) \
|
|
||||||
+ ([] if end is None else [end])
|
|
||||||
return words
|
return words
|
||||||
|
|
||||||
def numericalize(self, words):
|
def numericalize(self, sentence):
|
||||||
ids = []
|
ids = [
|
||||||
for word in words:
|
self.vocab.lookup(item) for item in sentence
|
||||||
if word in self.vocab.stoi:
|
if item in self.vocab.stoi
|
||||||
ids.append(self.vocab.lookup(word))
|
]
|
||||||
continue
|
|
||||||
for char in word:
|
|
||||||
if char in self.vocab.stoi:
|
|
||||||
ids.append(self.vocab.lookup(char))
|
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
def reverse(self, ids):
|
def reverse(self, ids):
|
||||||
|
|
Loading…
Reference in New Issue