diff --git a/parakeet/models/lstm_speaker_encoder.py b/parakeet/models/lstm_speaker_encoder.py index f6d02a9..0d3f285 100644 --- a/parakeet/models/lstm_speaker_encoder.py +++ b/parakeet/models/lstm_speaker_encoder.py @@ -33,7 +33,7 @@ class LSTMSpeakerEncoder(nn.Layer): if reduce: embed = paddle.mean(normalized_embeds, 0) embed = F.normalize(embed, axis=0) - return embeds + return embed def embed_utterance(self, utterances, initial_states=None): # utterances: [B, T, C] -> embed [C']