# Dynamic to static
Parent: d96e2828b8 · Commit: ea5cb8e71f
````diff
@@ -58,10 +58,10 @@ For more help on arguments
 
 ## Synthesis
 
-After training Tacotron2, spectrograms can be synthesized by running ``synthesis.py``.
+After training Tacotron2, spectrograms can be synthesized by running ``synthesize.py``.
 
 ```bash
-python synthesis.py \
+python synthesize.py \
     --config=${CONFIGPATH} \
     --checkpoint_path=${CHECKPOINTPATH} \
     --input=${TEXTPATH} \
````
```diff
@@ -44,7 +44,8 @@ def fold(x, n_group):
     Tensor : [shape=(\*, time_steps // n_group, group)]
         Folded tensor.
     """
-    *spatial_shape, time_steps = x.shape
+    spatial_shape = list(x.shape[:-1])
+    time_steps = paddle.shape(x)[-1]
     new_shape = spatial_shape + [time_steps // n_group, n_group]
     return paddle.reshape(x, new_shape)
 
```
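Why this hunk helps, as I understand the dygraph-to-static machinery: `x.shape` is evaluated when the program is captured and can contain `-1` placeholders for dynamic axes, while `paddle.shape(x)` returns a tensor that resolves at run time. A minimal sketch of the converted `fold`, assuming Paddle 2.x (the demo shapes are made up):

```python
import paddle

def fold(x, n_group):
    # Leading dims stay as Python ints; the (possibly dynamic) time axis
    # is read through paddle.shape so it resolves at run time.
    spatial_shape = list(x.shape[:-1])
    time_steps = paddle.shape(x)[-1]
    new_shape = spatial_shape + [time_steps // n_group, n_group]
    return paddle.reshape(x, new_shape)

x = paddle.randn([2, 4, 64])      # (batch, channels, time); demo values
print(fold(x, n_group=8).shape)   # [2, 4, 8, 8]
```

`paddle.reshape` accepts a target shape that mixes Python ints with scalar tensors, so only the dynamic axis pays the cost of a runtime shape read.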
```diff
@@ -232,7 +233,7 @@ class ResidualBlock(nn.Layer):
         """
         if self.training:
             raise ValueError("Only use start sequence at evaluation mode.")
-        self._conv_buffer = None
+        self._conv_buffer = paddle.zeros([1])
 
         # NOTE: call self.conv's weight norm hook explicitly since
         # its weight will be visited directly in `add_input` without
```
```diff
@@ -263,10 +264,9 @@ class ResidualBlock(nn.Layer):
             A row of the skip output.
         """
         x_row_in = x_row
-        if self._conv_buffer is None:
+        if len(paddle.shape(self._conv_buffer)) == 1:
             self._init_buffer(x_row)
         self._update_buffer(x_row)
-
         rw = self.rw
         x_row = F.conv2d(
             self._conv_buffer,
```
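Context for the two `_conv_buffer` hunks above: program capture cannot branch on Python `None`, so the uninitialized convolution cache is encoded as a rank-1 placeholder tensor, and a rank check replaces the `is None` test. A standalone sketch of the pattern, with illustrative shapes (`ensure_buffer` is a made-up helper):

```python
import paddle

buf = paddle.zeros([1])                 # rank-1 placeholder = "no cache yet"

def ensure_buffer(buf, x_row):
    # The real cache is 4-D (B, C, T, W), so rank 1 uniquely marks the
    # uninitialized state without comparing against None.
    if len(paddle.shape(buf)) == 1:
        buf = paddle.zeros_like(x_row)  # lazy init on the first row
    return buf

x_row = paddle.randn([1, 64, 1, 100])
print(ensure_buffer(buf, x_row).shape)  # [1, 64, 1, 100]
```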
```diff
@@ -275,7 +275,6 @@ class ResidualBlock(nn.Layer):
             padding=[0, 0, rw // 2, (rw - 1) // 2],
             dilation=self.dilations)
         x_row += self.condition_proj(condition_row)
 
         content, gate = paddle.chunk(x_row, 2, axis=1)
         x_row = paddle.tanh(content) * F.sigmoid(gate)
-
```
```diff
@@ -329,7 +328,7 @@ class ResidualNet(nn.LayerList):
         if len(dilations_h) != n_layer:
             raise ValueError(
                 "number of dilations_h should equal the number of layers")
-        super().__init__()
+        super(ResidualNet, self).__init__()
         for i in range(n_layer):
             dilation = (dilations_h[i], 2**i)
             layer = ResidualBlock(residual_channels, condition_channels,
```
```diff
@@ -539,27 +538,21 @@ class Flow(nn.Layer):
             transformation from x to z.
         """
         z_0 = z[:, :, :1, :]
-        x = []
-        logs_list = []
-        b_list = []
-        x.append(z_0)
+        x = paddle.zeros_like(z)
+        x[:, :, :1, :] = z_0
 
         self._start_sequence()
-        for i in range(1, self.n_group):
-            x_row = x[-1]  # actually i-1:i
+        num_step = paddle.ones([1], dtype='int32') * (self.n_group)
+        for i in range(1, num_step):
+            x_row = x[:, :, i - 1:i, :]
             z_row = z[:, :, i:i + 1, :]
             condition_row = condition[:, :, i:i + 1, :]
 
-            x_next_row, (logs, b) = self._inverse_row(z_row, x_row,
-                                                      condition_row)
-            x.append(x_next_row)
-            logs_list.append(logs)
-            b_list.append(b)
-
-        x = paddle.concat(x, 2)
-        logs = paddle.concat(logs_list, 2)
-        b = paddle.concat(b_list, 2)
-        return x, (logs, b)
+            x_next_row, (logs, b) = self._inverse_row(z_row, x_row,
+                                                      condition_row)
+            x[:, :, i:i + 1, :] = x_next_row
+
+        return x
 
 
 class WaveFlow(nn.LayerList):
```
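The core of this hunk replaces a Python list plus `paddle.concat` with a tensor that is pre-allocated and filled row by row: list appends live outside the captured program, while slice assignment stays inside it, and turning the loop bound into a tensor (`num_step`) is what lets the converter lower the autoregressive loop into the graph. A toy version of the write-in-place pattern, with an identity transform standing in for `_inverse_row`:

```python
import paddle

z = paddle.randn([4, 1, 8, 100])   # (B, C, n_group, T // n_group); demo shape
n_group = 8

x = paddle.zeros_like(z)           # pre-allocate instead of x = []
x[:, :, :1, :] = z[:, :, :1, :]    # seed the first row

for i in range(1, n_group):
    x_row = x[:, :, i - 1:i, :]    # read the previously generated row
    # a real model would run _inverse_row here; identity keeps it minimal
    x[:, :, i:i + 1, :] = x_row

print(x.shape)                     # [4, 1, 8, 100]
```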
```diff
@@ -611,16 +604,18 @@ class WaveFlow(nn.LayerList):
         perms = []
         for i in range(n_flows):
             if i < n_flows // 2:
-                perms.append(indices[::-1])
+                perm = indices[::-1]
             else:
                 perm = list(reversed(indices[:half])) + list(
                     reversed(indices[half:]))
-                perms.append(perm)
+            perm = paddle.to_tensor(perm)
+            self.register_buffer(perm.name, perm)
+            perms.append(perm)
         return perms
 
     def _trim(self, x, condition):
         assert condition.shape[-1] >= x.shape[-1]
-        pruned_len = int(x.shape[-1] // self.n_group * self.n_group)
+        pruned_len = int(paddle.shape(x)[-1] // self.n_group * self.n_group)
 
         if x.shape[-1] > pruned_len:
             x = x[:, :pruned_len]
```
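Registering each permutation with `register_buffer` makes it part of the layer's persistable state, so it is saved with the model and carried into the exported program instead of living in a plain Python list. A hypothetical miniature of the idea (`Shuffler` is not from this codebase):

```python
import paddle

class Shuffler(paddle.nn.Layer):
    def __init__(self, n):
        super().__init__()
        perm = paddle.to_tensor(list(reversed(range(n))), dtype='int64')
        # Buffers are saved in the state dict and captured by paddle.jit,
        # unlike a plain Python list of indices.
        self.register_buffer("perm", perm)

    def forward(self, x):
        # apply the stored permutation along axis 1
        return paddle.index_select(x, self.perm, axis=1)

layer = Shuffler(4)
x = paddle.arange(8, dtype='float32').reshape([2, 4])
print(layer(x))   # columns reversed
```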
```diff
@@ -707,7 +702,7 @@ class WaveFlow(nn.LayerList):
         for i in reversed(range(self.n_flows)):
             z = geo.shuffle_dim(z, 2, perm=self.perms[i])
             condition = geo.shuffle_dim(condition, 2, perm=self.perms[i])
-            z, (logs, b) = self[i].inverse(z, condition)
+            z = self[i].inverse(z, condition)
 
         x = paddle.squeeze(z, 1)  # (B, H, W)
         batch_size = x.shape[0]
```
```diff
@@ -893,3 +888,21 @@ class WaveFlowLoss(nn.Layer):
         ) - log_det_jacobian
         loss = loss / np.prod(z.shape)
         return loss + self.const
+
+
+class ConditionalWaveFlow2Infer(ConditionalWaveFlow):
+    def forward(self, mel):
+        """Generate raw audio given mel spectrogram.
+
+        Parameters
+        ----------
+        mel : np.ndarray [shape=(C_mel, T_mel)]
+            Mel spectrogram of an utterance (in log-magnitude).
+
+        Returns
+        -------
+        np.ndarray [shape=(T,)]
+            The synthesized audio.
+        """
+        audio = self.predict(mel)
+        return audio
```
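The new `ConditionalWaveFlow2Infer` routes inference through `forward`, which is the method `paddle.jit` traces. A hedged sketch of how such a wrapper could then be exported; `TinyVocoder`, the input spec, and the save path are all assumptions, not part of this commit:

```python
import paddle

class TinyVocoder(paddle.nn.Layer):
    """Stand-in for ConditionalWaveFlow2Infer: forward(mel) -> audio."""

    def forward(self, mel):
        # A real vocoder synthesizes audio; here each mel frame is just
        # collapsed and tiled so the example stays tiny but traceable.
        return paddle.tile(paddle.mean(mel, axis=0), [256])

model = TinyVocoder()
model.eval()
static_model = paddle.jit.to_static(
    model,
    input_spec=[paddle.static.InputSpec(shape=[80, None], dtype='float32')])
paddle.jit.save(static_model, "waveflow_infer")  # hypothetical export path
```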