From 27d3585606fa8bbca62ae986892e1c9dea36cbb3 Mon Sep 17 00:00:00 2001
From: chenfeiyu
Date: Wed, 16 Jun 2021 14:42:11 +0000
Subject: [PATCH] add some profiling to unittesting

---
Reviewer notes (text in this area is ignored when the patch is applied):
- test_convin_upsample_net: the torch forward/backward timings now call
  torch.cuda.synchronize() inside the timed region, matching the paddle
  timings and test_pwg_generator.
- synchronize() gained a docstring.
- Hunk headers and the diffstat were recomputed for the added lines; the blob
  hashes on the "index" lines are unchanged and therefore stale.
- Line breaks and context-line whitespace were reconstructed; confirm them
  against the target tree before applying.

 parakeet/utils/profile.py | 28 ++++++++++++++++
 tests/test_pwg.py         | 66 +++++++++++++++++++++++++++++++++++---
 2 files changed, 88 insertions(+), 6 deletions(-)
 create mode 100644 parakeet/utils/profile.py

diff --git a/parakeet/utils/profile.py b/parakeet/utils/profile.py
new file mode 100644
index 0000000..1e246eb
--- /dev/null
+++ b/parakeet/utils/profile.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+
+
+def synchronize():
+    """Synchronize with the CUDA device at paddle's current expected place.
+
+    Call it right before starting and right before stopping a timer so that
+    asynchronously launched GPU work is included in the measurement.
+
+    NOTE(review): relies on private paddle APIs and presumably needs a CUDA
+    place to be selected; behaviour on CPU is not verified here.
+    """
+    place = paddle.fluid.framework._current_expected_place()
+    paddle.fluid.core._cuda_synchronize(place)
diff --git a/tests/test_pwg.py b/tests/test_pwg.py
index 45a480d..7f2205d 100644
--- a/tests/test_pwg.py
+++ b/tests/test_pwg.py
@@ -14,13 +14,18 @@
 
 import paddle
 import torch
+from timer import timer
 from parallel_wavegan.layers import upsample, residual_block
 from parallel_wavegan.models import parallel_wavegan as pwgan
 from parakeet.utils.layer_tools import summary
+from parakeet.utils.profile import synchronize
 
 from parakeet.models.parallel_wavegan import ConvInUpsampleNet, ResidualBlock
 from parakeet.models.parallel_wavegan import PWGGenerator, PWGDiscriminator, ResidualPWGDiscriminator
 
+paddle.set_device("gpu:0")
+device = torch.device("cuda:0")
+
 
 def test_convin_upsample_net():
     net = ConvInUpsampleNet(
@@ -33,15 +38,38 @@ def test_convin_upsample_net():
         nonlinear_activation="LeakyReLU",
         nonlinear_activation_params={"negative_slope": 0.2},
         freq_axis_kernel_size=3,
-        aux_context_window=0)
+        aux_context_window=0).to(device)
     summary(net)
     for k, v in net2.named_parameters():
         print(k, v.shape)
         net.state_dict()[k].set_value(v.data.cpu().numpy())
 
     c = paddle.randn([4, 80, 180])
-    out = net(c)
-    out2 = net2(torch.as_tensor(c.numpy()))
+    synchronize()
+    with timer(unit='s') as t:
+        out = net(c)
+        synchronize()
+    print(f"paddle conv_in_upsample_net forward takes {t.elapse}s.")
+
+    with timer(unit='s') as t:
+        out.sum().backward()
+        synchronize()
+    print(f"paddle conv_in_upsample_net backward takes {t.elapse}s.")
+
+    c_torch = torch.as_tensor(c.numpy()).to(device)
+    # CUDA kernels launch asynchronously: synchronize inside the timed
+    # region, as the paddle timings above do, so both measure the same thing.
+    torch.cuda.synchronize()
+    with timer(unit='s') as t:
+        out2 = net2(c_torch)
+        torch.cuda.synchronize()
+    print(f"torch conv_in_upsample_net forward takes {t.elapse}s.")
+
+    with timer(unit='s') as t:
+        out2.sum().backward()
+        torch.cuda.synchronize()
+    print(f"torch conv_in_upsample_net backward takes {t.elapse}s.")
+
     print(out.numpy()[0])
     print(out2.data.cpu().numpy()[0])
 
@@ -74,7 +102,7 @@ def test_pwg_generator():
         "nonlinear_activation_params": {
             "negative_slope": 0.2
         }
-    })
+    }).to(device)
     summary(net)
     summary(net2)
     for k, v in net2.named_parameters():
@@ -85,8 +113,34 @@ def test_pwg_generator():
             p.set_value(v.data.cpu().numpy())
     x = paddle.randn([4, 1, 180 * 256])
     c = paddle.randn([4, 80, 180 + 4])
-    out = net(x, c)
-    out2 = net2(torch.as_tensor(x.numpy()), torch.as_tensor(c.numpy()))
+
+    synchronize()
+    with timer(unit='s') as t:
+        out = net(x, c)
+        synchronize()
+    print(f"paddle generator forward takes {t.elapse}s.")
+
+    synchronize()
+    with timer(unit='s') as t:
+        out.sum().backward()
+        synchronize()
+    print(f"paddle generator backward takes {t.elapse}s.")
+
+    x_torch = torch.as_tensor(x.numpy()).to(device)
+    c_torch = torch.as_tensor(c.numpy()).to(device)
+
+    torch.cuda.synchronize()
+    with timer(unit='s') as t:
+        out2 = net2(x_torch, c_torch)
+        torch.cuda.synchronize()
+    print(f"torch generator forward takes {t.elapse}s.")
+
+    torch.cuda.synchronize()
+    with timer(unit='s') as t:
+        out2.sum().backward()
+        torch.cuda.synchronize()
+    print(f"torch generator backward takes {t.elapse}s.")
+
     print(out.numpy()[0])
     print(out2.data.cpu().numpy()[0])
     # print(out.shape)