Merge pull request #717 from baiyfbupt/slim_qat
Add slim quantization module
This commit is contained in:
commit
0da95ed13b
|
@ -0,0 +1,34 @@
|
||||||
|
> 运行示例前请先安装1.2.0或更高版本PaddleSlim

# 模型量化压缩教程

## 概述

该示例使用PaddleSlim提供的[量化压缩API](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)对OCR模型进行压缩。
在阅读该示例前,建议您先了解以下内容:

- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)

## 安装PaddleSlim

可按照[PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)中的步骤安装PaddleSlim。

## 量化训练

进入PaddleOCR根目录,通过以下命令对模型进行量化:

```bash
python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=det_mv3_db/best_accuracy Global.save_model_dir=./output/quant_model
```

## 评估并导出

在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:

```bash
python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_model
```
|
@ -0,0 +1,129 @@
|
||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
__dir__ = os.path.dirname(__file__)
|
||||||
|
sys.path.append(__dir__)
|
||||||
|
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
|
||||||
|
sys.path.append(
|
||||||
|
os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
|
||||||
|
|
||||||
|
|
||||||
|
def set_paddle_flags(**kwargs):
    """Export the given FLAGS_* settings as environment variables.

    Each keyword becomes an environment variable holding ``str(value)``.
    A variable that is already present in the environment is left
    untouched, so shell-level overrides always win.
    """
    for name, val in kwargs.items():
        if os.environ.get(name) is None:
            os.environ[name] = str(val)


# NOTE(paddle-dev): these flags must be exported before `import paddle`;
# setting them any later has no effect.
set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
|
||||||
|
|
||||||
|
import program
|
||||||
|
from paddle import fluid
|
||||||
|
from ppocr.utils.utility import initial_logger
|
||||||
|
logger = initial_logger()
|
||||||
|
from ppocr.utils.save_load import init_model, load_params
|
||||||
|
from ppocr.utils.character import CharacterOps
|
||||||
|
from ppocr.utils.utility import create_module
|
||||||
|
from ppocr.data.reader_main import reader_main
|
||||||
|
|
||||||
|
from paddleslim.quant import quant_aware, convert
|
||||||
|
from paddle.fluid.layer_helper import LayerHelper
|
||||||
|
from eval_utils.eval_det_utils import eval_det_run
|
||||||
|
from eval_utils.eval_rec_utils import eval_rec_run
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Export a quantization-aware-trained OCR model as an inference model.

    Pipeline: build the eval graph from the config, insert fake
    quant/dequant ops with PaddleSlim's ``quant_aware``, restore the
    trained checkpoint, ``convert`` the graph to its deployable form,
    run one evaluation pass, and save the result with
    ``fluid.io.save_inference_model``.
    """
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    startup_prog, eval_program, place, config, alg_type = program.preprocess()
    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)
    eval_program = eval_program.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Insert fake quantize/dequantize ops into the eval graph.
    eval_program = quant_aware(
        eval_program, place, quant_config, scope=None, for_test=True)
    init_model(config, eval_program, exe)

    # 2. Convert the program before saving the inference program.
    # The dtype of eval_program's weights is float32, but in int8 range.
    eval_program = convert(eval_program, place, quant_config, scope=None)

    eval_fetch_name_list = fetches_var_name
    eval_fetch_varname_list = [v.name for v in target_vars]
    eval_reader = reader_main(config=config, mode="eval")
    quant_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list,
    }

    # Evaluate the converted graph once so the exported metrics are known.
    if alg_type == 'det':
        final_metrics = eval_det_run(exe, config, quant_info_dict, "eval")
    else:
        final_metrics = eval_rec_run(exe, config, quant_info_dict, "eval")
    print(final_metrics)

    # 3. Save inference model
    model_path = "./quant_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
    fluid.io.save_inference_model(
        dirname=model_path,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=eval_program,
        model_filename=model_path + '/model',
        params_filename=model_path + '/params')
    print("model saved as {}".format(model_path))


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,188 @@
|
||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys.path.append(__dir__)
|
||||||
|
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
|
||||||
|
sys.path.append(
|
||||||
|
os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
|
||||||
|
|
||||||
|
|
||||||
|
def set_paddle_flags(**kwargs):
    """Publish FLAGS_* options to the process environment.

    Every keyword argument is written to ``os.environ`` as a string,
    skipping keys that already exist so externally supplied values are
    never clobbered.
    """
    for flag, flag_value in kwargs.items():
        if os.environ.get(flag) is None:
            os.environ[flag] = str(flag_value)


# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
|
||||||
|
|
||||||
|
import tools.program as program
|
||||||
|
from paddle import fluid
|
||||||
|
from ppocr.utils.utility import initial_logger
|
||||||
|
logger = initial_logger()
|
||||||
|
from ppocr.data.reader_main import reader_main
|
||||||
|
from ppocr.utils.save_load import init_model
|
||||||
|
from paddle.fluid.contrib.model_stat import summary
|
||||||
|
|
||||||
|
# quant dependencies
|
||||||
|
import paddle
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
from paddleslim.quant import quant_aware, convert
|
||||||
|
from paddle.fluid.layer_helper import LayerHelper
|
||||||
|
|
||||||
|
|
||||||
|
def pact(x):
    """PACT activation preprocessing for quantization-aware training.

    Clips *x* into ``[-u, u]`` where ``u`` is a learnable scalar
    threshold (one parameter per input variable, initialized to 20 and
    L2-regularized) so the subsequent activation quantizer sees a
    bounded range.

    Args:
        x(Tensor): Paddle Tensor, need to be preprocess before quantization

    Returns:
        The processed Tensor x.
    """
    helper = LayerHelper("pact", **locals())
    init_thres = 20
    u_param_attr = fluid.ParamAttr(
        name=x.name + '_pact',
        initializer=fluid.initializer.ConstantInitializer(value=init_thres),
        regularizer=fluid.regularizer.L2Decay(0.0001),
        learning_rate=1)
    u_param = helper.create_parameter(
        attr=u_param_attr, shape=[1], dtype='float32')
    # Upper clip: x - relu(x - u) == min(x, u)
    x = fluid.layers.elementwise_sub(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)))
    # Lower clip: x + relu(-u - x) == max(x, -u)
    x = fluid.layers.elementwise_add(
        x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)))
    return x
|
||||||
|
|
||||||
|
|
||||||
|
def get_optimizer():
    """Return the optimizer used for the quantization-aware pass (Adam, lr=0.001)."""
    learning_rate = 0.001
    return fluid.optimizer.AdamOptimizer(learning_rate)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run quantization-aware training for a PaddleOCR det/rec model.

    Uses the module-level ``config``/``train_program``/``startup_program``/
    ``place``/``train_alg_type`` prepared by ``program.preprocess()`` in the
    ``__main__`` guard. Builds train and eval graphs, wraps both with
    PaddleSlim's ``quant_aware`` (PACT activation preprocessing), then hands
    off to the shared train/eval loops in ``tools/program``.
    """
    # Build the training graph and unpack its outputs.
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]
    model_average = train_build_outputs[-1]

    # Build a separate eval graph and freeze it for testing.
    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)
    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware).
    # Fake quantize/dequantize operators are inserted into both graphs
    # according to the weight and activation quantization types above.
    act_preprocess_func = pact
    optimizer_func = get_optimizer
    executor = exe

    eval_program = quant_aware(
        eval_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=True)
    quant_train_program = quant_aware(
        train_program,
        place,
        quant_config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False,
        return_program=True)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        quant_train_program, train_opt_loss_name, for_quant=True)
    init_model(config, quant_train_program, exe)

    train_info_dict = {
        'compile_program': train_compile_program,
        'train_program': quant_train_program,
        'reader': train_loader,
        'fetch_name_list': train_fetch_name_list,
        'fetch_varname_list': train_fetch_varname_list,
        'model_average': model_average,
    }
    eval_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list,
    }

    if train_alg_type == 'det':
        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
    else:
        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)


if __name__ == '__main__':
    startup_program, train_program, place, config, train_alg_type = program.preprocess(
    )
    main()
|
|
@ -67,6 +67,7 @@ class DetModel(object):
|
||||||
|
|
||||||
image = fluid.layers.data(
|
image = fluid.layers.data(
|
||||||
name='image', shape=image_shape, dtype='float32')
|
name='image', shape=image_shape, dtype='float32')
|
||||||
|
image.stop_gradient = False
|
||||||
if mode == "train":
|
if mode == "train":
|
||||||
if self.algorithm == "EAST":
|
if self.algorithm == "EAST":
|
||||||
h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
|
h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
|
||||||
|
@ -108,7 +109,10 @@ class DetModel(object):
|
||||||
name='tvo', shape=[9, 128, 128], dtype='float32')
|
name='tvo', shape=[9, 128, 128], dtype='float32')
|
||||||
input_tco = fluid.layers.data(
|
input_tco = fluid.layers.data(
|
||||||
name='tco', shape=[3, 128, 128], dtype='float32')
|
name='tco', shape=[3, 128, 128], dtype='float32')
|
||||||
feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
|
feed_list = [
|
||||||
|
image, input_score, input_border, input_mask, input_tvo,
|
||||||
|
input_tco
|
||||||
|
]
|
||||||
labels = {'input_score': input_score,\
|
labels = {'input_score': input_score,\
|
||||||
'input_border': input_border,\
|
'input_border': input_border,\
|
||||||
'input_mask': input_mask,\
|
'input_mask': input_mask,\
|
||||||
|
|
|
@ -68,6 +68,7 @@ class RecModel(object):
|
||||||
image_shape.insert(0, -1)
|
image_shape.insert(0, -1)
|
||||||
if mode == "train":
|
if mode == "train":
|
||||||
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
||||||
|
image.stop_gradient = False
|
||||||
if self.loss_type == "attention":
|
if self.loss_type == "attention":
|
||||||
label_in = fluid.data(
|
label_in = fluid.data(
|
||||||
name='label_in',
|
name='label_in',
|
||||||
|
@ -146,6 +147,7 @@ class RecModel(object):
|
||||||
)
|
)
|
||||||
image_shape = deepcopy(self.image_shape)
|
image_shape = deepcopy(self.image_shape)
|
||||||
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
||||||
|
image.stop_gradient = False
|
||||||
if self.loss_type == "srn":
|
if self.loss_type == "srn":
|
||||||
encoder_word_pos = fluid.data(
|
encoder_word_pos = fluid.data(
|
||||||
name="encoder_word_pos",
|
name="encoder_word_pos",
|
||||||
|
|
|
@ -35,6 +35,7 @@ class CTCPredict(object):
|
||||||
self.fc_decay = params.get("fc_decay", 0.0004)
|
self.fc_decay = params.get("fc_decay", 0.0004)
|
||||||
|
|
||||||
def __call__(self, inputs, labels=None, mode=None):
|
def __call__(self, inputs, labels=None, mode=None):
|
||||||
|
with fluid.scope_guard("skip_quant"):
|
||||||
encoder_features = self.encoder(inputs)
|
encoder_features = self.encoder(inputs)
|
||||||
if self.encoder_type != "reshape":
|
if self.encoder_type != "reshape":
|
||||||
encoder_features = fluid.layers.concat(encoder_features, axis=1)
|
encoder_features = fluid.layers.concat(encoder_features, axis=1)
|
||||||
|
|
|
@ -225,10 +225,12 @@ def build_export(config, main_prog, startup_prog):
|
||||||
return feeded_var_names, target_vars, fetches_var_name
|
return feeded_var_names, target_vars, fetches_var_name
|
||||||
|
|
||||||
|
|
||||||
def create_multi_devices_program(program, loss_var_name):
|
def create_multi_devices_program(program, loss_var_name, for_quant=False):
|
||||||
build_strategy = fluid.BuildStrategy()
|
build_strategy = fluid.BuildStrategy()
|
||||||
build_strategy.memory_optimize = False
|
build_strategy.memory_optimize = False
|
||||||
build_strategy.enable_inplace = True
|
build_strategy.enable_inplace = True
|
||||||
|
if for_quant:
|
||||||
|
build_strategy.fuse_all_reduce_ops = False
|
||||||
exec_strategy = fluid.ExecutionStrategy()
|
exec_strategy = fluid.ExecutionStrategy()
|
||||||
exec_strategy.num_iteration_per_drop_scope = 1
|
exec_strategy.num_iteration_per_drop_scope = 1
|
||||||
compile_program = fluid.CompiledProgram(program).with_data_parallel(
|
compile_program = fluid.CompiledProgram(program).with_data_parallel(
|
||||||
|
|
Loading…
Reference in New Issue