add slim quantization

2020-09-15 20:17:23 +08:00 · 2020-09-15 20:17:23 +08:00 · 2c6f0b0d55
parent ed6b2f0c71
commit 2c6f0b0d55
7 changed files with 384 additions and 8 deletions
--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
@ -0,0 +1,34 @@
+> 运行示例前请先安装1.2.0或更高版本PaddleSlim
+
+# 模型量化压缩教程
+
+## 概述
+
+该示例使用PaddleSlim提供的[量化压缩API](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)对检测模型进行压缩。
+在阅读该示例前，建议您先了解以下内容：
+
+- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
+- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)
+
+## 安装PaddleSlim
+可按照[PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)中的步骤安装PaddleSlim。
+
+
+
+## 量化训练
+
+进入PaddleOCR根目录，通过以下命令对模型进行量化：
+
+```bash
+python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=det_mv3_db/best_accuracy Global.save_model_dir=./output/quant_model
+```
+
+
+
+## 评估并导出
+
+在得到量化训练保存的模型后，可以运行以下命令先在验证集上评估该模型，再将其导出为inference_model，用于预测部署（导出的模型固定保存在当前目录下的`./quant_model`中）：
+
+```bash
+python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_model
+```
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@ -0,0 +1,129 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+# Resolve this script's directory to an absolute path so that the entries
+# appended to sys.path below do not depend on how the script was invoked
+# (a bare `os.path.dirname(__file__)` can be relative or even empty).
+# The directory three levels up and its tools/ folder are added so that
+# `ppocr` and `program` can be imported.
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
+sys.path.append(
+    os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable.
+
+    A variable that is already present in the environment is left untouched;
+    otherwise it is set to ``str(value)``.
+    """
+    for flag_name, flag_value in kwargs.items():
+        if os.environ.get(flag_name) is not None:
+            continue
+        os.environ[flag_name] = str(flag_value)
+
+
+# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
+# otherwise they will not take effect.
+set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
+
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.save_load import init_model, load_params
+from ppocr.utils.character import CharacterOps
+from ppocr.utils.utility import create_module
+from ppocr.data.reader_main import reader_main
+
+from paddleslim.quant import quant_aware, convert
+from paddle.fluid.layer_helper import LayerHelper
+from eval_utils.eval_det_utils import eval_det_run
+from eval_utils.eval_rec_utils import eval_rec_run
+
+
+def main():
+    """Evaluate a quantized checkpoint and export it as an inference model.
+
+    The program returned by ``program.preprocess()`` is built for export,
+    passed through ``quant_aware`` in test mode, initialised via
+    ``init_model`` and converted with ``convert``. The evaluation metrics are
+    printed and the inference model is written to ``./quant_model``.
+    """
+    # 1. quantization configs
+    quant_config = dict(
+        # weight quantize type, default is 'channel_wise_abs_max'
+        weight_quantize_type='channel_wise_abs_max',
+        # activation quantize type, default is 'moving_average_abs_max'
+        activation_quantize_type='moving_average_abs_max',
+        # weight quantize bit num, default is 8
+        weight_bits=8,
+        # activation quantize bit num, default is 8
+        activation_bits=8,
+        # ops whose name_scope matches an entry of this list are not quantized
+        not_quant_pattern=['skip_quant'],
+        # op types that are quantized
+        quantize_op_types=['conv2d', 'depthwise_conv2d', 'mul'],
+        # data type after quantization, such as 'uint8', 'int8', etc.
+        # default is 'int8'
+        dtype='int8',
+        # window size for 'range_abs_max' quantization, default is 10000
+        window_size=10000,
+        # decay coefficient of the moving average, default is 0.9
+        moving_rate=0.9)
+
+    startup_prog, eval_program, place, config, alg_type = program.preprocess()
+
+    export_outputs = program.build_export(config, eval_program, startup_prog)
+    feeded_var_names, target_vars, fetches_var_name = export_outputs
+
+    eval_program = eval_program.clone(for_test=True)
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    eval_program = quant_aware(
+        eval_program, place, quant_config, scope=None, for_test=True)
+    init_model(config, eval_program, exe)
+
+    # 2. Convert the program before saving the inference program.
+    #    The dtype of eval_program's weights is float32, but in int8 range.
+    eval_program = convert(eval_program, place, quant_config, scope=None)
+
+    fetch_varnames = [var.name for var in target_vars]
+    quant_info_dict = {
+        'program': eval_program,
+        'reader': reader_main(config=config, mode="eval"),
+        'fetch_name_list': fetches_var_name,
+        'fetch_varname_list': fetch_varnames,
+    }
+
+    # Pick the evaluation routine that matches the algorithm type.
+    run_eval = eval_det_run if alg_type == 'det' else eval_rec_run
+    final_metrics = run_eval(exe, config, quant_info_dict, "eval")
+    print(final_metrics)
+
+    # 3. Save inference model
+    model_path = "./quant_model"
+    if not os.path.isdir(model_path):
+        os.makedirs(model_path)
+
+    fluid.io.save_inference_model(
+        dirname=model_path,
+        feeded_var_names=feeded_var_names,
+        target_vars=target_vars,
+        executor=exe,
+        main_program=eval_program,
+        model_filename="{}/model".format(model_path),
+        params_filename="{}/params".format(model_path))
+    print("model saved as {}".format(model_path))
+
+
+# Run the evaluate-and-export pipeline only when executed as a script.
+if __name__ == '__main__':
+    main()
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@ -0,0 +1,204 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+# Make this script's directory, the directory three levels above it and that
+# directory's tools/ folder importable.
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.extend([
+    __dir__,
+    os.path.abspath(os.path.join(__dir__, '..', '..', '..')),
+    os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')),
+])
+
+
+def set_paddle_flags(**kwargs):
+    """Set environment variables from keyword arguments.
+
+    Each ``name=value`` pair is written to ``os.environ`` as ``str(value)``
+    unless ``name`` already has a value there, in which case the existing
+    value is kept.
+    """
+    unset_flags = [
+        (name, value) for name, value in kwargs.items()
+        if os.environ.get(name, None) is None
+    ]
+    for name, value in unset_flags:
+        os.environ[name] = str(value)
+
+
+# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
+# otherwise they will not take effect.
+set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
+
+import tools.program as program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from paddle.fluid.contrib.model_stat import summary
+
+# quant dependencies
+import paddle
+import paddle.fluid as fluid
+from paddleslim.quant import quant_aware, convert
+from paddle.fluid.layer_helper import LayerHelper
+
+
+def main():
+    """Run quantization-aware training followed by the train/eval loop.
+
+    This function takes no arguments: it reads ``config``, ``train_program``,
+    ``startup_program``, ``place`` and ``train_alg_type`` as module-level
+    names, which the ``__main__`` block assigns from ``program.preprocess()``
+    before calling it.
+    """
+    # Build the training graph; the outputs are consumed positionally below.
+    train_build_outputs = program.build(
+        config, train_program, startup_program, mode='train')
+    train_loader = train_build_outputs[0]
+    train_fetch_name_list = train_build_outputs[1]
+    train_fetch_varname_list = train_build_outputs[2]
+    train_opt_loss_name = train_build_outputs[3]
+    model_average = train_build_outputs[-1]
+
+    # Build a separate evaluation graph and clone it in test mode.
+    eval_program = fluid.Program()
+    eval_build_outputs = program.build(
+        config, eval_program, startup_program, mode='eval')
+    eval_fetch_name_list = eval_build_outputs[1]
+    eval_fetch_varname_list = eval_build_outputs[2]
+    eval_program = eval_program.clone(for_test=True)
+
+    train_reader = reader_main(config=config, mode="train")
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    eval_reader = reader_main(config=config, mode="eval")
+
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+
+    def pact(x, name=None):
+        """Clip activation ``x`` with a learnable threshold (PACT-style).
+
+        Creates a one-element parameter named ``x.name + '_pact'``
+        (initialised to 20, L2 decay 1e-4) and limits ``x`` to the range
+        ``[-u_param, u_param]`` using two relu-based corrections.
+        """
+        # NOTE: this must stay the first statement; `locals()` is expected to
+        # contain only the arguments (`x`, `name`) at this point.
+        helper = LayerHelper("pact", **locals())
+        dtype = 'float32'
+        init_thres = 20
+        u_param_attr = fluid.ParamAttr(
+            name=x.name + '_pact',
+            initializer=fluid.initializer.ConstantInitializer(value=init_thres),
+            regularizer=fluid.regularizer.L2Decay(0.0001),
+            learning_rate=1)
+        u_param = helper.create_parameter(
+            attr=u_param_attr, shape=[1], dtype=dtype)
+        # Upper clip: x - relu(x - u) equals min(x, u).
+        x = fluid.layers.elementwise_sub(
+            x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)))
+        # Lower clip: x + relu(-u - x) equals max(x, -u).
+        x = fluid.layers.elementwise_add(
+            x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)))
+        return x
+
+    def get_optimizer():
+        """Return the Adam optimizer (learning rate 0.001) given to quant_aware."""
+        return fluid.optimizer.AdamOptimizer(0.001)
+
+    # 1. quantization configs
+    quant_config = {
+        # weight quantize type, default is 'channel_wise_abs_max'
+        'weight_quantize_type': 'channel_wise_abs_max',
+        # activation quantize type, default is 'moving_average_abs_max'
+        'activation_quantize_type': 'moving_average_abs_max',
+        # weight quantize bit num, default is 8
+        'weight_bits': 8,
+        # activation quantize bit num, default is 8
+        'activation_bits': 8,
+        # ops whose name_scope matches an entry of this list are not quantized
+        'not_quant_pattern': ['skip_quant'],
+        # op types that are quantized
+        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
+        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
+        'dtype': 'int8',
+        # window size for 'range_abs_max' quantization, default is 10000
+        'window_size': 10000,
+        # decay coefficient of the moving average, default is 0.9
+        'moving_rate': 0.9,
+    }
+
+    # 2. quantization transform programs (training aware)
+    #    Apply quantization transforms to the graphs before training and testing.
+    #    Depending on the weight and activation quantization types, fake
+    #    quantize and fake dequantize operators are inserted into the graph.
+    act_preprocess_func = pact
+    optimizer_func = get_optimizer
+    executor = exe
+
+    # The evaluation program is transformed first, then the training program,
+    # both with the same config and activation preprocessing function.
+    eval_program = quant_aware(
+        eval_program,
+        place,
+        quant_config,
+        scope=None,
+        act_preprocess_func=act_preprocess_func,
+        optimizer_func=optimizer_func,
+        executor=executor,
+        for_test=True)
+    quant_train_program = quant_aware(
+        train_program,
+        place,
+        quant_config,
+        scope=None,
+        act_preprocess_func=act_preprocess_func,
+        optimizer_func=optimizer_func,
+        executor=executor,
+        for_test=False,
+        return_program=True)
+
+    # compile program for multi-devices
+    train_compile_program = program.create_multi_devices_program(
+        quant_train_program, train_opt_loss_name, for_quant=True)
+
+    # dump model structure (skipped for attention-based recognition models)
+    if config['Global']['debug']:
+        if train_alg_type == 'rec' and 'attention' in config['Global'][
+                'loss_type']:
+            logger.warning('Does not suport dump attention...')
+        else:
+            summary(quant_train_program)
+
+    init_model(config, quant_train_program, exe)
+
+    # Bundle everything the training loop needs.
+    train_info_dict = {'compile_program':train_compile_program,\
+        'train_program':quant_train_program,\
+        'reader':train_loader,\
+        'fetch_name_list':train_fetch_name_list,\
+        'fetch_varname_list':train_fetch_varname_list,\
+        'model_average': model_average}
+
+    # Bundle everything the evaluation pass needs.
+    eval_info_dict = {'program':eval_program,\
+        'reader':eval_reader,\
+        'fetch_name_list':eval_fetch_name_list,\
+        'fetch_varname_list':eval_fetch_varname_list}
+
+    # Dispatch to the detection or recognition train/eval loop.
+    if train_alg_type == 'det':
+        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
+    else:
+        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
+
+
+def test_reader():
+    """Iterate once over the training reader and log per-batch timing.
+
+    Debugging helper that reads the module-level ``config``. For every batch
+    it logs the running batch count, ``len(data)`` and the seconds elapsed
+    since the previous batch. If the reader raises, the failure is logged as
+    a warning and the function returns without reporting success.
+    """
+    logger.info(config)
+    train_reader = reader_main(config=config, mode="train")
+    import time
+    starttime = time.time()
+    count = 0
+    try:
+        for data in train_reader():
+            count += 1
+            batch_time = time.time() - starttime
+            starttime = time.time()
+            # Build the message explicitly: passing the values as extra
+            # positional arguments without placeholders in the message makes
+            # the logging module report a formatting error.
+            logger.info("reader: {} {} {}".format(count, len(data), batch_time))
+    except Exception as e:
+        # Do not claim success when the reader failed part-way through.
+        logger.warning("reader failed after {} batches: {}".format(count, e))
+        return
+    logger.info("finish reader: {}, Success!".format(count))
+
+
+if __name__ == '__main__':
+    # main() and test_reader() read these names as module-level globals, so
+    # they must be assigned before either function is called.
+    startup_program, train_program, place, config, train_alg_type = program.preprocess(
+    )
+    main()
+    # Uncomment the call below to exercise only the training data reader.
+#     test_reader()
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
@ -67,6 +67,7 @@ class DetModel(object):

        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
+        image.stop_gradient = False
        if mode == "train":
            if self.algorithm == "EAST":
                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
@ -108,7 +109,10 @@ class DetModel(object):
                    name='tvo', shape=[9, 128, 128], dtype='float32')
                input_tco = fluid.layers.data(
                    name='tco', shape=[3, 128, 128], dtype='float32')
-                feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
+                feed_list = [
+                    image, input_score, input_border, input_mask, input_tvo,
+                    input_tco
+                ]
                labels = {'input_score': input_score,\
                    'input_border': input_border,\
                    'input_mask': input_mask,\
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@ -68,6 +68,7 @@ class RecModel(object):
        image_shape.insert(0, -1)
        if mode == "train":
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
+            image.stop_gradient = False
            if self.loss_type == "attention":
                label_in = fluid.data(
                    name='label_in',
@ -146,6 +147,7 @@ class RecModel(object):
                    )
                    image_shape = deepcopy(self.image_shape)
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
+            image.stop_gradient = False
            if self.loss_type == "srn":
                encoder_word_pos = fluid.data(
                    name="encoder_word_pos",
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@ -35,12 +35,13 @@ class CTCPredict(object):
        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
-        encoder_features = self.encoder(inputs)
-        if self.encoder_type != "reshape":
-            encoder_features = fluid.layers.concat(encoder_features, axis=1)
-        name = "ctc_fc"
-        para_attr, bias_attr = get_para_bias_attr(
-            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
+        with fluid.scope_guard("skip_quant"):
+            encoder_features = self.encoder(inputs)
+            if self.encoder_type != "reshape":
+                encoder_features = fluid.layers.concat(encoder_features, axis=1)
+            name = "ctc_fc"
+            para_attr, bias_attr = get_para_bias_attr(
+                l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,
--- a/tools/program.py
+++ b/tools/program.py
@ -225,10 +225,12 @@ def build_export(config, main_prog, startup_prog):
    return feeded_var_names, target_vars, fetches_var_name


-def create_multi_devices_program(program, loss_var_name):
+def create_multi_devices_program(program, loss_var_name, for_quant=False):
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = True
+    if for_quant:
+        build_strategy.fuse_all_reduce_ops = False
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    compile_program = fluid.CompiledProgram(program).with_data_parallel(