add slim quantization
This commit is contained in:
parent
ed6b2f0c71
commit
2c6f0b0d55
|
@ -0,0 +1,34 @@
|
|||
> 运行示例前请先安装1.2.0或更高版本PaddleSlim
|
||||
|
||||
# 模型量化压缩教程
|
||||
|
||||
## 概述
|
||||
|
||||
该示例使用PaddleSlim提供的[量化压缩API](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)对检测模型进行压缩。
|
||||
在阅读该示例前,建议您先了解以下内容:
|
||||
|
||||
- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
|
||||
- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)
|
||||
|
||||
## 安装PaddleSlim
|
||||
可按照[PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)中的步骤安装PaddleSlim。
|
||||
|
||||
|
||||
|
||||
## 量化训练
|
||||
|
||||
进入PaddleOCR根目录,通过以下命令对模型进行量化:
|
||||
|
||||
```bash
|
||||
python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=det_mv3_db/best_accuracy Global.save_model_dir=./output/quant_model
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 评估并导出
|
||||
|
||||
在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:
|
||||
|
||||
```bash
|
||||
python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_model
|
||||
```
|
|
@ -0,0 +1,129 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
# Resolve this script's directory to an absolute path first: a bare
# os.path.dirname(__file__) can be relative (or empty when the script is
# launched from its own directory), which would put a cwd-dependent entry on
# sys.path.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
# Three levels up from this script, plus the `tools` directory beneath it
# (presumably so that `import program` and the `ppocr` / `eval_utils`
# packages imported below resolve -- verify against the repository layout).
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
sys.path.append(
    os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
|
||||
|
||||
|
||||
def set_paddle_flags(**kwargs):
    """Export keyword arguments as environment variables.

    Each keyword becomes an environment variable whose value is ``str(value)``.
    A variable that is already present in the environment is left untouched.
    """
    for flag_name, flag_value in kwargs.items():
        if flag_name in os.environ:
            # Respect whatever the caller's environment already specifies.
            continue
        os.environ[flag_name] = str(flag_value)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
|
||||
|
||||
import program
|
||||
from paddle import fluid
|
||||
from ppocr.utils.utility import initial_logger
|
||||
logger = initial_logger()
|
||||
from ppocr.utils.save_load import init_model, load_params
|
||||
from ppocr.utils.character import CharacterOps
|
||||
from ppocr.utils.utility import create_module
|
||||
from ppocr.data.reader_main import reader_main
|
||||
|
||||
from paddleslim.quant import quant_aware, convert
|
||||
from paddle.fluid.layer_helper import LayerHelper
|
||||
from eval_utils.eval_det_utils import eval_det_run
|
||||
from eval_utils.eval_rec_utils import eval_rec_run
|
||||
|
||||
|
||||
def main():
    """Evaluate a quantized model and export it as an inference model.

    Steps, in order: build the export graph, apply the test-mode
    quantization transform, load weights through ``init_model``, convert the
    program, run the detection or recognition evaluation (chosen by the
    algorithm type returned from ``program.preprocess``), print the metrics,
    and write the inference model to ``./quant_model``.
    """
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc.
        # default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    startup_prog, eval_program, place, config, alg_type = program.preprocess()

    feeded_var_names, target_vars, fetches_var_name = program.build_export(
        config, eval_program, startup_prog)

    eval_program = eval_program.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Apply the quantization transform in test mode before weights are loaded.
    eval_program = quant_aware(
        eval_program, place, quant_config, scope=None, for_test=True)

    init_model(config, eval_program, exe)

    # 2. Convert the program before save inference program
    #    The dtype of eval_program's weights is float32, but in int8 range.
    eval_program = convert(eval_program, place, quant_config, scope=None)

    fetch_varnames = [var.name for var in target_vars]
    eval_reader = reader_main(config=config, mode="eval")
    quant_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': fetches_var_name,
        'fetch_varname_list': fetch_varnames,
    }

    # Detection and recognition models use different evaluation routines.
    run_eval = eval_det_run if alg_type == 'det' else eval_rec_run
    final_metrics = run_eval(exe, config, quant_info_dict, "eval")
    print(final_metrics)

    # 3. Save inference model
    model_path = "./quant_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(
        dirname=model_path,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=eval_program,
        model_filename=model_path + '/model',
        params_filename=model_path + '/params')
    print("model saved as {}".format(model_path))
|
||||
|
||||
|
||||
# Script entry point: run the evaluate-and-export pipeline.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,204 @@
|
|||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
# Absolute directory of this script; the entries below are derived from it.
__dir__ = os.path.dirname(os.path.abspath(__file__))
# Make importable, in this order: the script directory itself, the directory
# three levels above it, and the `tools` directory under that one.
sys.path.extend([
    __dir__,
    os.path.abspath(os.path.join(__dir__, '..', '..', '..')),
    os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')),
])
|
||||
|
||||
|
||||
def set_paddle_flags(**kwargs):
    """Set environment variables from keyword arguments without overriding.

    Only names that are not yet defined in ``os.environ`` are written; each
    value is converted with ``str`` first.
    """
    missing = {
        name: str(value)
        for name, value in kwargs.items() if name not in os.environ
    }
    os.environ.update(missing)


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(FLAGS_eager_delete_tensor_gb=0)  # enable GC to save memory
|
||||
|
||||
import tools.program as program
|
||||
from paddle import fluid
|
||||
from ppocr.utils.utility import initial_logger
|
||||
logger = initial_logger()
|
||||
from ppocr.data.reader_main import reader_main
|
||||
from ppocr.utils.save_load import init_model
|
||||
from paddle.fluid.contrib.model_stat import summary
|
||||
|
||||
# quant dependencies
|
||||
import paddle
|
||||
import paddle.fluid as fluid
|
||||
from paddleslim.quant import quant_aware, convert
|
||||
from paddle.fluid.layer_helper import LayerHelper
|
||||
|
||||
|
||||
def main():
    """Run quantization-aware training followed by periodic evaluation.

    Reads the module-level names ``config``, ``train_program``,
    ``startup_program``, ``place`` and ``train_alg_type`` that the
    ``__main__`` guard assigns from ``program.preprocess()``.

    The function builds the train and eval graphs, applies ``quant_aware`` to
    both (with the PACT activation preprocessing defined below), loads weights
    through ``init_model`` and hands off to the detection or recognition
    train/eval loop in ``program``.
    """
    train_outputs = program.build(
        config, train_program, startup_program, mode='train')
    (train_loader, train_fetch_name_list, train_fetch_varname_list,
     train_opt_loss_name) = train_outputs[:4]
    model_average = train_outputs[-1]

    eval_program = fluid.Program()
    eval_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_outputs[1]
    eval_fetch_varname_list = eval_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    def pact(x, name=None):
        """Clip activation ``x`` to ``[-u, u]`` with a learnable threshold ``u``."""
        # Must stay the first statement: locals() is expected to contain only
        # the arguments `x` and `name` at this point.
        helper = LayerHelper("pact", **locals())
        dtype = 'float32'
        init_thres = 20
        threshold_attr = fluid.ParamAttr(
            name=x.name + '_pact',
            initializer=fluid.initializer.ConstantInitializer(
                value=init_thres),
            regularizer=fluid.regularizer.L2Decay(0.0001),
            learning_rate=1)
        threshold = helper.create_parameter(
            attr=threshold_attr, shape=[1], dtype=dtype)
        # x - relu(x - u): caps values above +u.
        over_upper = fluid.layers.relu(
            fluid.layers.elementwise_sub(x, threshold))
        x = fluid.layers.elementwise_sub(x, over_upper)
        # x + relu(-u - x): lifts values below -u.
        under_lower = fluid.layers.relu(
            fluid.layers.elementwise_sub(-threshold, x))
        x = fluid.layers.elementwise_add(x, under_lower)
        return x

    def get_optimizer():
        """Return the optimizer handed to ``quant_aware`` as ``optimizer_func``."""
        return fluid.optimizer.AdamOptimizer(0.001)

    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc.
        # default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and
    #    testing. According to the weight and activation quantization type,
    #    some fake quantize and fake dequantize operators are added to the
    #    graph.
    shared_quant_kwargs = dict(
        scope=None,
        act_preprocess_func=pact,
        optimizer_func=get_optimizer,
        executor=exe)

    eval_program = quant_aware(
        eval_program, place, quant_config, for_test=True,
        **shared_quant_kwargs)
    quant_train_program = quant_aware(
        train_program,
        place,
        quant_config,
        for_test=False,
        return_program=True,
        **shared_quant_kwargs)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        quant_train_program, train_opt_loss_name, for_quant=True)

    # dump model structure
    if config['Global']['debug']:
        is_attention_rec = (train_alg_type == 'rec' and
                            'attention' in config['Global']['loss_type'])
        if is_attention_rec:
            logger.warning('Does not suport dump attention...')
        else:
            summary(quant_train_program)

    init_model(config, quant_train_program, exe)

    train_info_dict = {
        'compile_program': train_compile_program,
        'train_program': quant_train_program,
        'reader': train_loader,
        'fetch_name_list': train_fetch_name_list,
        'fetch_varname_list': train_fetch_varname_list,
        'model_average': model_average,
    }

    eval_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list,
    }

    # Detection and recognition use separate train/eval loops.
    if train_alg_type == 'det':
        run_train_eval = program.train_eval_det_run
    else:
        run_train_eval = program.train_eval_rec_run
    run_train_eval(config, exe, train_info_dict, eval_info_dict)
|
||||
|
||||
|
||||
def test_reader():
    """Iterate the training reader once and log per-batch timing.

    Debug helper: reads the module-level ``config``, builds the training
    reader with ``reader_main`` and logs, for every batch, its running index,
    ``len(data)`` and the seconds elapsed since the previous batch.

    A failure while reading is logged as a warning and ends the run; the
    "Success!" line is only emitted when the reader is exhausted normally.
    """
    import time

    logger.info(config)
    train_reader = reader_main(config=config, mode="train")
    starttime = time.time()
    count = 0
    try:
        for data in train_reader():
            count += 1
            batch_time = time.time() - starttime
            starttime = time.time()
            # Format the message up front: passing extra positional args to a
            # message without placeholders is a formatting error for a
            # stdlib-style logger.
            logger.info("reader: {} {} {}".format(count,
                                                  len(data), batch_time))
    except Exception as e:
        # Best-effort debug helper: report the failure instead of raising,
        # but do not claim success afterwards.
        logger.warning("reader failed after {} batches: {}".format(count, e))
        return
    logger.info("finish reader: {}, Success!".format(count))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # These five names are deliberately bound at module level: main() and
    # test_reader() read them as globals.
    (startup_program, train_program, place, config,
     train_alg_type) = program.preprocess()
    main()
    # test_reader()
|
|
@ -67,6 +67,7 @@ class DetModel(object):
|
|||
|
||||
image = fluid.layers.data(
|
||||
name='image', shape=image_shape, dtype='float32')
|
||||
image.stop_gradient = False
|
||||
if mode == "train":
|
||||
if self.algorithm == "EAST":
|
||||
h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
|
||||
|
@ -108,7 +109,10 @@ class DetModel(object):
|
|||
name='tvo', shape=[9, 128, 128], dtype='float32')
|
||||
input_tco = fluid.layers.data(
|
||||
name='tco', shape=[3, 128, 128], dtype='float32')
|
||||
feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
|
||||
feed_list = [
|
||||
image, input_score, input_border, input_mask, input_tvo,
|
||||
input_tco
|
||||
]
|
||||
labels = {'input_score': input_score,\
|
||||
'input_border': input_border,\
|
||||
'input_mask': input_mask,\
|
||||
|
|
|
@ -68,6 +68,7 @@ class RecModel(object):
|
|||
image_shape.insert(0, -1)
|
||||
if mode == "train":
|
||||
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
||||
image.stop_gradient = False
|
||||
if self.loss_type == "attention":
|
||||
label_in = fluid.data(
|
||||
name='label_in',
|
||||
|
@ -146,6 +147,7 @@ class RecModel(object):
|
|||
)
|
||||
image_shape = deepcopy(self.image_shape)
|
||||
image = fluid.data(name='image', shape=image_shape, dtype='float32')
|
||||
image.stop_gradient = False
|
||||
if self.loss_type == "srn":
|
||||
encoder_word_pos = fluid.data(
|
||||
name="encoder_word_pos",
|
||||
|
|
|
@ -35,12 +35,13 @@ class CTCPredict(object):
|
|||
self.fc_decay = params.get("fc_decay", 0.0004)
|
||||
|
||||
def __call__(self, inputs, labels=None, mode=None):
|
||||
encoder_features = self.encoder(inputs)
|
||||
if self.encoder_type != "reshape":
|
||||
encoder_features = fluid.layers.concat(encoder_features, axis=1)
|
||||
name = "ctc_fc"
|
||||
para_attr, bias_attr = get_para_bias_attr(
|
||||
l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
|
||||
with fluid.scope_guard("skip_quant"):
|
||||
encoder_features = self.encoder(inputs)
|
||||
if self.encoder_type != "reshape":
|
||||
encoder_features = fluid.layers.concat(encoder_features, axis=1)
|
||||
name = "ctc_fc"
|
||||
para_attr, bias_attr = get_para_bias_attr(
|
||||
l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
|
||||
predict = fluid.layers.fc(input=encoder_features,
|
||||
size=self.char_num + 1,
|
||||
param_attr=para_attr,
|
||||
|
|
|
@ -225,10 +225,12 @@ def build_export(config, main_prog, startup_prog):
|
|||
return feeded_var_names, target_vars, fetches_var_name
|
||||
|
||||
|
||||
def create_multi_devices_program(program, loss_var_name):
|
||||
def create_multi_devices_program(program, loss_var_name, for_quant=False):
|
||||
build_strategy = fluid.BuildStrategy()
|
||||
build_strategy.memory_optimize = False
|
||||
build_strategy.enable_inplace = True
|
||||
if for_quant:
|
||||
build_strategy.fuse_all_reduce_ops = False
|
||||
exec_strategy = fluid.ExecutionStrategy()
|
||||
exec_strategy.num_iteration_per_drop_scope = 1
|
||||
compile_program = fluid.CompiledProgram(program).with_data_parallel(
|
||||
|
|
Loading…
Reference in New Issue