PaddleOCR/tools/infer_rec.py

141 lines
4.7 KiB
Python
Raw Normal View History

2020-05-10 16:26:57 +08:00
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
2020-06-12 13:49:24 +08:00
import os
import sys
__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '..'))
2020-05-10 16:26:57 +08:00
2020-06-02 19:03:27 +08:00
2020-05-10 16:26:57 +08:00
def set_paddle_flags(**kwargs):
    """Export each keyword argument as an environment variable if unset.

    Every ``name=value`` pair is written to ``os.environ`` as
    ``name=str(value)``. A variable that is already present in the
    environment is left untouched, so values set by the caller's shell win.

    Args:
        **kwargs: Environment variable names mapped to the values to use
            when the variable is not already defined.
    """
    for key, value in kwargs.items():
        # setdefault only writes when the key is missing, which keeps any
        # value the user exported before launching the script.
        os.environ.setdefault(key, str(value))
# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
# otherwise they have no effect. That is why this call is made at module
# level, ahead of the paddle import that follows it.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
2020-06-12 13:49:24 +08:00
import tools.program as program
2020-05-10 16:26:57 +08:00
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps
from ppocr.utils.utility import create_module
2020-05-12 20:51:28 +08:00
from ppocr.utils.utility import get_image_file_list
2020-05-10 16:26:57 +08:00
def _decode_ctc(predict, char_ops):
    """Decode one CTC prediction into ``(indices, text, score)``.

    Args:
        predict: Outputs fetched from the test program. ``predict[0]`` holds
            the predicted character indices and ``predict[1]`` a 2-D score
            matrix; both must be convertible with ``np.array``.
        char_ops: Object whose ``decode`` method maps indices to text.

    Returns:
        A ``(preds, preds_text, score)`` tuple, or ``None`` when the best
        class of every row is the last one (treated as the CTC blank), in
        which case there is nothing to average a score over.
    """
    preds = np.array(predict[0]).reshape(-1)
    preds_text = char_ops.decode(preds)

    probs = np.array(predict[1])
    ind = np.argmax(probs, axis=1)
    # The last column of `probs` is treated as the blank class.
    blank = probs.shape[1] - 1
    valid_ind = np.where(ind != blank)[0]
    if len(valid_ind) == 0:
        return None
    # Confidence is the mean of the winning scores over non-blank rows.
    score = np.mean(probs[valid_ind, ind[valid_ind]])
    return preds, preds_text, score


def _decode_attention(predict, char_ops):
    """Decode one attention prediction into ``(indices, text, score)``.

    Args:
        predict: Outputs fetched from the test program. ``predict[0]`` holds
            the predicted indices and ``predict[1]`` the matching scores;
            only row 0 of each is used.
        char_ops: Object whose ``decode`` method maps indices to text.

    Returns:
        A ``(preds, preds_text, score)`` tuple.
    """
    preds = np.array(predict[0])
    probs = np.array(predict[1])
    # NOTE(review): index 1 is presumably the end-of-sequence token; the
    # sequence is cut at its *second* occurrence -- confirm against
    # CharacterOps. Position 0 is always dropped.
    end_pos = np.where(preds[0, :] == 1)[0]
    if len(end_pos) <= 1:
        stop = None  # no second end marker: keep everything after position 0
    else:
        stop = end_pos[1]
    score = np.mean(probs[0, 1:stop])
    preds = preds[0, 1:stop].reshape(-1)
    preds_text = char_ops.decode(preds)
    return preds, preds_text, score


def _decode_prediction(predict, loss_type, char_ops):
    """Dispatch to the decoder for ``loss_type``.

    Raises:
        ValueError: If ``loss_type`` is neither ``"ctc"`` nor ``"attention"``.
    """
    if loss_type == "ctc":
        return _decode_ctc(predict, char_ops)
    if loss_type == "attention":
        return _decode_attention(predict, char_ops)
    # Without this, an unknown loss type would fall through and hit the
    # result printing with unbound variables.
    raise ValueError(
        "Unsupported loss_type for inference: %r (expected 'ctc' or "
        "'attention')" % (loss_type, ))


def main():
    """Run recognition inference on the configured images and export the model.

    Steps performed:
      1. Load the config named by the module-level ``FLAGS`` (set in the
         ``__main__`` guard) and merge command-line overrides.
      2. Build the test program for the configured architecture and load
         the model weights via ``init_model``.
      3. For every path returned by ``get_image_file_list`` for
         ``Global.infer_img``, run the program on the next image from the
         test reader and print the decoded indices, text and score.
      4. Save the inference model to ``./output/``.

    Raises:
        ValueError: If ``Global.loss_type`` is not ``"ctc"`` or
            ``"attention"`` and there is at least one image to decode.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)
    char_ops = CharacterOps(config['Global'])
    loss_type = config['Global']['loss_type']
    config['Global']['char_ops'] = char_ops

    # NOTE: the check that use_gpu=True is not requested on a CPU-only
    # paddlepaddle build (check_gpu) is currently disabled.
    use_gpu = config['Global']['use_gpu']
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            _, outputs = rec_model(mode="test")
            fetch_varname_list = [var.name for var in outputs.values()]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    init_model(config, eval_prog, exe)

    blobs = reader_main(config, 'test')()
    infer_img = config['Global']['infer_img']
    infer_list = get_image_file_list(infer_img)
    if not infer_list:
        logger.info("Can not find img in infer_img dir.")

    for img_path in infer_list:
        print("infer_img:%s" % img_path)
        # NOTE(review): assumes the test reader yields exactly one image per
        # path, in the same order as `infer_list` -- confirm in reader_main.
        img = next(blobs)
        predict = exe.run(program=eval_prog,
                          feed={"image": img},
                          fetch_list=fetch_varname_list,
                          return_numpy=False)
        result = _decode_prediction(predict, loss_type, char_ops)
        if result is None:
            # CTC produced only blanks: nothing to report for this image.
            continue
        preds, preds_text, score = result
        print("\t index:", preds)
        print("\t word :", preds_text)
        print("\t score :", score)

    # save for inference model
    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=list(outputs.values()),
        executor=exe,
        main_program=eval_prog,
        model_filename="model",
        params_filename="params")
if __name__ == '__main__':
    # FLAGS is deliberately a module-level global: main() reads
    # FLAGS.config and FLAGS.opt directly instead of taking arguments.
    parser = program.ArgsParser()
    FLAGS = parser.parse_args()
    main()