# PaddleOCR/tools/infer/predict_system.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '../..'))

import tools.infer.utility as utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import tools.infer.predict_det as predict_det
import tools.infer.predict_rec as predict_rec
import copy
import numpy as np
import math
import time
from ppocr.utils.utility import get_image_file_list
from PIL import Image
from tools.infer.utility import draw_ocr
from tools.infer.utility import draw_ocr_box_txt


class TextSystem(object):
    """Detection + recognition pipeline.

    Holds a ``predict_det.TextDetector`` and a ``predict_rec.TextRecognizer``
    built from the same parsed arguments; calling the instance on an image
    runs detection, crops every detected box and feeds the crops to the
    recognizer.
    """

    def __init__(self, args):
        """Create the detector and the recognizer from ``args``."""
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)

    def get_rotate_crop_image(self, img, points):
        """
        Cut the quadrilateral described by ``points`` out of ``img`` and
        warp it to an axis-aligned rectangle.
        args:
            img(array): image indexed as [row, col, channel]
            points(array): four (x, y) corners with shape [4, 2]; they are
                mapped, in order, to the top-left, top-right, bottom-right
                and bottom-left corners of the output. The caller's array
                is left unmodified.
        return:
            the rectified crop; it is rotated with ``np.rot90`` when its
            height is at least 1.5 times its width
        """
        # Private float32 copy: keeps the caller's box intact and gives
        # cv2.getPerspectiveTransform the dtype it expects.
        pts = np.array(points, dtype=np.float32)

        # Clamp the lower bounds at 0: a negative start index would make the
        # slice wrap around and yield an empty/wrong crop. Upper bounds that
        # exceed the image are already truncated by slicing.
        left = max(int(np.min(pts[:, 0])), 0)
        right = int(np.max(pts[:, 0]))
        top = max(int(np.min(pts[:, 1])), 0)
        bottom = int(np.max(pts[:, 1]))
        img_crop = img[top:bottom, left:right, :].copy()

        # Express the corners relative to the cropped region.
        pts[:, 0] -= left
        pts[:, 1] -= top

        # Output size: top edge length x left edge length.
        img_crop_width = int(np.linalg.norm(pts[0] - pts[1]))
        img_crop_height = int(np.linalg.norm(pts[0] - pts[3]))
        pts_std = np.float32([[0, 0], [img_crop_width, 0],
                              [img_crop_width, img_crop_height],
                              [0, img_crop_height]])
        M = cv2.getPerspectiveTransform(pts, pts_std)
        dst_img = cv2.warpPerspective(
            img_crop,
            M, (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE)
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        # Tall, narrow crops are rotated by 90 degrees before being returned.
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        """
        Debug helper: write every crop to ./output/img_crop_<index>.jpg and
        print the index together with its recognition result.
        """
        # cv2.imwrite does not create missing directories, so make sure the
        # target exists before writing.
        output_dir = "./output"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        for bno, img_crop in enumerate(img_crop_list):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop)
            print(bno, rec_res[bno])

    def __call__(self, img):
        """
        Run detection and recognition on ``img``.
        args:
            img(array): input image
        return:
            (dt_boxes, rec_res): the boxes as ordered by ``sorted_boxes`` and
            the recognizer output for the corresponding crops, or
            (None, None) when the detector returns no boxes object
        """
        # Crops are taken from a copy made before the detector sees the image.
        ori_im = img.copy()
        dt_boxes, _ = self.text_detector(img)
        if dt_boxes is None:
            return None, None

        dt_boxes = sorted_boxes(dt_boxes)

        # get_rotate_crop_image works on its own copy of each box, so the
        # boxes returned to the caller keep their original coordinates.
        img_crop_list = [
            self.get_rotate_crop_image(ori_im, box) for box in dt_boxes
        ]
        rec_res, _ = self.text_recognizer(img_crop_list)
        return dt_boxes, rec_res


def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array):detected text boxes with shape [N, 4, 2]; each box
            holds four (x, y) points and is ordered by its first point
    return:
        list of N boxes, each with shape [4, 2]
    """
    # Primary ordering: y of the first point, then x of the first point.
    _boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))

    # Boxes whose first points differ by less than 10 in y are treated as
    # lying on the same text line and must be ordered by x instead. A single
    # adjacent-swap pass is not enough when three or more boxes share a line,
    # so each box is bubbled back towards the front until it is in place.
    for i in range(len(_boxes) - 1):
        for j in range(i, -1, -1):
            same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes


if __name__ == "__main__":
    # Script entry point: run the detection + recognition pipeline on every
    # image found under args.image_dir, print the recognized text and save a
    # visualization of the result to ./inference_results/.
    args = utility.parse_args()
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    # Results scoring below this value are neither printed nor drawn.
    drop_score = 0.5
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))

        # TextSystem returns (None, None) when the detector yields no boxes;
        # there is nothing to print or draw for such an image.
        if dt_boxes is None:
            logger.info("no detection result for image:{}".format(image_file))
            continue

        for text, score in rec_res:
            if score >= drop_score:
                print("%s, %.3f" % (text, score))

        if is_visualize:
            # draw_ocr receives an RGB PIL image; OpenCV loads images as BGR.
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr(
                image, dt_boxes, txts, scores, draw_txt=True,
                drop_score=drop_score)
            draw_img_save = "./inference_results/"
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            save_path = os.path.join(draw_img_save,
                                     os.path.basename(image_file))
            # Reverse the channel axis to go back to BGR for cv2.imwrite.
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            print("The visualized image saved in {}".format(save_path))
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
在程序中使用os.path.append添加环境变量，不再使用命令设置 2020-06-11 09:45:01 +08:00			`import os`
			`import sys`
			`__dir__ = os.path.dirname(__file__)`
			`sys.path.append(__dir__)`
			`sys.path.append(os.path.join(__dir__, '../..'))`
discard export PYTHONPATH manually 2020-06-12 13:49:24 +08:00
			`import tools.infer.utility as utility`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`from ppocr.utils.utility import initial_logger`
			`logger = initial_logger()`
			`import cv2`
discard export PYTHONPATH manually 2020-06-12 13:49:24 +08:00			`import tools.infer.predict_det as predict_det`
			`import tools.infer.predict_rec as predict_rec`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`import copy`
			`import numpy as np`
			`import math`
			`import time`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`from ppocr.utils.utility import get_image_file_list`
			`from PIL import Image`
			`from tools.infer.utility import draw_ocr`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`from tools.infer.utility import draw_ocr_box_txt`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00

			`class TextSystem(object):`
			`def __init__(self, args):`
			`self.text_detector = predict_det.TextDetector(args)`
			`self.text_recognizer = predict_rec.TextRecognizer(args)`

			`def get_rotate_crop_image(self, img, points):`
			`img_height, img_width = img.shape[0:2]`
			`left = int(np.min(points[:, 0]))`
			`right = int(np.max(points[:, 0]))`
			`top = int(np.min(points[:, 1]))`
			`bottom = int(np.max(points[:, 1]))`
			`img_crop = img[top:bottom, left:right, :].copy()`
			`points[:, 0] = points[:, 0] - left`
			`points[:, 1] = points[:, 1] - top`
			`img_crop_width = int(np.linalg.norm(points[0] - points[1]))`
			`img_crop_height = int(np.linalg.norm(points[0] - points[3]))`
			`pts_std = np.float32([[0, 0], [img_crop_width, 0],\`
			`[img_crop_width, img_crop_height], [0, img_crop_height]])`
			`M = cv2.getPerspectiveTransform(points, pts_std)`
			`dst_img = cv2.warpPerspective(`
			`img_crop,`
			`M, (img_crop_width, img_crop_height),`
			`borderMode=cv2.BORDER_REPLICATE)`
			`dst_img_height, dst_img_width = dst_img.shape[0:2]`
			`if dst_img_height * 1.0 / dst_img_width >= 1.5:`
			`dst_img = np.rot90(dst_img)`
			`return dst_img`

			`def print_draw_crop_rec_res(self, img_crop_list, rec_res):`
			`bbox_num = len(img_crop_list)`
			`for bno in range(bbox_num):`
			`cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])`
			`print(bno, rec_res[bno])`

			`def __call__(self, img):`
			`ori_im = img.copy()`
			`dt_boxes, elapse = self.text_detector(img)`
			`if dt_boxes is None:`
			`return None, None`
			`img_crop_list = []`
add doc、infer_det.py、requirments.txt 2020-05-11 15:27:52 +08:00
			`dt_boxes = sorted_boxes(dt_boxes)`

upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`for bno in range(len(dt_boxes)):`
			`tmp_box = copy.deepcopy(dt_boxes[bno])`
			`img_crop = self.get_rotate_crop_image(ori_im, tmp_box)`
			`img_crop_list.append(img_crop)`
			`rec_res, elapse = self.text_recognizer(img_crop_list)`
add doc、infer_det.py、requirments.txt 2020-05-11 15:27:52 +08:00			`# self.print_draw_crop_rec_res(img_crop_list, rec_res)`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`return dt_boxes, rec_res`


add doc、infer_det.py、requirments.txt 2020-05-11 15:27:52 +08:00			`def sorted_boxes(dt_boxes):`
			`"""`
			`Sort text boxes in order from top to bottom, left to right`
			`args:`
update readme 2020-05-14 13:49:28 +08:00			`dt_boxes(array):detected text boxes with shape [4, 2]`
add doc、infer_det.py、requirments.txt 2020-05-11 15:27:52 +08:00			`return:`
			`sorted boxes(array) with shape [4, 2]`
			`"""`
			`num_boxes = dt_boxes.shape[0]`
modify batch num in rec and fix sorted_box func 2020-05-20 16:05:46 +08:00			`sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))`
add doc、infer_det.py、requirments.txt 2020-05-11 15:27:52 +08:00			`_boxes = list(sorted_boxes)`

			`for i in range(num_boxes - 1):`
			`if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \`
			`(_boxes[i + 1][0][0] < _boxes[i][0][0]):`
			`tmp = _boxes[i]`
			`_boxes[i] = _boxes[i + 1]`
			`_boxes[i + 1] = tmp`
			`return _boxes`


upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`if __name__ == "__main__":`
			`args = utility.parse_args()`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`image_file_list = get_image_file_list(args.image_dir)`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`text_sys = TextSystem(args)`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`is_visualize = True`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`for image_file in image_file_list:`
			`img = cv2.imread(image_file)`
			`if img is None:`
			`logger.info("error in loading image:{}".format(image_file))`
			`continue`
			`starttime = time.time()`
			`dt_boxes, rec_res = text_sys(img)`
			`elapse = time.time() - starttime`
			`print("Predict time of %s: %.3fs" % (image_file, elapse))`
			`dt_num = len(dt_boxes)`
			`dt_boxes_final = []`
			`for dno in range(dt_num):`
			`text, score = rec_res[dno]`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`if score >= 0.5:`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`text_str = "%s, %.3f" % (text, score)`
			`print(text_str)`
			`dt_boxes_final.append(dt_boxes[dno])`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00
			`if is_visualize:`
			`image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))`
			`boxes = dt_boxes`
			`txts = [rec_res[i][0] for i in range(len(rec_res))]`
			`scores = [rec_res[i][1] for i in range(len(rec_res))]`

			`draw_img = draw_ocr(`
			`image, boxes, txts, scores, draw_txt=True, drop_score=0.5)`
change the saved path of visualized image 2020-05-14 14:25:00 +08:00			`draw_img_save = "./inference_results/"`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`if not os.path.exists(draw_img_save):`
			`os.makedirs(draw_img_save)`
			`cv2.imwrite(`
			`os.path.join(draw_img_save, os.path.basename(image_file)),`
Update predict_system.py 2020-05-14 14:43:20 +08:00			`draw_img[:, :, ::-1])`
change the saved path of visualized image 2020-05-14 14:25:00 +08:00			`print("The visualized image saved in {}".format(`
			`os.path.join(draw_img_save, os.path.basename(image_file))))`