PaddleOCR/tools/infer/predict_system.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_det
import predict_rec
import copy
import numpy as np
import math
import time


class TextSystem(object):
    """Two-stage OCR pipeline: detect text boxes, crop them, recognize them.

    The detector and the recognizer are built from the same ``args`` object
    and are invoked in sequence by :meth:`__call__`.
    """

    def __init__(self, args):
        """Create the detector and recognizer.

        Args:
            args: Options object forwarded unchanged to
                ``predict_det.TextDetector`` and
                ``predict_rec.TextRecognizer``.
        """
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)

    def get_rotate_crop_image(self, img, points):
        """Cut a quadrilateral out of ``img`` and rectify it to a rectangle.

        Args:
            img: Image array indexed as ``img[y, x, channel]``.
            points: 4x2 array-like of ``(x, y)`` corners. The corners are
                mapped, in order, to the top-left, top-right, bottom-right
                and bottom-left corners of the output. The argument is not
                modified.

        Returns:
            The perspective-corrected crop. Its width is the length of the
            edge ``points[0]-points[1]`` and its height the length of the
            edge ``points[0]-points[3]`` (both truncated to int). If the
            crop is at least 1.5 times taller than wide it is additionally
            rotated by 90 degrees with ``np.rot90``.
        """
        # Private float32 copy: cv2.getPerspectiveTransform requires float32
        # input, and the caller's box must not be shifted as a side effect.
        points = np.array(points, dtype=np.float32)

        # Clamp the crop origin at 0. A negative slice start would wrap
        # around to the far side of the image and yield an empty/wrong crop;
        # BORDER_REPLICATE below fills the part that lies outside the image.
        left = max(int(np.min(points[:, 0])), 0)
        top = max(int(np.min(points[:, 1])), 0)
        right = int(np.max(points[:, 0]))
        bottom = int(np.max(points[:, 1]))
        img_crop = img[top:bottom, left:right, :].copy()

        # Express the corners in the coordinate system of the crop.
        points[:, 0] -= left
        points[:, 1] -= top

        img_crop_width = int(np.linalg.norm(points[0] - points[1]))
        img_crop_height = int(np.linalg.norm(points[0] - points[3]))
        pts_std = np.float32([
            [0, 0],
            [img_crop_width, 0],
            [img_crop_width, img_crop_height],
            [0, img_crop_height],
        ])
        M = cv2.getPerspectiveTransform(points, pts_std)
        dst_img = cv2.warpPerspective(
            img_crop,
            M, (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE)

        # Tall, narrow crops are turned on their side before recognition.
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        """Debug helper: dump every crop to ``./output`` and print its result.

        Args:
            img_crop_list: Sequence of cropped images.
            rec_res: Sequence of recognition results, indexed in parallel
                with ``img_crop_list``.
        """
        import os

        # cv2.imwrite does not create directories and reports failure only
        # through its return value, so make sure the target exists first.
        os.makedirs("./output", exist_ok=True)
        for bno, img_crop in enumerate(img_crop_list):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop)
            print(bno, rec_res[bno])

    def __call__(self, img):
        """Run detection followed by recognition on one image.

        Args:
            img: Input image; it is copied before being handed to the
                detector so the crops are taken from the unmodified pixels.

        Returns:
            ``(dt_boxes, rec_res)`` where ``dt_boxes`` is the detector output
            and ``rec_res`` the recognizer output for the crops, in the same
            order. ``(None, None)`` if the detector returned ``None``.
        """
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)
        if dt_boxes is None:
            return None, None
        # get_rotate_crop_image works on its own copy of each box, so the
        # detector output can be passed directly.
        img_crop_list = [
            self.get_rotate_crop_image(ori_im, box) for box in dt_boxes
        ]
        rec_res, elapse = self.text_recognizer(img_crop_list)
        # For debugging, dump the crops and their results with:
        #     self.print_draw_crop_rec_res(img_crop_list, rec_res)
        return dt_boxes, rec_res


if __name__ == "__main__":
    # Script entry point: run the full detect + recognize pipeline on every
    # image found under args.image_dir, print the recognized text with its
    # score, and hand the kept boxes to utility.draw_text_det_res.
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))
        # TextSystem returns (None, None) when the detector yields nothing;
        # skip the image instead of crashing on len(None).
        if dt_boxes is None:
            logger.info("no text detected in image:{}".format(image_file))
            continue
        dt_boxes_final = []
        for box, (text, score) in zip(dt_boxes, rec_res):
            # Only results with a non-negative score are reported and drawn.
            if score >= 0:
                print("%s, %.3f" % (text, score))
                dt_boxes_final.append(box)
        utility.draw_text_det_res(dt_boxes_final, image_file)
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`import utility`
			`from ppocr.utils.utility import initial_logger`
			`logger = initial_logger()`
			`import cv2`
			`import predict_det`
			`import predict_rec`
			`import copy`
			`import numpy as np`
			`import math`
			`import time`


			`class TextSystem(object):`
			`def __init__(self, args):`
			`self.text_detector = predict_det.TextDetector(args)`
			`self.text_recognizer = predict_rec.TextRecognizer(args)`

			`def get_rotate_crop_image(self, img, points):`
			`img_height, img_width = img.shape[0:2]`
			`left = int(np.min(points[:, 0]))`
			`right = int(np.max(points[:, 0]))`
			`top = int(np.min(points[:, 1]))`
			`bottom = int(np.max(points[:, 1]))`
			`img_crop = img[top:bottom, left:right, :].copy()`
			`points[:, 0] = points[:, 0] - left`
			`points[:, 1] = points[:, 1] - top`
			`img_crop_width = int(np.linalg.norm(points[0] - points[1]))`
			`img_crop_height = int(np.linalg.norm(points[0] - points[3]))`
			`pts_std = np.float32([[0, 0], [img_crop_width, 0],\`
			`[img_crop_width, img_crop_height], [0, img_crop_height]])`
			`M = cv2.getPerspectiveTransform(points, pts_std)`
			`dst_img = cv2.warpPerspective(`
			`img_crop,`
			`M, (img_crop_width, img_crop_height),`
			`borderMode=cv2.BORDER_REPLICATE)`
			`dst_img_height, dst_img_width = dst_img.shape[0:2]`
			`if dst_img_height * 1.0 / dst_img_width >= 1.5:`
			`dst_img = np.rot90(dst_img)`
			`return dst_img`

			`def print_draw_crop_rec_res(self, img_crop_list, rec_res):`
			`bbox_num = len(img_crop_list)`
			`for bno in range(bbox_num):`
			`cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])`
			`print(bno, rec_res[bno])`

			`def __call__(self, img):`
			`ori_im = img.copy()`
			`dt_boxes, elapse = self.text_detector(img)`
			`if dt_boxes is None:`
			`return None, None`
			`img_crop_list = []`
			`for bno in range(len(dt_boxes)):`
			`tmp_box = copy.deepcopy(dt_boxes[bno])`
			`img_crop = self.get_rotate_crop_image(ori_im, tmp_box)`
			`img_crop_list.append(img_crop)`
			`rec_res, elapse = self.text_recognizer(img_crop_list)`
			`# self.print_draw_crop_rec_res(img_crop_list, rec_res)`
			`return dt_boxes, rec_res`


			`if __name__ == "__main__":`
			`args = utility.parse_args()`
			`image_file_list = utility.get_image_file_list(args.image_dir)`
			`text_sys = TextSystem(args)`
			`for image_file in image_file_list:`
			`img = cv2.imread(image_file)`
			`if img is None:`
			`logger.info("error in loading image:{}".format(image_file))`
			`continue`
			`starttime = time.time()`
			`dt_boxes, rec_res = text_sys(img)`
			`elapse = time.time() - starttime`
			`print("Predict time of %s: %.3fs" % (image_file, elapse))`
			`dt_num = len(dt_boxes)`
			`dt_boxes_final = []`
			`for dno in range(dt_num):`
			`text, score = rec_res[dno]`
			`if score >= 0:`
			`text_str = "%s, %.3f" % (text, score)`
			`print(text_str)`
			`dt_boxes_final.append(dt_boxes[dno])`
			`utility.draw_text_det_res(dt_boxes_final, image_file)`