PaddleOCR/tools/infer/predict_det.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
from ppocr.data.det.east_process import EASTProcessTest
from ppocr.data.det.db_process import DBProcessTest
from ppocr.postprocess.db_postprocess import DBPostProcess
from ppocr.postprocess.east_postprocess import EASTPostPocess
import copy
import numpy as np
import math
import time


class TextDetector(object):
    def __init__(self, args):
        max_side_len = args.det_max_side_len
        self.det_algorithm = args.det_algorithm
        preprocess_params = {'max_side_len': max_side_len}
        postprocess_params = {}
        if self.det_algorithm == "DB":
            self.preprocess_op = DBProcessTest(preprocess_params)
            postprocess_params["thresh"] = args.det_db_thresh
            postprocess_params["box_thresh"] = args.det_db_box_thresh
            postprocess_params["max_candidates"] = 1000
            self.postprocess_op = DBPostProcess(postprocess_params)
        elif self.det_algorithm == "EAST":
            self.preprocess_op = EASTProcessTest(preprocess_params)
            postprocess_params["score_thresh"] = args.det_east_score_thresh
            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
            self.postprocess_op = EASTPostPocess(postprocess_params)
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)

        self.predictor, self.input_tensor, self.output_tensors =\
            utility.create_predictor(args, mode="det")

    def order_points_clockwise(self, pts):
        #######
        ## https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
        ########
        # sort the points based on their x-coordinates
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted
        # x-roodinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def expand_det_res(self, points, bbox_height, bbox_width, img_height,
                       img_width):
        if bbox_height * 1.0 / bbox_width >= 2.0:
            expand_w = bbox_width * 0.20
            expand_h = bbox_width * 0.20
        elif bbox_width * 1.0 / bbox_height >= 3.0:
            expand_w = bbox_height * 0.20
            expand_h = bbox_height * 0.20
        else:
            expand_w = bbox_height * 0.1
            expand_h = bbox_height * 0.1

        points[0, 0] = int(max((points[0, 0] - expand_w), 0))
        points[1, 0] = int(min((points[1, 0] + expand_w), img_width))
        points[3, 0] = int(max((points[3, 0] - expand_w), 0))
        points[2, 0] = int(min((points[2, 0] + expand_w), img_width))

        points[0, 1] = int(max((points[0, 1] - expand_h), 0))
        points[1, 1] = int(max((points[1, 1] - expand_h), 0))
        points[3, 1] = int(min((points[3, 1] + expand_h), img_height))
        points[2, 1] = int(min((points[2, 1] + expand_h), img_height))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            left = int(np.min(box[:, 0]))
            right = int(np.max(box[:, 0]))
            top = int(np.min(box[:, 1]))
            bottom = int(np.max(box[:, 1]))
            bbox_height = bottom - top
            bbox_width = right - left
            diffh = math.fabs(box[0, 1] - box[1, 1])
            diffw = math.fabs(box[0, 0] - box[3, 0])
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 10 or rect_height <= 10:
                continue
            if diffh <= 10 and diffw <= 10:
                box = self.expand_det_res(
                    copy.deepcopy(box), bbox_height, bbox_width, img_height,
                    img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def __call__(self, img):
        ori_im = img.copy()
        im, ratio_list = self.preprocess_op(img)
        if im is None:
            return None, 0
        im = im.copy()
        starttime = time.time()
        self.input_tensor.copy_from_cpu(im)
        self.predictor.zero_copy_run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
        outs_dict = {}
        if self.det_algorithm == "EAST":
            outs_dict['f_score'] = outputs[0]
            outs_dict['f_geo'] = outputs[1]
        else:
            outs_dict['maps'] = [outputs[0]]
        dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])
        dt_boxes = dt_boxes_list[0]
        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse


if __name__ == "__main__":
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_detector = TextDetector(args)
    count = 0
    total_time = 0
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        dt_boxes, elapse = text_detector(img)
        if count > 0:
            total_time += elapse
        count += 1
        print("Predict time of %s:" % image_file, elapse)
        utility.draw_text_det_res(dt_boxes, image_file)
    print("Avg Time:", total_time / (count - 1))
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`import utility`
			`from ppocr.utils.utility import initial_logger`
			`logger = initial_logger()`
			`import cv2`
			`from ppocr.data.det.east_process import EASTProcessTest`
			`from ppocr.data.det.db_process import DBProcessTest`
			`from ppocr.postprocess.db_postprocess import DBPostProcess`
			`from ppocr.postprocess.east_postprocess import EASTPostPocess`
			`import copy`
			`import numpy as np`
			`import math`
			`import time`


			`class TextDetector(object):`
			`def __init__(self, args):`
			`max_side_len = args.det_max_side_len`
			`self.det_algorithm = args.det_algorithm`
			`preprocess_params = {'max_side_len': max_side_len}`
			`postprocess_params = {}`
			`if self.det_algorithm == "DB":`
			`self.preprocess_op = DBProcessTest(preprocess_params)`
			`postprocess_params["thresh"] = args.det_db_thresh`
			`postprocess_params["box_thresh"] = args.det_db_box_thresh`
			`postprocess_params["max_candidates"] = 1000`
			`self.postprocess_op = DBPostProcess(postprocess_params)`
			`elif self.det_algorithm == "EAST":`
			`self.preprocess_op = EASTProcessTest(preprocess_params)`
			`postprocess_params["score_thresh"] = args.det_east_score_thresh`
			`postprocess_params["cover_thresh"] = args.det_east_cover_thresh`
			`postprocess_params["nms_thresh"] = args.det_east_nms_thresh`
			`self.postprocess_op = EASTPostPocess(postprocess_params)`
			`else:`
			`logger.info("unknown det_algorithm:{}".format(self.det_algorithm))`
			`sys.exit(0)`

			`self.predictor, self.input_tensor, self.output_tensors =\`
			`utility.create_predictor(args, mode="det")`

			`def order_points_clockwise(self, pts):`
			`#######`
			`## https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py`
			`########`
			`# sort the points based on their x-coordinates`
			`xSorted = pts[np.argsort(pts[:, 0]), :]`

			`# grab the left-most and right-most points from the sorted`
			`# x-roodinate points`
			`leftMost = xSorted[:2, :]`
			`rightMost = xSorted[2:, :]`

			`# now, sort the left-most coordinates according to their`
			`# y-coordinates so we can grab the top-left and bottom-left`
			`# points, respectively`
			`leftMost = leftMost[np.argsort(leftMost[:, 1]), :]`
			`(tl, bl) = leftMost`

			`rightMost = rightMost[np.argsort(rightMost[:, 1]), :]`
			`(tr, br) = rightMost`

			`rect = np.array([tl, tr, br, bl], dtype="float32")`
			`return rect`

			`def expand_det_res(self, points, bbox_height, bbox_width, img_height,`
			`img_width):`
			`if bbox_height * 1.0 / bbox_width >= 2.0:`
			`expand_w = bbox_width * 0.20`
			`expand_h = bbox_width * 0.20`
			`elif bbox_width * 1.0 / bbox_height >= 3.0:`
			`expand_w = bbox_height * 0.20`
			`expand_h = bbox_height * 0.20`
			`else:`
			`expand_w = bbox_height * 0.1`
			`expand_h = bbox_height * 0.1`

			`points[0, 0] = int(max((points[0, 0] - expand_w), 0))`
			`points[1, 0] = int(min((points[1, 0] + expand_w), img_width))`
			`points[3, 0] = int(max((points[3, 0] - expand_w), 0))`
			`points[2, 0] = int(min((points[2, 0] + expand_w), img_width))`

			`points[0, 1] = int(max((points[0, 1] - expand_h), 0))`
			`points[1, 1] = int(max((points[1, 1] - expand_h), 0))`
			`points[3, 1] = int(min((points[3, 1] + expand_h), img_height))`
			`points[2, 1] = int(min((points[2, 1] + expand_h), img_height))`
			`return points`

			`def filter_tag_det_res(self, dt_boxes, image_shape):`
			`img_height, img_width = image_shape[0:2]`
			`dt_boxes_new = []`
			`for box in dt_boxes:`
			`box = self.order_points_clockwise(box)`
			`left = int(np.min(box[:, 0]))`
			`right = int(np.max(box[:, 0]))`
			`top = int(np.min(box[:, 1]))`
			`bottom = int(np.max(box[:, 1]))`
			`bbox_height = bottom - top`
			`bbox_width = right - left`
			`diffh = math.fabs(box[0, 1] - box[1, 1])`
			`diffw = math.fabs(box[0, 0] - box[3, 0])`
			`rect_width = int(np.linalg.norm(box[0] - box[1]))`
			`rect_height = int(np.linalg.norm(box[0] - box[3]))`
			`if rect_width <= 10 or rect_height <= 10:`
			`continue`
			`if diffh <= 10 and diffw <= 10:`
			`box = self.expand_det_res(`
			`copy.deepcopy(box), bbox_height, bbox_width, img_height,`
			`img_width)`
			`dt_boxes_new.append(box)`
			`dt_boxes = np.array(dt_boxes_new)`
			`return dt_boxes`

			`def __call__(self, img):`
			`ori_im = img.copy()`
			`im, ratio_list = self.preprocess_op(img)`
			`if im is None:`
			`return None, 0`
			`im = im.copy()`
			`starttime = time.time()`
			`self.input_tensor.copy_from_cpu(im)`
			`self.predictor.zero_copy_run()`
			`outputs = []`
			`for output_tensor in self.output_tensors:`
			`output = output_tensor.copy_to_cpu()`
			`outputs.append(output)`
			`outs_dict = {}`
			`if self.det_algorithm == "EAST":`
			`outs_dict['f_score'] = outputs[0]`
			`outs_dict['f_geo'] = outputs[1]`
			`else:`
			`outs_dict['maps'] = [outputs[0]]`
			`dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])`
			`dt_boxes = dt_boxes_list[0]`
			`dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)`
			`elapse = time.time() - starttime`
			`return dt_boxes, elapse`


			`if __name__ == "__main__":`
			`args = utility.parse_args()`
			`image_file_list = utility.get_image_file_list(args.image_dir)`
			`text_detector = TextDetector(args)`
			`count = 0`
			`total_time = 0`
			`for image_file in image_file_list:`
			`img = cv2.imread(image_file)`
			`if img is None:`
			`logger.info("error in loading image:{}".format(image_file))`
			`continue`
			`dt_boxes, elapse = text_detector(img)`
			`if count > 0:`
			`total_time += elapse`
			`count += 1`
			`print("Predict time of %s:" % image_file, elapse)`
			`utility.draw_text_det_res(dt_boxes, image_file)`
			`print("Avg Time:", total_time / (count - 1))`