PaddleOCR/tools/infer/utility.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os, sys
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
import cv2
import numpy as np
import json
from PIL import Image, ImageDraw, ImageFont
import math


def parse_args():
    """
    Build the command-line parser shared by the inference tools and parse
    the arguments of the current process (sys.argv).

    return(argparse.Namespace):
        the parsed options for the prediction engine, the text detector
        (DB / EAST) and the text recognizer
    """

    def str2bool(v):
        # argparse hands over the raw string; bool("False") would be True,
        # so boolean flags are parsed explicitly.
        return v.lower() in ("true", "t", "1")

    parser = argparse.ArgumentParser()
    # params for prediction engine
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
    parser.add_argument("--gpu_mem", type=int, default=8000)

    # params for text detector
    parser.add_argument("--image_dir", type=str)
    parser.add_argument("--det_algorithm", type=str, default='DB')
    parser.add_argument("--det_model_dir", type=str)
    parser.add_argument("--det_max_side_len", type=float, default=960)

    # DB params
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
    parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)

    # EAST params
    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)

    # params for text recognizer
    parser.add_argument("--rec_algorithm", type=str, default='CRNN')
    parser.add_argument("--rec_model_dir", type=str)
    parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
    parser.add_argument("--rec_char_type", type=str, default='ch')
    parser.add_argument("--rec_batch_num", type=int, default=30)
    parser.add_argument(
        "--rec_char_dict_path",
        type=str,
        default="./ppocr/utils/ppocr_keys_v1.txt")
    # type=bool would turn every non-empty string (even "False") into True;
    # use the same string parser as the other boolean flags.
    parser.add_argument("--use_space_char", type=str2bool, default=True)
    return parser.parse_args()


def create_predictor(args, mode):
    """
    Create a Paddle inference predictor for the detection or recognition
    model and fetch its input / output tensor handles.

    args:
        args: parsed options; det_model_dir, rec_model_dir, use_gpu and
            gpu_mem are read here
        mode(str): "det" selects args.det_model_dir, any other value
            selects args.rec_model_dir
    return(tuple):
        (predictor, input_tensor, output_tensors); input_tensor is the
        handle of the first input name, output_tensors holds one handle
        per output name

    The process exits with status 0 (after logging) when the model
    directory is not set or the "model" / "params" files are missing.
    """
    model_dir = args.det_model_dir if mode == "det" else args.rec_model_dir
    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)

    model_file_path = model_dir + "/model"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)

    params_file_path = model_dir + "/params"
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if not args.use_gpu:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(4)
    else:
        # gpu_mem is the initial memory pool size, device id is fixed to 0
        config.enable_use_gpu(args.gpu_mem, 0)
    config.disable_glog_info()

    # use zero copy
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.switch_use_feed_fetch_ops(False)

    predictor = create_paddle_predictor(config)
    first_input_name = predictor.get_input_names()[0]
    input_tensor = predictor.get_input_tensor(first_input_name)
    output_tensors = [
        predictor.get_output_tensor(name)
        for name in predictor.get_output_names()
    ]
    return predictor, input_tensor, output_tensors


def draw_text_det_res(dt_boxes, img_path):
    """
    Read the image at img_path and outline every detected text box on it.

    args:
        dt_boxes(list): detected boxes; each one is reshaped to (-1, 2)
            integer points
        img_path(str): path of the image to load with cv2.imread
    return(array):
        the loaded image with closed polylines drawn on it
    """
    canvas = cv2.imread(img_path)
    for quad in dt_boxes:
        pts = np.array(quad).astype(np.int32)
        pts = pts.reshape(-1, 2)
        cv2.polylines(canvas, [pts], True, color=(255, 255, 0), thickness=2)
    return canvas


def resize_img(img, input_size=600):
    """
    Rescale img with a single factor so that its longest side (height or
    width) becomes input_size; the aspect ratio is kept.

    args:
        img(Image|array): the image to resize
        input_size(int): target length of the longest side
    return(array):
        the resized image
    """
    pixels = np.array(img)
    longest_side = np.max(pixels.shape[0:2])
    scale = float(input_size) / float(longest_side)
    return cv2.resize(pixels, None, None, fx=scale, fy=scale)


def draw_ocr(image, boxes, txts, scores, draw_txt=True, drop_score=0.5):
    """
    Visualize the results of OCR detection and recognition
    args:
        image(Image|array): RGB image
        boxes(list): boxes with shape(N, 4, 2)
        txts(list): the recognized texts
        scores(list|None): score of each text; None means every box gets
            a score of 1
        draw_txt(bool): whether to append a panel with the texts
        drop_score(float): boxes whose score is below this value (or NaN)
            are not drawn
    return(array):
        the visualized img; with draw_txt the image is resized to a
        longest side of 600 and the text panel is concatenated on the right
    """
    if scores is None:
        scores = [1] * len(boxes)

    for quad, quad_score in zip(boxes, scores):
        if not (quad_score < drop_score or math.isnan(quad_score)):
            pts = np.reshape(np.array(quad), [-1, 1, 2]).astype(np.int64)
            image = cv2.polylines(
                np.array(image), [pts], True, (255, 0, 0), 2)

    if not draw_txt:
        return image

    img = np.array(resize_img(image, input_size=600))
    txt_img = text_visual(
        txts, scores, img_h=img.shape[0], img_w=600, threshold=drop_score)
    return np.concatenate([np.array(img), np.array(txt_img)], axis=1)


def draw_ocr_box_txt(image, boxes, txts):
    """
    Render detection and recognition results side by side: the left half
    is the input image blended with colour-filled boxes, the right half is
    a white canvas with the box outlines and their texts.

    args:
        image(Image): PIL image (height / width / copy are used)
        boxes(list): boxes, each indexable as box[i][0], box[i][1] for the
            four corners i = 0..3
        txts(list): the text of each box
    return(array):
        the visualized img, twice as wide as the input
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    import random
    # A private generator with a fixed seed keeps the colours identical
    # between runs without reseeding the process-wide random module.
    rng = random.Random(0)

    # fonts are reused across boxes that end up with the same size
    font_cache = {}

    def load_font(size):
        if size not in font_cache:
            font_cache[size] = ImageFont.truetype(
                "./doc/simfang.ttf", size, encoding="utf-8")
        return font_cache[size]

    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for (box, txt) in zip(boxes, txts):
        color = (rng.randint(0, 255), rng.randint(0, 255),
                 rng.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        # edge corner0->corner3 is taken as the height, corner0->corner1
        # as the width
        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
            1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
            1])**2)
        if box_height > 2 * box_width:
            # tall box: draw the text vertically, one character per row
            font = load_font(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                char_size = font.getsize(c)
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_size[1]
        else:
            font = load_font(max(int(box_height * 0.8), 10))
            draw_right.text(
                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def str_count(s):
    """
    Estimate the display length of a string in full-width (Chinese)
    character units: a pair of ASCII letters, digits or whitespace
    characters counts as one unit, every other character counts as one.

    args:
        s(string): the input of string
    return(int):
        len(s) minus half (rounded up) of the number of letters, digits
        and whitespace characters
    """
    import string
    en_dg_count = sum(
        1 for c in s
        if c in string.ascii_letters or c.isdigit() or c.isspace())
    return len(s) - math.ceil(en_dg_count / 2)


def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.):
    """
    create new blank img and draw txt on it
    args:
        texts(list): the text will be draw
        scores(list|None): corresponding score of each txt; with None
            every text is drawn and no score is appended
        img_h(int): the height of blank img
        img_w(int): the width of blank img
        threshold(float): texts whose score is below this value (or NaN)
            are skipped
    return(array):
        the text panel; when the texts do not fit on one panel, further
        panels are concatenated to the right
    """
    if scores is not None:
        assert len(texts) == len(
            scores), "The number of txts and corresponding scores must match"

    def create_blank_img():
        # uint8 holds 255 exactly; int8 * 255 overflows (an error under
        # NumPy 2 promotion rules).
        blank_img = np.ones(shape=[img_h, img_w], dtype=np.uint8) * 255
        # black separator in the last column
        blank_img[:, img_w - 1:] = 0
        blank_img = Image.fromarray(blank_img).convert("RGB")
        draw_txt = ImageDraw.Draw(blank_img)
        return blank_img, draw_txt

    blank_img, draw_txt = create_blank_img()

    font_size = 20
    txt_color = (0, 0, 0)
    font = ImageFont.truetype("./doc/simfang.ttf", font_size, encoding="utf-8")

    gap = font_size + 5
    # characters per wrapped line; clamped so that a narrow img_w cannot
    # make the wrapping loop spin forever on empty slices
    line_len = max(img_w // font_size - 4, 1)
    last_row = img_h // gap - 1
    txt_img_list = []
    count, index = 1, 0
    for idx, txt in enumerate(texts):
        score = None if scores is None else scores[idx]
        if score is not None and (score < threshold or math.isnan(score)):
            continue
        # index numbers only the texts that are actually drawn
        index += 1
        score_txt = '' if score is None else '%.3f' % score
        first_line = True
        while str_count(txt) >= line_len:
            head, txt = txt[:line_len], txt[line_len:]
            if first_line:
                new_txt = str(index) + ': ' + head
                first_line = False
            else:
                new_txt = '    ' + head
            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
            if count >= last_row:
                # panel is full: store it and continue on a fresh one
                txt_img_list.append(np.array(blank_img))
                blank_img, draw_txt = create_blank_img()
                count = 0
            count += 1
        if first_line:
            new_txt = str(index) + ': ' + txt + '   ' + score_txt
        else:
            new_txt = "  " + txt + "  " + score_txt
        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
        # whether add new blank img or not
        if count >= last_row and idx + 1 < len(texts):
            txt_img_list.append(np.array(blank_img))
            blank_img, draw_txt = create_blank_img()
            count = 0
        count += 1
    txt_img_list.append(np.array(blank_img))
    if len(txt_img_list) == 1:
        blank_img = np.array(txt_img_list[0])
    else:
        blank_img = np.concatenate(txt_img_list, axis=1)
    return np.array(blank_img)


def base64_to_cv2(b64str):
    """
    Decode a base64 encoded image into an OpenCV image.

    args:
        b64str(str): base64 text of the encoded image file
    return:
        the result of cv2.imdecode with cv2.IMREAD_COLOR
    """
    import base64
    raw = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; frombuffer views the
    # decoded bytes without copying.
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)


def draw_boxes(image, boxes, scores=None, drop_score=0.5):
    """
    Outline the boxes on the image.

    args:
        image(Image|array): the image to draw on
        boxes(list): boxes, each reshaped to (-1, 1, 2) integer points
        scores(list|None): score of each box; None means every box gets
            a score of 1
        drop_score(float): boxes whose score is below this value (or NaN)
            are not drawn
    return:
        the image with the kept boxes drawn as closed polylines; the input
        object itself when no box is drawn
    """
    if scores is None:
        scores = [1] * len(boxes)
    for (box, score) in zip(boxes, scores):
        # NaN compares False against drop_score, so it needs its own check
        # (same rule as draw_ocr).
        if score < drop_score or math.isnan(score):
            continue
        box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
    return image


if __name__ == '__main__':
    # Demo: visualize the first record of ./doc/predict.txt. Each record is
    # "<image path>\t<json annotation list>".
    test_img = "./doc/test_v2"
    predict_txt = "./doc/predict.txt"
    # the context manager closes the file even if reading fails
    with open(predict_txt, 'r') as f:
        data = f.readlines()
    img_path, anno = data[0].strip().split('\t')
    # only the file name is kept; the image is looked up under test_img
    img_name = os.path.basename(img_path)
    img_path = os.path.join(test_img, img_name)
    image = Image.open(img_path)

    data = json.loads(anno)
    boxes, txts, scores = [], [], []
    for dic in data:
        boxes.append(dic['points'])
        txts.append(dic['transcription'])
        scores.append(round(dic['scores'], 3))

    new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True)

    # written to the current working directory under the image's file name
    cv2.imwrite(img_name, new_img)
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`import argparse`
			`import os, sys`
			`from ppocr.utils.utility import initial_logger`
			`logger = initial_logger()`
			`from paddle.fluid.core import PaddleTensor`
			`from paddle.fluid.core import AnalysisConfig`
			`from paddle.fluid.core import create_paddle_predictor`
			`import cv2`
			`import numpy as np`
add visulize code 2020-05-13 20:29:45 +08:00			`import json`
			`from PIL import Image, ImageDraw, ImageFont`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`import math`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00

			`def parse_args():`
			`def str2bool(v):`
			`return v.lower() in ("true", "t", "1")`

			`parser = argparse.ArgumentParser()`
			`#params for prediction engine`
			`parser.add_argument("--use_gpu", type=str2bool, default=True)`
			`parser.add_argument("--ir_optim", type=str2bool, default=True)`
			`parser.add_argument("--use_tensorrt", type=str2bool, default=False)`
			`parser.add_argument("--gpu_mem", type=int, default=8000)`

			`#params for text detector`
			`parser.add_argument("--image_dir", type=str)`
			`parser.add_argument("--det_algorithm", type=str, default='DB')`
			`parser.add_argument("--det_model_dir", type=str)`
			`parser.add_argument("--det_max_side_len", type=float, default=960)`

			`#DB parmas`
			`parser.add_argument("--det_db_thresh", type=float, default=0.3)`
			`parser.add_argument("--det_db_box_thresh", type=float, default=0.5)`
fix predict_det not found unclip_ratio 2020-05-25 18:14:13 +08:00			`parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00
			`#EAST parmas`
			`parser.add_argument("--det_east_score_thresh", type=float, default=0.8)`
			`parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)`
			`parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)`

			`#params for text recognizer`
			`parser.add_argument("--rec_algorithm", type=str, default='CRNN')`
			`parser.add_argument("--rec_model_dir", type=str)`
			`parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")`
			`parser.add_argument("--rec_char_type", type=str, default='ch')`
modify batch num in rec and fix sorted_box func 2020-05-20 16:19:49 +08:00			`parser.add_argument("--rec_batch_num", type=int, default=30)`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`parser.add_argument(`
			`"--rec_char_dict_path",`
			`type=str,`
			`default="./ppocr/utils/ppocr_keys_v1.txt")`
change name for use_space_char 2020-07-07 14:29:45 +08:00			`parser.add_argument("--use_space_char", type=bool, default=True)`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`return parser.parse_args()`


			`def create_predictor(args, mode):`
			`if mode == "det":`
			`model_dir = args.det_model_dir`
			`else:`
			`model_dir = args.rec_model_dir`

			`if model_dir is None:`
			`logger.info("not find {} model file path {}".format(mode, model_dir))`
			`sys.exit(0)`
			`model_file_path = model_dir + "/model"`
			`params_file_path = model_dir + "/params"`
			`if not os.path.exists(model_file_path):`
			`logger.info("not find model file path {}".format(model_file_path))`
			`sys.exit(0)`
			`if not os.path.exists(params_file_path):`
			`logger.info("not find params file path {}".format(params_file_path))`
			`sys.exit(0)`

			`config = AnalysisConfig(model_file_path, params_file_path)`

			`if args.use_gpu:`
			`config.enable_use_gpu(args.gpu_mem, 0)`
			`else:`
			`config.disable_gpu()`
move out visulization from hubserving 2020-07-13 17:25:30 +08:00			`# config.enable_mkldnn()`
opt cpu speed and fix wrong link of weights 2020-07-09 20:29:33 +08:00			`config.set_cpu_math_library_num_threads(4)`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`#config.enable_memory_optim()`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`config.disable_glog_info()`
fix tools/infer/utility.py 2020-05-11 15:33:54 +08:00
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`# use zero copy`
solve predicts diff between cpu and gpu running 2020-05-15 17:32:32 +08:00			`config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`config.switch_use_feed_fetch_ops(False)`
			`predictor = create_paddle_predictor(config)`
			`input_names = predictor.get_input_names()`
			`input_tensor = predictor.get_input_tensor(input_names[0])`
			`output_names = predictor.get_output_names()`
			`output_tensors = []`
			`for output_name in output_names:`
			`output_tensor = predictor.get_output_tensor(output_name)`
			`output_tensors.append(output_tensor)`
			`return predictor, input_tensor, output_tensors`


fix bug in results visualization 2020-05-28 15:46:05 +08:00			`def draw_text_det_res(dt_boxes, img_path):`
upload PaddleOCR code 2020-05-10 16:26:57 +08:00			`src_im = cv2.imread(img_path)`
			`for box in dt_boxes:`
			`box = np.array(box).astype(np.int32).reshape(-1, 2)`
			`cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)`
add visualized code for det results 2020-05-15 17:06:43 +08:00			`return src_im`
add visulize code 2020-05-13 20:29:45 +08:00

add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`def resize_img(img, input_size=600):`
			`"""`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`resize img and limit the longest side of the image to input_size`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`"""`
			`img = np.array(img)`
			`im_shape = img.shape`
			`im_size_max = np.max(im_shape[0:2])`
			`im_scale = float(input_size) / float(im_size_max)`
			`im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)`
			`return im`


			`def draw_ocr(image, boxes, txts, scores, draw_txt=True, drop_score=0.5):`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`"""`
			`Visualize the results of OCR detection and recognition`
			`args:`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`image(Image\|array): RGB image`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`boxes(list): boxes with shape(N, 4, 2)`
			`txts(list): the texts`
			`scores(list): txxs corresponding scores`
			`draw_txt(bool): whether draw text or not`
			`drop_score(float): only scores greater than drop_threshold will be visualized`
			`return(array):`
			`the visualized img`
			`"""`
valid det inference 2020-05-15 14:22:57 +08:00			`if scores is None:`
			`scores = [1] * len(boxes)`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`for (box, score) in zip(boxes, scores):`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`if score < drop_score or math.isnan(score):`
add visualize code to predict_eval 2020-05-14 12:08:11 +08:00			`continue`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)`
fix visual bug 2020-05-28 20:06:26 +08:00			`image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)`
add visulize code 2020-05-13 20:29:45 +08:00
			`if draw_txt:`
fix visual bug 2020-05-28 20:06:26 +08:00			`img = np.array(resize_img(image, input_size=600))`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`txt_img = text_visual(`
			`txts, scores, img_h=img.shape[0], img_w=600, threshold=drop_score)`
			`img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)`
fix visual bug 2020-05-28 20:06:26 +08:00			`return img`
			`return image`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00

修改显示方式，微调显示参数 2020-06-15 20:05:34 +08:00			`def draw_ocr_box_txt(image, boxes, txts):`
			`h, w = image.height, image.width`
			`img_left = image.copy()`
			`img_right = Image.new('RGB', (w, h), (255, 255, 255))`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00
			`import random`
修改显示方式，微调显示参数 2020-06-15 20:05:34 +08:00			`# 每次使用相同的随机种子，可以保证两次颜色一致`
			`random.seed(0)`
			`draw_left = ImageDraw.Draw(img_left)`
			`draw_right = ImageDraw.Draw(img_right)`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`for (box, txt) in zip(boxes, txts):`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`color = (random.randint(0, 255), random.randint(0, 255),`
			`random.randint(0, 255))`
修改显示方式，微调显示参数 2020-06-15 20:05:34 +08:00			`draw_left.polygon(box, fill=color)`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`draw_right.polygon(`
			`[`
			`box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],`
			`box[2][1], box[3][0], box[3][1]`
			`],`
			`outline=color)`
			`box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][`
			`1])**2)`
			`box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][`
			`1])**2)`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`if box_height > 2 * box_width:`
			`font_size = max(int(box_width * 0.9), 10)`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`font = ImageFont.truetype(`
			`"./doc/simfang.ttf", font_size, encoding="utf-8")`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`cur_y = box[0][1]`
			`for c in txt:`
			`char_size = font.getsize(c)`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`draw_right.text(`
			`(box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`cur_y += char_size[1]`
			`else:`
			`font_size = max(int(box_height * 0.8), 10)`
close memory_optim for attention 2020-06-30 19:28:29 +08:00			`font = ImageFont.truetype(`
			`"./doc/simfang.ttf", font_size, encoding="utf-8")`
			`draw_right.text(`
			`[box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)`
修改显示方式，微调显示参数 2020-06-15 20:05:34 +08:00			`img_left = Image.blend(image, img_left, 0.5)`
			`img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))`
			`img_show.paste(img_left, (0, 0, w, h))`
			`img_show.paste(img_right, (w, 0, w * 2, h))`
增加一个画图函数，可以更方便更直观的看检测和识别结果，对横排、竖排文本都可以显示 2020-06-15 18:40:01 +08:00			`return np.array(img_show)`


opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`def str_count(s):`
			`"""`
			`Count the number of Chinese characters,`
			`a single English character and a single number`
			`equal to half the length of Chinese characters.`

			`args:`
			`s(string): the input of string`
			`return(int):`
			`the number of Chinese characters`
			`"""`
			`import string`
			`count_zh = count_pu = 0`
			`s_len = len(s)`
			`en_dg_count = 0`
			`for c in s:`
			`if c in string.ascii_letters or c.isdigit() or c.isspace():`
			`en_dg_count += 1`
			`elif c.isalpha():`
			`count_zh += 1`
			`else:`
			`count_pu += 1`
			`return s_len - math.ceil(en_dg_count / 2)`


			`def text_visual(texts, scores, img_h=400, img_w=600, threshold=0.):`
			`"""`
			`create new blank img and draw txt on it`
			`args:`
			`texts(list): the text will be draw`
			`scores(list\|None): corresponding score of each txt`
			`img_h(int): the height of blank img`
			`img_w(int): the width of blank img`
			`return(array):`

			`"""`
			`if scores is not None:`
			`assert len(texts) == len(`
			`scores), "The number of txts and corresponding scores must match"`

			`def create_blank_img():`
			`blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255`
			`blank_img[:, img_w - 1:] = 0`
add visulize code 2020-05-13 20:29:45 +08:00			`blank_img = Image.fromarray(blank_img).convert("RGB")`
			`draw_txt = ImageDraw.Draw(blank_img)`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`return blank_img, draw_txt`
add visulize code 2020-05-13 20:29:45 +08:00
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`blank_img, draw_txt = create_blank_img()`

			`font_size = 20`
			`txt_color = (0, 0, 0)`
opt visualized func and add docker usage in cpu 2020-05-27 15:03:12 +08:00			`font = ImageFont.truetype("./doc/simfang.ttf", font_size, encoding="utf-8")`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00
			`gap = font_size + 5`
			`txt_img_list = []`
fix visual bug 2020-05-28 20:06:26 +08:00			`count, index = 1, 0`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`for idx, txt in enumerate(texts):`
			`index += 1`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`if scores[idx] < threshold or math.isnan(scores[idx]):`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`index -= 1`
			`continue`
			`first_line = True`
			`while str_count(txt) >= img_w // font_size - 4:`
			`tmp = txt`
			`txt = tmp[:img_w // font_size - 4]`
			`if first_line:`
			`new_txt = str(index) + ': ' + txt`
			`first_line = False`
			`else:`
			`new_txt = ' ' + txt`
fix multi-line txt display 2020-05-29 15:23:09 +08:00			`draw_txt.text((0, gap * count), new_txt, txt_color, font=font)`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`txt = tmp[img_w // font_size - 4:]`
			`if count >= img_h // gap - 1:`
			`txt_img_list.append(np.array(blank_img))`
			`blank_img, draw_txt = create_blank_img()`
			`count = 0`
fix visual bug 2020-05-28 20:06:26 +08:00			`count += 1`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`if first_line:`
			`new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx])`
			`else:`
fix bug in results visualization 2020-05-28 15:46:05 +08:00			`new_txt = " " + txt + " " + '%.3f' % (scores[idx])`
fix visual bug 2020-05-28 20:06:26 +08:00			`draw_txt.text((0, gap * count), new_txt, txt_color, font=font)`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`# whether add new blank img or not`
fix visual bug 2020-05-28 20:06:26 +08:00			`if count >= img_h // gap - 1 and idx + 1 < len(texts):`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`txt_img_list.append(np.array(blank_img))`
			`blank_img, draw_txt = create_blank_img()`
			`count = 0`
fix visual bug 2020-05-28 20:06:26 +08:00			`count += 1`
opt visualized func and add docker usage in cpu 2020-05-27 14:55:58 +08:00			`txt_img_list.append(np.array(blank_img))`
			`if len(txt_img_list) == 1:`
			`blank_img = np.array(txt_img_list[0])`
			`else:`
			`blank_img = np.concatenate(txt_img_list, axis=1)`
			`return np.array(blank_img)`
add visulize code 2020-05-13 20:29:45 +08:00

add hub serving 2020-07-09 20:34:42 +08:00			`def base64_to_cv2(b64str):`
			`import base64`
			`data = base64.b64decode(b64str.encode('utf8'))`
			`data = np.fromstring(data, np.uint8)`
			`data = cv2.imdecode(data, cv2.IMREAD_COLOR)`
			`return data`


			`def draw_boxes(image, boxes, scores=None, drop_score=0.5):`
			`if scores is None:`
			`scores = [1] * len(boxes)`
			`for (box, score) in zip(boxes, scores):`
			`if score < drop_score:`
			`continue`
			`box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)`
			`image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)`
			`return image`


add visulize code 2020-05-13 20:29:45 +08:00			`if __name__ == '__main__':`
			`test_img = "./doc/test_v2"`
			`predict_txt = "./doc/predict.txt"`
			`f = open(predict_txt, 'r')`
			`data = f.readlines()`
			`img_path, anno = data[0].strip().split('\t')`
			`img_name = os.path.basename(img_path)`
			`img_path = os.path.join(test_img, img_name)`
			`image = Image.open(img_path)`

			`data = json.loads(anno)`
			`boxes, txts, scores = [], [], []`
			`for dic in data:`
			`boxes.append(dic['points'])`
			`txts.append(dic['transcription'])`
			`scores.append(round(dic['scores'], 3))`

			`new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True)`

add config.enable_memory_optim() 2020-06-23 11:46:45 +08:00			`cv2.imwrite(img_name, new_img)`