93 lines
3.0 KiB
Python
Executable File
93 lines
3.0 KiB
Python
Executable File
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
|
#
|
|
#Licensed under the Apache License, Version 2.0 (the "License");
|
|
#you may not use this file except in compliance with the License.
|
|
#You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
#Unless required by applicable law or agreed to in writing, software
|
|
#distributed under the License is distributed on an "AS IS" BASIS,
|
|
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
#See the License for the specific language governing permissions and
|
|
#limitations under the License.
|
|
|
|
import math
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
def get_bounding_box_rect(pos):
    """Compute the bounding rectangle of a set of coordinates.

    Args:
        pos: Indexable container whose item 0 holds the values that
            determine the left/right edges and whose item 1 holds the
            values that determine the top/bottom edges.

    Returns:
        list: ``[left, top, right, bottom]``, i.e. the minimum and maximum
        of ``pos[0]`` and ``pos[1]`` arranged in that order.
    """
    horizontal = pos[0]
    left, right = min(horizontal), max(horizontal)
    vertical = pos[1]
    top, bottom = min(vertical), max(vertical)
    return [left, top, right, bottom]
|
|
|
|
|
|
def resize_norm_img(img, image_shape):
    """Resize an image to a fixed height, normalize it and right-pad it.

    The image is resized to height ``imgH`` while keeping its aspect ratio;
    the resulting width is capped at ``imgW``. Pixel values are converted to
    float32 and mapped with ``(v / 255 - 0.5) / 0.5`` (so 0 becomes -1 and
    255 becomes 1). The result is written into the left part of a
    zero-filled ``(imgC, imgH, imgW)`` array.

    Args:
        img: Image array; ``img.shape[0]`` is used as height and
            ``img.shape[1]`` as width.
        image_shape: Sequence ``(imgC, imgH, imgW)`` describing the output.

    Returns:
        numpy.ndarray: float32 array of shape ``(imgC, imgH, imgW)``; columns
        to the right of the resized image stay zero.
    """
    imgC, imgH, imgW = image_shape
    src_h, src_w = img.shape[0], img.shape[1]
    aspect = src_w / float(src_h)

    # Keep the aspect ratio, but never grow wider than the target width.
    scaled_w = math.ceil(imgH * aspect)
    resized_w = imgW if scaled_w > imgW else int(scaled_w)

    # cv2.resize takes the target size as (width, height).
    pixels = cv2.resize(img, (resized_w, imgH)).astype('float32')
    if image_shape[0] == 1:
        # Single-channel target: add the leading channel axis explicitly.
        pixels = (pixels / 255)[np.newaxis, :]
    else:
        # Move the channel axis first: (H, W, C) -> (C, H, W).
        pixels = pixels.transpose((2, 0, 1)) / 255
    pixels = (pixels - 0.5) / 0.5

    canvas = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    canvas[:, :, :resized_w] = pixels
    return canvas
|
|
|
|
|
|
def get_img_data(value):
    """Decode an encoded image buffer into an image array.

    Args:
        value: Encoded image bytes (any buffer accepted by
            ``np.frombuffer``). A falsy value (``None``, ``b""``) is treated
            as "no image".

    Returns:
        The array produced by ``cv2.imdecode(data, 1)`` (flag 1 is
        ``cv2.IMREAD_COLOR``), or ``None`` when ``value`` is falsy or the
        data cannot be decoded.
    """
    if not value:
        return None
    # np.frombuffer either returns an array or raises, so no None check is
    # needed on its result.
    imgdata = np.frombuffer(value, dtype='uint8')
    # cv2.imdecode yields None for undecodable data; pass that straight on.
    return cv2.imdecode(imgdata, 1)
|
|
|
|
|
|
def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
                  max_text_length=None):
    """Normalize an image and, optionally, encode its text label.

    Args:
        img: Image array passed to ``resize_norm_img``.
        image_shape: ``(imgC, imgH, imgW)`` target shape passed to
            ``resize_norm_img``.
        label: Text label to encode, or ``None`` to process the image only.
        char_ops: Object providing ``encode(label)`` and
            ``get_beg_end_flag_idx(flag)``; required when ``label`` is given.
        loss_type: ``"ctc"`` or ``"attention"``; selects the label layout.
        max_text_length: Maximum allowed encoded length. ``None`` means no
            upper limit.

    Returns:
        * ``label is None``: the normalized image with a leading axis added.
        * encoded label empty or longer than ``max_text_length``: ``None``.
        * ``loss_type == "ctc"``: ``(norm_img, text)`` with ``text`` reshaped
          to a column ``(-1, 1)``.
        * ``loss_type == "attention"``: ``(norm_img, beg_text, end_text)``
          where the begin/end flag index is prepended/appended to the
          encoded text and both are reshaped to columns.

    Raises:
        AssertionError: If ``label`` is given and ``loss_type`` is neither
            ``"ctc"`` nor ``"attention"``.
    """
    norm_img = resize_norm_img(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
    if label is None:
        return norm_img

    text = char_ops.encode(label)
    if len(text) == 0:
        return None
    # The default max_text_length=None must not be compared with an int
    # (TypeError on Python 3); treat it as "unbounded".
    if max_text_length is not None and len(text) > max_text_length:
        return None

    if loss_type == "ctc":
        return (norm_img, text.reshape(-1, 1))

    if loss_type == "attention":
        beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
        end_flag_idx = char_ops.get_beg_end_flag_idx("end")
        beg_text = np.append(beg_flag_idx, text).reshape(-1, 1)
        end_text = np.append(text, end_flag_idx).reshape(-1, 1)
        return (norm_img, beg_text, end_text)

    # Raised explicitly (instead of ``assert False``) so the check survives
    # ``python -O``; the exception type and message are unchanged.
    raise AssertionError("Unsupport loss_type %s in process_image"
                         % loss_type)
|