PaddleOCR/ppocr/postprocess/rec_postprocess.py

309 lines
12 KiB
Python
Raw Normal View History

2020-10-13 17:13:33 +08:00
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
2021-01-26 15:53:49 +08:00
import string
2020-10-13 17:13:33 +08:00
import paddle
from paddle.nn import functional as F
class BaseRecLabelDecode(object):
    """Convert between text-label and text-index.

    The constructor builds the character table (``self.character``, index ->
    character) and its inverse (``self.dict``, character -> index).
    Subclasses customise the table through ``add_special_char`` and the set
    of indices dropped while decoding through ``get_ignored_tokens``.
    """

    def __init__(self,
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False):
        """Build the character table.

        Args:
            character_dict_path: path to a UTF-8 dictionary file. Required
                for every character type other than 'en' and 'EN_symbol'.
            character_type: one of the supported type names listed below.
            use_space_char: when the table is loaded from a dictionary file,
                append a space character to it. Not used for the built-in
                'en' and 'EN_symbol' tables.

        Raises:
            AssertionError: unsupported ``character_type``, or a missing
                ``character_dict_path`` for a dictionary-based type.
        """
        support_character_type = [
            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
            'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
            'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
            'ne', 'EN'
        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, character_type)

        # Set before add_special_char() runs: subclasses read these there.
        self.beg_str = "sos"
        self.end_str = "eos"

        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
        elif character_type == "EN_symbol":
            # same with ASTER setting (use 94 char).
            self.character_str = string.printable[:-6]
            dict_character = list(self.character_str)
        elif character_type in support_character_type:
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
                character_type)
            with open(character_dict_path, "rb") as fin:
                entries = [
                    raw.decode('utf-8').strip("\n").strip("\r\n")
                    for raw in fin.readlines()
                ]
            if use_space_char:
                entries.append(" ")
            # Join once instead of growing the string line by line.
            self.character_str = "".join(entries)
            dict_character = list(self.character_str)
        else:
            # Only reachable when assertions are disabled (python -O).
            raise NotImplementedError

        self.character_type = character_type
        dict_character = self.add_special_char(dict_character)
        self.dict = {}
        for i, char in enumerate(dict_character):
            self.dict[char] = i
        self.character = dict_character

    def add_special_char(self, dict_character):
        """Hook for subclasses to add special tokens; the base adds none."""
        return dict_character

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index into text-label.

        Args:
            text_index: batch of index sequences (anything supporting
                ``len()`` and integer indexing on both levels).
            text_prob: optional per-position confidences with the same
                layout as ``text_index``; when omitted every kept position
                counts as confidence 1.
            is_remove_duplicate: skip a position whose index equals the
                index of the position right before it.

        Returns:
            A list with one ``(text, confidence)`` tuple per sequence, where
            ``confidence`` is the mean confidence of the kept positions, or
            0 when no position was kept.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        # Index-based access is kept on purpose: inputs may be tensor-like
        # objects for which only len() and [] are known to work.
        for batch_idx in range(len(text_index)):
            indices = text_index[batch_idx]
            char_list = []
            conf_list = []
            for idx in range(len(indices)):
                token = indices[idx]
                if token in ignored_tokens:
                    continue
                # only for predict
                if is_remove_duplicate and idx > 0 and indices[idx -
                                                               1] == token:
                    continue
                char_list.append(self.character[int(token)])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            if len(conf_list) == 0:
                # np.mean([]) would yield nan and emit a RuntimeWarning.
                conf_list = [0]
            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list)))
        return result_list

    def get_ignored_tokens(self):
        """Return the indices that ``decode`` drops."""
        return [0]  # for ctc blank
class CTCLabelDecode(BaseRecLabelDecode):
    """Decode CTC-style outputs; the character table starts with 'blank'."""

    def __init__(self,
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False,
                 **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super(CTCLabelDecode, self).__init__(
            character_dict_path, character_type, use_space_char)

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode predictions and, when given, labels.

        ``preds`` is reduced along axis 2 (argmax for the index, max for the
        confidence) and decoded with duplicate removal. Returns the decoded
        predictions alone when ``label`` is None, otherwise a
        ``(predictions, labels)`` pair.
        """
        if isinstance(preds, paddle.Tensor):
            preds = preds.numpy()
        best_idx = preds.argmax(axis=2)
        best_prob = preds.max(axis=2)
        decoded = self.decode(best_idx, best_prob, is_remove_duplicate=True)
        if label is not None:
            return decoded, self.decode(label)
        return decoded

    def add_special_char(self, dict_character):
        """Put the 'blank' entry at index 0 of the character table."""
        return ['blank'] + dict_character
class AttnLabelDecode(BaseRecLabelDecode):
    """Decode attention-style outputs.

    The character table is framed by a begin token ("sos", index 0) and an
    end token ("eos", last index).
    """

    def __init__(self,
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False,
                 **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super(AttnLabelDecode, self).__init__(character_dict_path,
                                              character_type, use_space_char)

    def add_special_char(self, dict_character):
        """Frame the table as ["sos"] + characters + ["eos"]."""
        self.beg_str = "sos"
        self.end_str = "eos"
        return [self.beg_str] + dict_character + [self.end_str]

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index into text-label.

        Each sequence is read up to (not including) its first end token;
        begin tokens are skipped.

        Args:
            text_index: batch of index sequences.
            text_prob: optional per-position confidences with the same
                layout as ``text_index``; when omitted every kept position
                counts as confidence 1.
            is_remove_duplicate: skip a position whose index equals the
                index of the position right before it.

        Returns:
            A list with one ``(text, confidence)`` tuple per sequence, where
            ``confidence`` is the mean confidence of the kept positions, or
            0 when no position was kept.
        """
        result_list = []
        beg_idx, end_idx = [int(tok) for tok in self.get_ignored_tokens()]
        for batch_idx in range(len(text_index)):
            indices = text_index[batch_idx]
            char_list = []
            conf_list = []
            for idx in range(len(indices)):
                token = int(indices[idx])
                # The end check must come before the skip: the end token is
                # also an ignored token, so testing "ignored" first made the
                # break unreachable and let text after "eos" leak through.
                if token == end_idx:
                    break
                if token == beg_idx:
                    continue
                # only for predict
                if is_remove_duplicate and idx > 0 and indices[
                        idx - 1] == indices[idx]:
                    continue
                char_list.append(self.character[token])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            if len(conf_list) == 0:
                # np.mean([]) would yield nan and emit a RuntimeWarning.
                conf_list = [0]
            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list)))
        return result_list

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode predictions and, when given, labels.

        ``preds`` is reduced along axis 2 (argmax for the index, max for the
        confidence) and decoded without duplicate removal. Returns the
        decoded predictions alone when ``label`` is None, otherwise a
        ``(predictions, labels)`` pair.
        """
        if isinstance(preds, paddle.Tensor):
            preds = preds.numpy()

        preds_idx = preds.argmax(axis=2)
        preds_prob = preds.max(axis=2)
        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
        if label is None:
            return text
        label = self.decode(label, is_remove_duplicate=False)
        return text, label

    def encoder(self, labels, labels_length):
        """Wrap raw label indices with begin and end tokens.

        Used to encode labels read from an LMDB dataset, for example:
            [35, 25, 31, 0, 0, ..., 0] with length 3 ('you') is encoded to
            [0, 35, 25, 31, 37, 0, ...], i.e. 'sos' y o u 'eos'
            (37 being the last table index in that example).

        Args:
            labels: 2-D array (or paddle Tensor) of label indices, one row
                per sample.
            labels_length: number of valid entries in each row.

        Returns:
            int64 array two columns wider than ``labels``. Unwritten
            positions stay 0, which is also the 'sos' index.
        """
        if isinstance(labels, paddle.Tensor):
            labels = labels.numpy()
        batch_max_length = labels.shape[
            1] + 2  # add start token 'sos' and end token 'eos'
        new_labels = np.zeros(
            [labels.shape[0], batch_max_length]).astype(np.int64)
        eos_idx = len(self.character) - 1
        for i in range(labels.shape[0]):
            length = labels_length[i]
            # new_labels[i, 0] stays 0, the 'sos' token
            new_labels[i, 1:1 + length] = labels[i, :length]
            new_labels[i, length + 1] = eos_idx  # add end character 'eos'
        return new_labels

    def get_ignored_tokens(self):
        """Return ``[begin index, end index]``."""
        beg_idx = self.get_beg_end_flag_idx("beg")
        end_idx = self.get_beg_end_flag_idx("end")
        return [beg_idx, end_idx]

    def get_beg_end_flag_idx(self, beg_or_end):
        """Return the table index of the begin ("beg") or end ("end") token.

        Raises:
            AssertionError: ``beg_or_end`` is neither "beg" nor "end".
        """
        if beg_or_end == "beg":
            return np.array(self.dict[self.beg_str])
        if beg_or_end == "end":
            return np.array(self.dict[self.end_str])
        # Raised explicitly so the check survives `python -O`, which strips
        # assert statements.
        raise AssertionError("unsupport type %s in get_beg_end_flag_idx" %
                             beg_or_end)
2020-12-30 16:15:49 +08:00
class SRNLabelDecode(BaseRecLabelDecode):
    """Decode SRN outputs.

    The character table ends with the begin ("sos") and end ("eos") tokens,
    both of which are dropped while decoding.
    """

    def __init__(self,
                 character_dict_path=None,
                 character_type='en',
                 use_space_char=False,
                 **kwargs):
        """Build the table; ``max_text_length`` (keyword, default 25) is the
        number of positions per sample used to reshape the predictions."""
        super(SRNLabelDecode, self).__init__(character_dict_path,
                                             character_type, use_space_char)
        self.max_text_length = kwargs.get('max_text_length', 25)

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode predictions and, when given, labels.

        ``preds['predict']`` is flattened to rows of ``char_num`` scores,
        reduced per row (argmax / max) and regrouped into sequences of
        ``max_text_length`` positions. Returns the decoded predictions alone
        when ``label`` is None, otherwise a ``(predictions, labels)`` pair.
        """
        pred = preds['predict']
        char_num = len(self.character_str) + 2  # characters + sos + eos
        if isinstance(pred, paddle.Tensor):
            pred = pred.numpy()
        pred = np.reshape(pred, [-1, char_num])

        preds_idx = np.argmax(pred, axis=1)
        preds_prob = np.max(pred, axis=1)

        preds_idx = np.reshape(preds_idx, [-1, self.max_text_length])
        preds_prob = np.reshape(preds_prob, [-1, self.max_text_length])

        # Each position is decoded on its own and this class has no blank
        # token separating repeats, so collapsing equal neighbours would
        # drop genuine doubled characters ("hello" -> "helo").
        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
        if label is None:
            return text
        label = self.decode(label, is_remove_duplicate=False)
        return text, label

    def decode(self, text_index, text_prob=None, is_remove_duplicate=True):
        """Convert text-index into text-label.

        Same procedure as the base class, with the begin and end indices as
        ignored tokens. The default of ``is_remove_duplicate`` is kept at
        True for callers that rely on it.

        Returns:
            A list with one ``(text, mean confidence)`` tuple per sequence.
        """
        return super(SRNLabelDecode, self).decode(
            text_index, text_prob, is_remove_duplicate=is_remove_duplicate)

    def add_special_char(self, dict_character):
        """Append the begin and end tokens to the end of the table."""
        return dict_character + [self.beg_str, self.end_str]

    def get_ignored_tokens(self):
        """Return ``[begin index, end index]``."""
        beg_idx = self.get_beg_end_flag_idx("beg")
        end_idx = self.get_beg_end_flag_idx("end")
        return [beg_idx, end_idx]

    def get_beg_end_flag_idx(self, beg_or_end):
        """Return the table index of the begin ("beg") or end ("end") token.

        Raises:
            AssertionError: ``beg_or_end`` is neither "beg" nor "end".
        """
        if beg_or_end == "beg":
            return np.array(self.dict[self.beg_str])
        if beg_or_end == "end":
            return np.array(self.dict[self.end_str])
        # Raised explicitly so the check survives `python -O`, which strips
        # assert statements.
        raise AssertionError("unsupport type %s in get_beg_end_flag_idx" %
                             beg_or_end)