2021-06-10 14:24:59 +08:00
|
|
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
# Register this script's directory and the directory two levels above it on
# the import path; presumably this lets the `ppstructure` / `ppocr` imports
# further down resolve when the file is run directly as a script.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.extend([
    __dir__,
    os.path.abspath(os.path.join(__dir__, '../..')),
])
|
|
|
|
|
|
|
|
import cv2
|
|
|
|
import json
|
|
|
|
from tqdm import tqdm
|
2021-07-27 19:18:55 +08:00
|
|
|
from ppstructure.table.table_metric import TEDS
|
|
|
|
from ppstructure.table.predict_table import TableSystem
|
|
|
|
from ppstructure.utility import init_args
|
2021-06-10 14:24:59 +08:00
|
|
|
from ppocr.utils.logging import get_logger
|
|
|
|
|
|
|
|
logger = get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
def parse_args():
    """Parse the command line for the table evaluation script.

    Starts from the parser returned by ``init_args()`` and registers one
    additional string option, ``--gt_path``.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    arg_parser = init_args()
    arg_parser.add_argument("--gt_path", type=str)
    parsed = arg_parser.parse_args()
    return parsed
|
|
|
|
|
|
|
|
def main(gt_path, img_root, args):
    """Run table recognition on every ground-truth image and log the mean TEDS.

    Args:
        gt_path: Path to a JSON file that maps each image name to a
            three-element entry, unpacked here as structure tokens,
            bounding boxes (unused by this function) and cell contents.
        img_root: Directory the image names from the ground-truth file are
            joined onto before reading.
        args: Parsed command-line options, passed unchanged to
            ``TableSystem``.

    Returns:
        None. The averaged score is written to the module logger.
    """
    teds = TEDS(n_jobs=16)

    text_sys = TableSystem(args)
    # Use a context manager so the ground-truth file handle is closed as
    # soon as parsing finishes instead of being left to the garbage collector.
    with open(gt_path) as gt_file:
        jsons_gt = json.load(gt_file)  # gt
    pred_htmls = []
    gt_htmls = []
    for img_name in tqdm(jsons_gt):
        # read image
        img = cv2.imread(os.path.join(img_root, img_name))
        pred_html = text_sys(img)
        pred_htmls.append(pred_html)

        # Only the structure tokens and cell contents are needed to rebuild
        # the reference HTML; the bounding boxes are ignored.
        gt_structures, _gt_bboxes, gt_contents = jsons_gt[img_name]
        gt_html, _ = get_gt_html(gt_structures, gt_contents)
        gt_htmls.append(gt_html)
    scores = teds.batch_evaluate_html(gt_htmls, pred_htmls)

    # An empty ground-truth file yields no scores; report that instead of
    # failing with ZeroDivisionError when averaging. `len()` is used rather
    # than truthiness because the original code already relies on `len()`.
    if len(scores) == 0:
        logger.warning('teds: no samples were evaluated')
        return

    # The value must go through a %-placeholder: passing it as a bare extra
    # argument makes logging fail to format the record, so the score would
    # never be printed.
    logger.info('teds: %s', sum(scores) / len(scores))
|
|
|
|
|
|
|
|
|
2021-07-29 17:59:44 +08:00
|
|
|
def get_gt_html(gt_structures, gt_contents):
    """Merge structure tokens and per-cell contents into one HTML string.

    Walks ``gt_structures`` in order. Each token containing ``'</td>'``
    consumes the next entry of ``gt_contents`` and, unless that entry equals
    the empty list, its items are emitted immediately before the token.
    Every structure token is emitted exactly once.

    Args:
        gt_structures: Iterable of HTML structure tokens (strings).
        gt_contents: Indexable collection with one entry per ``'</td>'``
            token; each entry is an iterable of string pieces.

    Returns:
        A tuple ``(html, tokens)`` where ``tokens`` is the merged token list
        and ``html`` is those tokens concatenated.
    """
    pieces = []
    cell_no = 0
    for token in gt_structures:
        if '</td>' in token:
            cell_tokens = gt_contents[cell_no]
            cell_no += 1
            if cell_tokens != []:
                # Cell text goes in front of the tag that closes the cell.
                pieces.extend(cell_tokens)
        pieces.append(token)
    return ''.join(pieces), pieces
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the parsed options supply both paths and are also
    # forwarded whole as the third argument of main().
    cli_args = parse_args()
    main(gt_path=cli_args.gt_path, img_root=cli_args.image_dir, args=cli_args)
|