diff --git a/README.md b/README.md index e08448f6..2b889c28 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,49 @@ # 简介 -PaddleOCR旨在打造一套丰富、领先、且实用的文字检测、识别模型/工具库,助力使用者训练出更好的模型,并应用落地。 - - -## 文档教程 -- [快速安装](./doc/installation.md) -- [文本识别模型训练/评估/预测](./doc/detection.md) -- [文本预测模型训练/评估/预测](./doc/recognition.md) +PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。 ## 特性: - 超轻量级模型 - (检测模型4.1M + 识别模型4.5M = 8.6M) -- 支持竖排文字 +- 支持竖排文字识别 - (单模型同时支持横排和竖排文字识别) - 支持长文本识别 - 支持中英文数字组合识别 - 提供训练代码 - 支持模型部署 + +## 文档教程 +- [快速安装](./doc/installation.md) +- [快速开始]() +- [文本检测模型训练/评估/预测](./doc/detection.md) +- [文本识别模型训练/评估/预测](./doc/recognition.md) +- [基于inference model预测](./doc/) + + ## 文本检测算法: PaddleOCR开源的文本检测算法列表: -- [EAST](https://arxiv.org/abs/1704.03155) -- [DB](https://arxiv.org/abs/1911.08947) -- [SAST](https://arxiv.org/abs/1908.05498) +- [x] [EAST](https://arxiv.org/abs/1704.03155) +- [x] [DB](https://arxiv.org/abs/1911.08947) +- [x] [SAST](https://arxiv.org/abs/1908.05498) +- [] + 算法效果: -|模型|骨干网络|数据集|Hmean| -|-|-|-|-| -|EAST|ResNet50_vd|ICDAR2015|85.85%| -|EAST|MobileNetV3|ICDAR2015|79.08%| -|DB|ResNet50_vd|ICDAR2015|83.30%| -|DB|MobileNetV3|ICDAR2015|73.00%| +|模型|骨干网络|Hmean| +|-|-|-| +|EAST^[1]^|ResNet50_vd|85.85%| +|EAST^[1]^|MobileNetV3|79.08%| +|DB^[2]^|ResNet50_vd|83.30%| +|DB^[2]^|MobileNetV3|73.00%| PaddleOCR文本检测算法的训练与使用请参考[文档](./doc/detection.md)。 ## 文本识别算法: PaddleOCR开源的文本识别算法列表: -- (CRNN)[https://arxiv.org/abs/1507.05717] +- [CRNN](https://arxiv.org/abs/1507.05717) - [Rosetta](https://arxiv.org/abs/1910.05085) - [STAR-Net](http://www.bmva.org/bmvc/2016/papers/paper043/index.html) - [RARE](https://arxiv.org/abs/1603.03915v1) @@ -59,7 +64,8 @@ PaddleOCR开源的文本识别算法列表: PaddleOCR文本识别算法的训练与使用请参考[文档](./doc/recognition.md)。 -## 端到端算法 +## TODO +**端到端OCR算法** PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.org/abs/1909.07808),敬请关注。 - End2End-PSL (comming soon) @@ -67,7 +73,7 @@ PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.o # 参考文献 ``` -EAST: +1. 
EAST: @inproceedings{zhou2017east, title={EAST: an efficient and accurate scene text detector}, author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun}, @@ -76,7 +82,7 @@ EAST: year={2017} } -DB: +2. DB: @article{liao2019real, title={Real-time Scene Text Detection with Differentiable Binarization}, author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, @@ -84,7 +90,7 @@ DB: year={2019} } -DTRB: +3. DTRB: @inproceedings{baek2019wrong, title={What is wrong with scene text recognition model comparisons? dataset and model analysis}, author={Baek, Jeonghun and Kim, Geewook and Lee, Junyeop and Park, Sungrae and Han, Dongyoon and Yun, Sangdoo and Oh, Seong Joon and Lee, Hwalsuk}, @@ -93,7 +99,7 @@ DTRB: year={2019} } -SAST: +4. SAST: @inproceedings{wang2019single, title={A Single-Shot Arbitrarily-Shaped Text Detector based on Context Attended Multi-Task Learning}, author={Wang, Pengfei and Zhang, Chengquan and Qi, Fei and Huang, Zuming and En, Mengyi and Han, Junyu and Liu, Jingtuo and Ding, Errui and Shi, Guangming}, @@ -102,7 +108,7 @@ SAST: year={2019} } -SRN: +5. SRN: @article{yu2020towards, title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks}, author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui}, @@ -110,7 +116,7 @@ SRN: year={2020} } -end2end-psl: +6. 
end2end-psl: @inproceedings{sun2019chinese, title={Chinese Street View Text: Large-scale Chinese Text Reading with Partially Supervised Learning}, author={Sun, Yipeng and Liu, Jiaming and Liu, Wei and Han, Junyu and Ding, Errui and Liu, Jingtuo}, diff --git a/doc/detection.md b/doc/detection.md index 5d54d780..5e550110 100644 --- a/doc/detection.md +++ b/doc/detection.md @@ -1,6 +1,6 @@ # 文字检测 -本节以icdar15数据集为例,介绍PaddleOCR中检测模型的使用方式。 +本节以icdar15数据集为例,介绍PaddleOCR中检测模型的训练、评估与测试。 ## 数据准备 icdar2015数据集可以从[官网](https://rrc.cvc.uab.es/?ch=4&com=downloads)下载到,首次下载需注册。 @@ -26,8 +26,9 @@ wget -P /PaddleOCR/train_data/ 测试标注文件链接 " 图像文件名 json.dumps编码的图像标注信息" ch4_test_images/img_61.jpg [{"transcription": "MASA", "points": [[310, 104], [416, 141], [418, 216], [312, 179]], ...}] ``` -json.dumps编码前的图像标注信息是包含多个字典的list,字典中的points表示文本框的位置,如果您想在其他数据集上训练PaddleOCR, -可以按照上述形式构建标注文件。 +json.dumps编码前的图像标注信息是包含多个字典的list,字典中的$points$表示文本框的四个点的坐标(x, y),从左上角的点开始顺时针排列。 +$transcription$表示当前文本框的文字,在文本检测任务中并不需要这个信息。 +如果您想在其他数据集上训练PaddleOCR,可以按照上述形式构建标注文件。 ## 快速启动训练 @@ -62,7 +63,7 @@ PaddleOCR计算三个OCR检测相关的指标,分别是:Precision、Recall 运行如下代码,根据配置文件det_db_mv3.yml中save_res_path指定的测试集检测结果文件,计算评估指标。 ``` -python3 tools/eval.py -c configs/det/det_db_mv3.yml -o checkpoints="./output/best_accuracy" +python3 tools/eval.py -c configs/det/det_db_mv3.yml -o Global.checkpoints="./output/best_accuracy" ``` ## 测试检测效果 diff --git a/doc/installation.md b/doc/installation.md index 5fca344e..25e0d0d5 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -25,10 +25,3 @@ cd PaddleOCR pip3 install --upgrade pip pip3 install -r requirements.txt ``` - -## 快速运行 - -``` -python3 tools/infer/predict_eval.py --image_file="./" -``` -【可视化运行结果】 diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py index 5ba01ee7..2e68d91d 100755 --- a/ppocr/data/det/dataset_traversal.py +++ b/ppocr/data/det/dataset_traversal.py @@ -22,6 +22,7 @@ import string from ppocr.utils.utility import initial_logger logger = 
initial_logger() from ppocr.utils.utility import create_module +from tools.infer.utility import get_image_file_list import time @@ -72,16 +73,8 @@ class EvalTestReader(object): self.params) batch_size = self.params['test_batch_size_per_card'] - flag_test_single_img = False - if mode == "test": - single_img_path = self.params['single_img_path'] - if single_img_path is not None: - flag_test_single_img = True - img_list = [] - if flag_test_single_img: - img_list.append([single_img_path, single_img_path]) - else: + if mode != "test": img_set_dir = self.params['img_set_dir'] img_name_list_path = self.params['label_file_path'] with open(img_name_list_path, "rb") as fin: @@ -90,6 +83,9 @@ class EvalTestReader(object): img_name = line.decode().strip("\n").split("\t")[0] img_path = img_set_dir + "/" + img_name img_list.append([img_path, img_name]) + else: + img_path = self.params['single_img_path'] + img_list = get_image_file_list(img_path) def batch_iter_reader(): batch_outs = [] diff --git a/tools/infer/utility.py b/tools/infer/utility.py index f1f7a8a0..01477a5c 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -61,16 +61,21 @@ def parse_args(): return parser.parse_args() -def get_image_file_list(image_dir): - image_file_list = [] - if image_dir is None: - return image_file_list - if os.path.isfile(image_dir): - image_file_list = [image_dir] - elif os.path.isdir(image_dir): - for single_file in os.listdir(image_dir): - image_file_list.append(os.path.join(image_dir, single_file)) - return image_file_list +def get_image_file_list(img_file): + imgs_lists = [] + if img_file is None or not os.path.exists(img_file): + raise Exception("not found any img file in {}".format(img_file)) + + img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp'] + if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end: + imgs_lists.append(img_file) + elif os.path.isdir(img_file): + for single_file in os.listdir(img_file): + if single_file.split('.')[-1] in img_end: + 
imgs_lists.append(os.path.join(img_file, single_file)) + if len(imgs_lists) == 0: + raise Exception("not found any img file in {}".format(img_file)) + return imgs_lists def create_predictor(args, mode): diff --git a/tools/infer_det.py b/tools/infer_det.py index 7998cdb6..8d591a65 100755 --- a/tools/infer_det.py +++ b/tools/infer_det.py @@ -68,50 +68,6 @@ def draw_det_res(dt_boxes, config, img_name, ino): logger.info("The detected Image saved in {}".format(save_path)) -def simple_reader(img_file, config): - imgs_lists = [] - if img_file is None or not os.path.exists(img_file): - raise Exception("not found any img file in {}".format(img_file)) - - img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp'] - if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end: - imgs_lists.append(img_file) - elif os.path.isdir(img_file): - for single_file in os.listdir(img_file): - if single_file.split('.')[-1] in img_end: - imgs_lists.append(os.path.join(img_file, single_file)) - if len(imgs_lists) == 0: - raise Exception("not found any img file in {}".format(img_file)) - - batch_size = config['Global']['test_batch_size_per_card'] - global_params = config['Global'] - params = deepcopy(config['TestReader']) - params.update(global_params) - reader_function = params['process_function'] - process_function = create_module(reader_function)(params) - - def batch_iter_reader(): - batch_outs = [] - for img_path in imgs_lists: - img = cv2.imread(img_path) - if img.shape[-1] == 1 or len(list(img.shape)) == 2: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - if img is None: - logger.info("load image error:" + img_path) - continue - outs = process_function(img) - outs.append(os.path.basename(img_path)) - print(outs[0].shape, outs[2]) - batch_outs.append(outs) - if len(batch_outs) == batch_size: - yield batch_outs - batch_outs = [] - if len(batch_outs) != 0: - yield batch_outs - - return batch_iter_reader - - def main(): config = program.load_config(FLAGS.config) 
program.merge_config(FLAGS.opt) @@ -148,9 +104,7 @@ def main(): save_res_path = config['Global']['save_res_path'] with open(save_res_path, "wb") as fout: - # test_reader = reader_main(config=config, mode='test') - single_img_path = config['TestReader']['single_img_path'] - test_reader = simple_reader(img_file=single_img_path, config=config) + test_reader = reader_main(config=config, mode='test') tackling_num = 0 for data in test_reader(): img_num = len(data)