Fix problems raised in review comments

2020-05-12 19:40:57 +08:00 · 2020-05-12 19:40:57 +08:00 · 5b4675e06d
parent e91f370362
commit 5b4675e06d
6 changed files with 57 additions and 102 deletions
--- a/README.md
+++ b/README.md
@ -1,44 +1,49 @@

 # 简介
-PaddleOCR旨在打造一套丰富、领先、且实用的文字检测、识别模型/工具库，助力使用者训练出更好的模型，并应用落地。
-
-
-## 文档教程
- [快速安装](./doc/installation.md)
- [文本识别模型训练/评估/预测](./doc/detection.md)
- [文本预测模型训练/评估/预测](./doc/recognition.md)
+PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力使用者训练出更好的模型，并应用落地。

 ## 特性：
 - 超轻量级模型
    - (检测模型4.1M + 识别模型4.5M = 8.6M)
- 支持竖排文字
+- 支持竖排文字识别
    - (单模型同时支持横排和竖排文字识别)
 - 支持长文本识别
 - 支持中英文数字组合识别
 - 提供训练代码
 - 支持模型部署

+
+## 文档教程
+- [快速安装](./doc/installation.md)
+- [快速开始]()
+- [文本检测模型训练/评估/预测](./doc/detection.md)
+- [文本识别模型训练/评估/预测](./doc/recognition.md)
+- [基于inference model预测](./doc/)
+
+
 ## 文本检测算法:

 PaddleOCR开源的文本检测算法列表：
- [EAST](https://arxiv.org/abs/1704.03155)
- [DB](https://arxiv.org/abs/1911.08947)
- [SAST](https://arxiv.org/abs/1908.05498)
+- [x] [EAST](https://arxiv.org/abs/1704.03155)
+- [x] [DB](https://arxiv.org/abs/1911.08947)
+- [x] [SAST](https://arxiv.org/abs/1908.05498)
+- []
+

 算法效果：
-|模型|骨干网络|数据集|Hmean|
-|-|-|-|-|
-|EAST|ResNet50_vd|ICDAR2015|85.85%|
-|EAST|MobileNetV3|ICDAR2015|79.08%|
-|DB|ResNet50_vd|ICDAR2015|83.30%|
-|DB|MobileNetV3|ICDAR2015|73.00%|
+|模型|骨干网络|Hmean|
+|-|-|-|
+|EAST^[1]^|ResNet50_vd|85.85%|
+|EAST^[1]^|MobileNetV3|79.08%|
+|DB^[2]^|ResNet50_vd|83.30%|
+|DB^[2]^|MobileNetV3|73.00%|

 PaddleOCR文本检测算法的训练与使用请参考[文档](./doc/detection.md)。

 ## 文本识别算法:

 PaddleOCR开源的文本识别算法列表：
- (CRNN)[https://arxiv.org/abs/1507.05717]
+- [CRNN](https://arxiv.org/abs/1507.05717)
 - [Rosetta](https://arxiv.org/abs/1910.05085)
 - [STAR-Net](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)
 - [RARE](https://arxiv.org/abs/1603.03915v1)
@ -59,7 +64,8 @@ PaddleOCR开源的文本识别算法列表：

 PaddleOCR文本识别算法的训练与使用请参考[文档](./doc/recognition.md)。

-## 端到端算法
+## TODO
+**端到端OCR算法**
 PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.org/abs/1909.07808)，敬请关注。
 - End2End-PSL (coming soon)

@ -67,7 +73,7 @@ PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.o

 # 参考文献
 ```
-EAST:
+1. EAST:
@inproceedings{zhou2017east,
  title={EAST: an efficient and accurate scene text detector},
  author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun},
@ -76,7 +82,7 @@ EAST:
  year={2017}
 }

-DB:
+2. DB:
@article{liao2019real,
  title={Real-time Scene Text Detection with Differentiable Binarization},
  author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
@ -84,7 +90,7 @@ DB:
  year={2019}
 }

-DTRB:
+3. DTRB:
@inproceedings{baek2019wrong,
  title={What is wrong with scene text recognition model comparisons? dataset and model analysis},
  author={Baek, Jeonghun and Kim, Geewook and Lee, Junyeop and Park, Sungrae and Han, Dongyoon and Yun, Sangdoo and Oh, Seong Joon and Lee, Hwalsuk},
@ -93,7 +99,7 @@ DTRB:
  year={2019}
 }

-SAST:
+4. SAST:
@inproceedings{wang2019single,
  title={A Single-Shot Arbitrarily-Shaped Text Detector based on Context Attended Multi-Task Learning},
  author={Wang, Pengfei and Zhang, Chengquan and Qi, Fei and Huang, Zuming and En, Mengyi and Han, Junyu and Liu, Jingtuo and Ding, Errui and Shi, Guangming},
@ -102,7 +108,7 @@ SAST:
  year={2019}
 }

-SRN:
+5. SRN:
@article{yu2020towards,
  title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks},
  author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui},
@ -110,7 +116,7 @@ SRN:
  year={2020}
 }

-end2end-psl:
+6. end2end-psl:
@inproceedings{sun2019chinese,
  title={Chinese Street View Text: Large-scale Chinese Text Reading with Partially Supervised Learning},
  author={Sun, Yipeng and Liu, Jiaming and Liu, Wei and Han, Junyu and Ding, Errui and Liu, Jingtuo},
--- a/doc/detection.md
+++ b/doc/detection.md
@ -1,6 +1,6 @@
 # 文字检测

-本节以icdar15数据集为例，介绍PaddleOCR中检测模型的使用方式。
+本节以icdar15数据集为例，介绍PaddleOCR中检测模型的训练、评估与测试。

 ## 数据准备
 icdar2015数据集可以从[官网](https://rrc.cvc.uab.es/?ch=4&com=downloads)下载到，首次下载需注册。
@ -26,8 +26,9 @@ wget -P /PaddleOCR/train_data/  测试标注文件链接
 " 图像文件名                    json.dumps编码的图像标注信息"
 ch4_test_images/img_61.jpg    [{"transcription": "MASA", "points": [[310, 104], [416, 141], [418, 216], [312, 179]], ...}]
 ```
-json.dumps编码前的图像标注信息是包含多个字典的list，字典中的points表示文本框的位置，如果您想在其他数据集上训练PaddleOCR,
-可以按照上述形式构建标注文件。
+json.dumps编码前的图像标注信息是包含多个字典的list，字典中的`points`表示文本框的四个点的坐标(x, y)，从左上角的点开始顺时针排列。
+`transcription`表示当前文本框的文字，在文本检测任务中并不需要这个信息。
+如果您想在其他数据集上训练PaddleOCR，可以按照上述形式构建标注文件。


 ## 快速启动训练
@ -62,7 +63,7 @@ PaddleOCR计算三个OCR检测相关的指标，分别是：Precision、Recall
 运行如下代码，根据配置文件det_db_mv3.yml中save_res_path指定的测试集检测结果文件，计算评估指标。

 ```
-python3 tools/eval.py -c configs/det/det_db_mv3.yml  -o checkpoints="./output/best_accuracy"
+python3 tools/eval.py -c configs/det/det_db_mv3.yml  -o Global.checkpoints="./output/best_accuracy"
 ```

 ## 测试检测效果
--- a/doc/installation.md
+++ b/doc/installation.md
@ -25,10 +25,3 @@ cd PaddleOCR
 pip3 install --upgrade pip
 pip3 install -r requirements.txt
 ```
-
-## 快速运行
-
-```
-python3 tools/infer/predict_eval.py --image_file="./"
-```
-【可视化运行结果】
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@ -22,6 +22,7 @@ import string
 from ppocr.utils.utility import initial_logger
 logger = initial_logger()
 from ppocr.utils.utility import create_module
+from tools.infer.utility import get_image_file_list
 import time


@ -72,16 +73,8 @@ class EvalTestReader(object):
            self.params)
        batch_size = self.params['test_batch_size_per_card']

-        flag_test_single_img = False
-        if mode == "test":
-            single_img_path = self.params['single_img_path']
-            if single_img_path is not None:
-                flag_test_single_img = True
-
        img_list = []
-        if flag_test_single_img:
-            img_list.append([single_img_path, single_img_path])
-        else:
+        if mode != "test":
            img_set_dir = self.params['img_set_dir']
            img_name_list_path = self.params['label_file_path']
            with open(img_name_list_path, "rb") as fin:
@ -90,6 +83,9 @@ class EvalTestReader(object):
                    img_name = line.decode().strip("\n").split("\t")[0]
                    img_path = img_set_dir + "/" + img_name
                    img_list.append([img_path, img_name])
+        else:
+            img_path = self.params['single_img_path']
+            img_list = get_image_file_list(img_path)

        def batch_iter_reader():
            batch_outs = []
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@ -61,16 +61,21 @@ def parse_args():
    return parser.parse_args()


-def get_image_file_list(image_dir):
-    image_file_list = []
-    if image_dir is None:
-        return image_file_list
-    if os.path.isfile(image_dir):
-        image_file_list = [image_dir]
-    elif os.path.isdir(image_dir):
-        for single_file in os.listdir(image_dir):
-            image_file_list.append(os.path.join(image_dir, single_file))
-    return image_file_list
+def get_image_file_list(img_file):
+    imgs_lists = []
+    if img_file is None or not os.path.exists(img_file):
+        raise Exception("not found any img file in {}".format(img_file))
+
+    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
+    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
+        imgs_lists.append(img_file)
+    elif os.path.isdir(img_file):
+        for single_file in os.listdir(img_file):
+            if single_file.split('.')[-1] in img_end:
+                imgs_lists.append(os.path.join(img_file, single_file))
+    if len(imgs_lists) == 0:
+        raise Exception("not found any img file in {}".format(img_file))
+    return imgs_lists


 def create_predictor(args, mode):
--- a/tools/infer_det.py
+++ b/tools/infer_det.py
@ -68,50 +68,6 @@ def draw_det_res(dt_boxes, config, img_name, ino):
        logger.info("The detected Image saved in {}".format(save_path))


-def simple_reader(img_file, config):
-    imgs_lists = []
-    if img_file is None or not os.path.exists(img_file):
-        raise Exception("not found any img file in {}".format(img_file))
-
-    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
-    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
-        imgs_lists.append(img_file)
-    elif os.path.isdir(img_file):
-        for single_file in os.listdir(img_file):
-            if single_file.split('.')[-1] in img_end:
-                imgs_lists.append(os.path.join(img_file, single_file))
-    if len(imgs_lists) == 0:
-        raise Exception("not found any img file in {}".format(img_file))
-
-    batch_size = config['Global']['test_batch_size_per_card']
-    global_params = config['Global']
-    params = deepcopy(config['TestReader'])
-    params.update(global_params)
-    reader_function = params['process_function']
-    process_function = create_module(reader_function)(params)
-
-    def batch_iter_reader():
-        batch_outs = []
-        for img_path in imgs_lists:
-            img = cv2.imread(img_path)
-            if img.shape[-1] == 1 or len(list(img.shape)) == 2:
-                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-            if img is None:
-                logger.info("load image error:" + img_path)
-                continue
-            outs = process_function(img)
-            outs.append(os.path.basename(img_path))
-            print(outs[0].shape, outs[2])
-            batch_outs.append(outs)
-            if len(batch_outs) == batch_size:
-                yield batch_outs
-                batch_outs = []
-        if len(batch_outs) != 0:
-            yield batch_outs
-
-    return batch_iter_reader
-
-
 def main():
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
@ -148,9 +104,7 @@ def main():

    save_res_path = config['Global']['save_res_path']
    with open(save_res_path, "wb") as fout:
-        # test_reader = reader_main(config=config, mode='test')
-        single_img_path = config['TestReader']['single_img_path']
-        test_reader = simple_reader(img_file=single_img_path, config=config)
+        test_reader = reader_main(config=config, mode='test')
        tackling_num = 0
        for data in test_reader():
            img_num = len(data)