fix problems refer comments

This commit is contained in:
LDOUBLEV 2020-05-12 19:40:57 +08:00
parent e91f370362
commit 5b4675e06d
6 changed files with 57 additions and 102 deletions

View File

@ -1,44 +1,49 @@
# 简介
PaddleOCR旨在打造一套丰富、领先、且实用的文字检测、识别模型/工具库,助力使用者训练出更好的模型,并应用落地。
## 文档教程
- [快速安装](./doc/installation.md)
- [文本识别模型训练/评估/预测](./doc/detection.md)
- [文本预测模型训练/评估/预测](./doc/recognition.md)
PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力使用者训练出更好的模型,并应用落地。
## 特性:
- 超轻量级模型
- (检测模型4.1M + 识别模型4.5M = 8.6M)
- 支持竖排文字
- 支持竖排文字识别
- (单模型同时支持横排和竖排文字识别)
- 支持长文本识别
- 支持中英文数字组合识别
- 提供训练代码
- 支持模型部署
## 文档教程
- [快速安装](./doc/installation.md)
- [快速开始]()
- [文本检测模型训练/评估/预测](./doc/detection.md)
- [文本识别模型训练/评估/预测](./doc/recognition.md)
- [基于inference model预测](./doc/)
## 文本检测算法:
PaddleOCR开源的文本检测算法列表
- [EAST](https://arxiv.org/abs/1704.03155)
- [DB](https://arxiv.org/abs/1911.08947)
- [SAST](https://arxiv.org/abs/1908.05498)
- [x] [EAST](https://arxiv.org/abs/1704.03155)
- [x] [DB](https://arxiv.org/abs/1911.08947)
- [x] [SAST](https://arxiv.org/abs/1908.05498)
- []
算法效果:
|模型|骨干网络|数据集|Hmean|
|-|-|-|-|
|EAST|ResNet50_vd|ICDAR2015|85.85%|
|EAST|MobileNetV3|ICDAR2015|79.08%|
|DB|ResNet50_vd|ICDAR2015|83.30%|
|DB|MobileNetV3|ICDAR2015|73.00%|
|模型|骨干网络|Hmean|
|-|-|-|
|EAST^[1]^|ResNet50_vd|85.85%|
|EAST^[1]^|MobileNetV3|79.08%|
|DB^[2]^|ResNet50_vd|83.30%|
|DB^[2]^|MobileNetV3|73.00%|
PaddleOCR文本检测算法的训练与使用请参考[文档](./doc/detection.md)。
## 文本识别算法:
PaddleOCR开源的文本识别算法列表
- (CRNN)[https://arxiv.org/abs/1507.05717]
- [CRNN](https://arxiv.org/abs/1507.05717)
- [Rosetta](https://arxiv.org/abs/1910.05085)
- [STAR-Net](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)
- [RARE](https://arxiv.org/abs/1603.03915v1)
@ -59,7 +64,8 @@ PaddleOCR开源的文本识别算法列表
PaddleOCR文本识别算法的训练与使用请参考[文档](./doc/recognition.md)。
## 端到端算法
## TODO
**端到端OCR算法**
PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.org/abs/1909.07808),敬请关注。
- End2End-PSL (coming soon)
@ -67,7 +73,7 @@ PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.o
# 参考文献
```
EAST:
1. EAST:
@inproceedings{zhou2017east,
title={EAST: an efficient and accurate scene text detector},
author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun},
@ -76,7 +82,7 @@ EAST:
year={2017}
}
DB:
2. DB:
@article{liao2019real,
title={Real-time Scene Text Detection with Differentiable Binarization},
author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
@ -84,7 +90,7 @@ DB:
year={2019}
}
DTRB:
3. DTRB:
@inproceedings{baek2019wrong,
title={What is wrong with scene text recognition model comparisons? dataset and model analysis},
author={Baek, Jeonghun and Kim, Geewook and Lee, Junyeop and Park, Sungrae and Han, Dongyoon and Yun, Sangdoo and Oh, Seong Joon and Lee, Hwalsuk},
@ -93,7 +99,7 @@ DTRB:
year={2019}
}
SAST:
4. SAST:
@inproceedings{wang2019single,
title={A Single-Shot Arbitrarily-Shaped Text Detector based on Context Attended Multi-Task Learning},
author={Wang, Pengfei and Zhang, Chengquan and Qi, Fei and Huang, Zuming and En, Mengyi and Han, Junyu and Liu, Jingtuo and Ding, Errui and Shi, Guangming},
@ -102,7 +108,7 @@ SAST:
year={2019}
}
SRN:
5. SRN:
@article{yu2020towards,
title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks},
author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui},
@ -110,7 +116,7 @@ SRN:
year={2020}
}
end2end-psl:
6. end2end-psl:
@inproceedings{sun2019chinese,
title={Chinese Street View Text: Large-scale Chinese Text Reading with Partially Supervised Learning},
author={Sun, Yipeng and Liu, Jiaming and Liu, Wei and Han, Junyu and Ding, Errui and Liu, Jingtuo},

View File

@ -1,6 +1,6 @@
# 文字检测
本节以icdar15数据集为例介绍PaddleOCR中检测模型的使用方式
本节以icdar15数据集为例,介绍PaddleOCR中检测模型的训练、评估与测试。
## 数据准备
icdar2015数据集可以从[官网](https://rrc.cvc.uab.es/?ch=4&com=downloads)下载到,首次下载需注册。
@ -26,8 +26,9 @@ wget -P /PaddleOCR/train_data/ 测试标注文件链接
" 图像文件名 json.dumps编码的图像标注信息"
ch4_test_images/img_61.jpg [{"transcription": "MASA", "points": [[310, 104], [416, 141], [418, 216], [312, 179]], ...}]
```
json.dumps编码前的图像标注信息是包含多个字典的list字典中的points表示文本框的位置如果您想在其他数据集上训练PaddleOCR,
可以按照上述形式构建标注文件。
json.dumps编码前的图像标注信息是包含多个字典的list,字典中的`points`表示文本框的四个点的坐标(x, y),从左上角的点开始顺时针排列。
`transcription`表示当前文本框的文字,在文本检测任务中并不需要这个信息。
如果您想在其他数据集上训练PaddleOCR,可以按照上述形式构建标注文件。
## 快速启动训练
@ -62,7 +63,7 @@ PaddleOCR计算三个OCR检测相关的指标分别是Precision、Recall
运行如下代码根据配置文件det_db_mv3.yml中save_res_path指定的测试集检测结果文件计算评估指标。
```
python3 tools/eval.py -c configs/det/det_db_mv3.yml -o checkpoints="./output/best_accuracy"
python3 tools/eval.py -c configs/det/det_db_mv3.yml -o Global.checkpoints="./output/best_accuracy"
```
## 测试检测效果

View File

@ -25,10 +25,3 @@ cd PaddleOCR
pip3 install --upgrade pip
pip3 install -r requirements.txt
```
## 快速运行
```
python3 tools/infer/predict_eval.py --image_file="./"
```
【可视化运行结果】

View File

@ -22,6 +22,7 @@ import string
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.utility import create_module
from tools.infer.utility import get_image_file_list
import time
@ -72,16 +73,8 @@ class EvalTestReader(object):
self.params)
batch_size = self.params['test_batch_size_per_card']
flag_test_single_img = False
if mode == "test":
single_img_path = self.params['single_img_path']
if single_img_path is not None:
flag_test_single_img = True
img_list = []
if flag_test_single_img:
img_list.append([single_img_path, single_img_path])
else:
if mode != "test":
img_set_dir = self.params['img_set_dir']
img_name_list_path = self.params['label_file_path']
with open(img_name_list_path, "rb") as fin:
@ -90,6 +83,9 @@ class EvalTestReader(object):
img_name = line.decode().strip("\n").split("\t")[0]
img_path = img_set_dir + "/" + img_name
img_list.append([img_path, img_name])
else:
img_path = self.params['single_img_path']
img_list = get_image_file_list(img_path)
def batch_iter_reader():
batch_outs = []

View File

@ -61,16 +61,21 @@ def parse_args():
return parser.parse_args()
def get_image_file_list(image_dir):
    """Return the file paths found at ``image_dir``.

    Args:
        image_dir: Path to a single file or to a directory. May be ``None``.

    Returns:
        A list of paths. A file path yields a one-element list; a directory
        yields every entry in it joined onto ``image_dir`` (no filtering by
        extension), in sorted order. ``None`` or a path that is neither a
        file nor a directory yields an empty list.
    """
    if image_dir is None:
        return []
    if os.path.isfile(image_dir):
        return [image_dir]
    if os.path.isdir(image_dir):
        # os.listdir() returns entries in arbitrary order; sort so that
        # repeated runs process the images in the same sequence.
        return [
            os.path.join(image_dir, entry)
            for entry in sorted(os.listdir(image_dir))
        ]
    return []
def get_image_file_list(img_file):
    """Collect image file paths from a single file or a directory.

    Args:
        img_file: Path to an image file, or to a directory whose direct
            children are scanned (no recursion).

    Returns:
        A non-empty list of image paths. For a directory the paths are
        returned in sorted order.

    Raises:
        Exception: If ``img_file`` is ``None``, does not exist, or no file
            with a recognised image extension is found.
    """
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    # Compared case-insensitively so that e.g. "x.PNG" or "x.Jpg" are
    # accepted as well as the lower-case spellings.
    img_end = {'jpg', 'png', 'jpeg', 'bmp'}

    def _has_image_ext(path):
        # splitext() yields "" for names without a real extension, so a file
        # literally called "jpg" is not mistaken for an image.
        return os.path.splitext(path)[1][1:].lower() in img_end

    imgs_lists = []
    if os.path.isfile(img_file):
        if _has_image_ext(img_file):
            imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        # os.listdir() order is arbitrary; sort for reproducible results.
        for single_file in sorted(os.listdir(img_file)):
            full_path = os.path.join(img_file, single_file)
            # Skip sub-directories that merely look like images by name.
            if _has_image_ext(single_file) and os.path.isfile(full_path):
                imgs_lists.append(full_path)
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    return imgs_lists
def create_predictor(args, mode):

View File

@ -68,50 +68,6 @@ def draw_det_res(dt_boxes, config, img_name, ino):
logger.info("The detected Image saved in {}".format(save_path))
def simple_reader(img_file, config):
    """Build a batched reader over the image(s) found at ``img_file``.

    Args:
        img_file: Path to an image file, or to a directory whose direct
            children with a recognised image extension are read.
        config: Configuration mapping. Reads ``config['Global']`` (including
            ``test_batch_size_per_card``) and ``config['TestReader']``
            (including ``process_function``, which is resolved through
            ``create_module``).

    Returns:
        A zero-argument generator function. Each yielded batch is a list of
        at most ``test_batch_size_per_card`` items, where an item is the
        output of the process function with the image's base name appended.

    Raises:
        Exception: If ``img_file`` is ``None``, does not exist, or contains
            no file with a recognised image extension.
    """
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
    imgs_lists = []
    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
            if single_file.split('.')[-1] in img_end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))

    batch_size = config['Global']['test_batch_size_per_card']
    global_params = config['Global']
    # Copy the reader section so that merging the global settings into it
    # does not mutate the caller's config.
    params = deepcopy(config['TestReader'])
    params.update(global_params)
    reader_function = params['process_function']
    process_function = create_module(reader_function)(params)

    def batch_iter_reader():
        batch_outs = []
        for img_path in imgs_lists:
            img = cv2.imread(img_path)
            # The None check must come before any attribute access on img;
            # otherwise an unreadable file raises AttributeError on
            # ``img.shape`` instead of being logged and skipped.
            if img is None:
                logger.info("load image error:" + img_path)
                continue
            # Promote single-channel images to 3-channel BGR.
            if len(img.shape) == 2 or img.shape[-1] == 1:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            outs = process_function(img)
            outs.append(os.path.basename(img_path))
            batch_outs.append(outs)
            if len(batch_outs) == batch_size:
                yield batch_outs
                batch_outs = []
        # Flush the final, possibly partial, batch.
        if len(batch_outs) != 0:
            yield batch_outs

    return batch_iter_reader
def main():
config = program.load_config(FLAGS.config)
program.merge_config(FLAGS.opt)
@ -148,9 +104,7 @@ def main():
save_res_path = config['Global']['save_res_path']
with open(save_res_path, "wb") as fout:
# test_reader = reader_main(config=config, mode='test')
single_img_path = config['TestReader']['single_img_path']
test_reader = simple_reader(img_file=single_img_path, config=config)
test_reader = reader_main(config=config, mode='test')
tackling_num = 0
for data in test_reader():
img_num = len(data)