diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index 9de24676..4d9c5274 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -24,6 +24,7 @@ import sys from functools import partial from collections import defaultdict import json +import cv2 __dir__ = os.path.dirname(os.path.abspath(__file__)) @@ -1242,10 +1243,13 @@ class MainWindow(QMainWindow, WindowMixin): # if unicodeFilePath in self.mImgList: if unicodeFilePath and os.path.exists(unicodeFilePath): - self.imageData = read(unicodeFilePath, None) self.canvas.verified = False - image = QImage.fromData(self.imageData) + cvimg = cv2.imdecode(np.fromfile(unicodeFilePath, dtype=np.uint8), 1) + height, width, depth = cvimg.shape + cvimg = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB) + image = QImage(cvimg.data, width, height, width * depth, QImage.Format_RGB888) + if image.isNull(): self.errorMessage(u'Error opening file', u"

Make sure %s is a valid image file." % unicodeFilePath) diff --git a/PPOCRLabel/libs/autoDialog.py b/PPOCRLabel/libs/autoDialog.py index 09fbeb56..3374e92c 100644 --- a/PPOCRLabel/libs/autoDialog.py +++ b/PPOCRLabel/libs/autoDialog.py @@ -7,6 +7,8 @@ except ImportError: from PyQt4.QtCore import * import json +import cv2 +import numpy as np from libs.utils import newIcon @@ -34,11 +36,16 @@ class Worker(QThread): if self.handle == 0: self.listValue.emit(Imgpath) if self.model == 'paddle': - self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True) + h, w, _ = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1).shape + if h > 32 and w > 32: + self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True) + else: + print('The size of', Imgpath, 'is too small to be recognised') + self.result_dic = None # 结果保存 if self.result_dic is None or len(self.result_dic) == 0: - print('Can not recognise file is : ', Imgpath) + print('Can not recognise file', Imgpath) pass else: strs = '' diff --git a/README_ch.md b/README_ch.md index 2de6fdf5..e2e96a6b 100755 --- a/README_ch.md +++ b/README_ch.md @@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 - 静态图版本:develop分支 **近期更新** -- 2021.1.18 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数152个,每周一都会更新,欢迎大家持续关注。 +- 2021.1.25 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数157个,每周一都会更新,欢迎大家持续关注。 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。 - 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941 diff --git a/deploy/docker/hubserving/cpu/Dockerfile b/deploy/docker/hubserving/cpu/Dockerfile index 342f1e81..e46ca73b 100644 --- a/deploy/docker/hubserving/cpu/Dockerfile +++ b/deploy/docker/hubserving/cpu/Dockerfile @@ -1,11 +1,9 @@ -# Version: 1.0.0 -FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev +# Version: 2.0.0 +FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0rc1 # PaddleOCR base on Python3.7 RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple -RUN python3.7 -m pip install paddlepaddle==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple - RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR @@ -15,15 +13,15 @@ WORKDIR /PaddleOCR RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple RUN mkdir -p /PaddleOCR/inference/ -# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py) +# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/ -# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py) +# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ -# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py) +# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ diff --git a/deploy/docker/hubserving/gpu/Dockerfile b/deploy/docker/hubserving/gpu/Dockerfile index 222d053d..b7fa6f4c 100644 --- a/deploy/docker/hubserving/gpu/Dockerfile +++ b/deploy/docker/hubserving/gpu/Dockerfile @@ -1,11 +1,9 @@ -# Version: 1.0.0 -FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev +# Version: 2.0.0 +FROM egistry.baidubce.com/paddlepaddle/paddle:2.0.0rc1-gpu-cuda10.0-cudnn7 # PaddleOCR base on Python3.7 RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple -RUN python3.7 -m pip install paddlepaddle-gpu==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple - RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR @@ -15,15 +13,15 @@ WORKDIR /PaddleOCR RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple RUN mkdir -p /PaddleOCR/inference/ -# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py) +# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ -# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py) +# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/ -# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py) +# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py) ADD {link} /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md index 37b9834d..bb61689b 100755 --- a/doc/doc_ch/FAQ.md +++ b/doc/doc_ch/FAQ.md @@ -9,42 +9,43 @@ ## PaddleOCR常见问题汇总(持续更新) -* [近期更新(2021.1.18)](#近期更新) +* [近期更新(2021.1.25)](#近期更新) * [【精选】OCR精选10个问题](#OCR精选10个问题) * [【理论篇】OCR通用32个问题](#OCR通用问题) * [基础知识7题](#基础知识) * [数据集7题](#数据集2) * [模型训练调优18题](#模型训练调优2) -* [【实战篇】PaddleOCR实战110个问题](#PaddleOCR实战问题) - * [使用咨询36题](#使用咨询) +* [【实战篇】PaddleOCR实战115个问题](#PaddleOCR实战问题) + * [使用咨询38题](#使用咨询) * [数据集17题](#数据集3) * [模型训练调优28题](#模型训练调优3) - * [预测部署29题](#预测部署3) + * [预测部署32题](#预测部署3) -## 近期更新(2021.1.18) +## 近期更新(2021.1.25) +#### Q3.1.37: 小语种模型只有识别模型,没有检测模型吗? -#### Q2.3.18: 在PP-OCR系统中,文本检测的骨干网络为什么没有使用SE模块? +**A**:小语种(包括纯英文数字)的检测模型和中文的检测模型是共用的,在训练中文检测模型时加入了多语言数据。https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/models_list_en.md#1-text-detection-model。 -**A**:SE模块是MobileNetV3网络一个重要模块,目的是估计特征图每个特征通道重要性,给特征图每个特征分配权重,提高网络的表达能力。但是,对于文本检测,输入网络的分辨率比较大,一般是640\*640,利用SE模块估计特征图每个特征通道重要性比较困难,网络提升能力有限,但是该模块又比较耗时,因此在PP-OCR系统中,文本检测的骨干网络没有使用SE模块。实验也表明,当去掉SE模块,超轻量模型大小可以减小40%,文本检测效果基本不受影响。详细可以参考PP-OCR技术文章,https://arxiv.org/abs/2009.09941. +#### Q3.1.38: module 'paddle.distributed' has no attribute ‘get_rank’。 -#### Q3.3.27: PaddleOCR关于文本识别模型的训练,支持的数据增强方式有哪些? +**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0rc1。 -**A**:文本识别支持的数据增强方式有随机小幅度裁剪、图像平衡、添加白噪声、颜色漂移、图像反色和Text Image Augmentation(TIA)变换等。可以参考[代码](../../ppocr/data/imaug/rec_img_aug.py)中的warp函数。 +#### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署? -#### Q3.3.28: 关于dygraph分支中,文本识别模型训练,要使用数据增强应该如何设置? +**A**:目前Paddle的预测库是支持华为鲲鹏920CPU的,但是OCR还没在这些芯片上测试过,可以自己调试,有问题反馈给我们。 -**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段,使数据增强生效。可以通过添加对aug_prob设置,表示每种数据增强采用的概率。aug_prob默认是0.4.由于tia数据增强特殊性,默认不采用,可以通过添加use_tia设置,使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744)。 +#### Q3.4.31: 采用Paddle-Lite进行端侧部署,出现问题,环境没问题。 -#### Q3.4.28: PP-OCR系统中,文本检测的结果有置信度吗? +**A**:如果你的预测库是自己编译的,那么你的nb文件也要自己编译,用同一个lite版本。不能直接用下载的nb文件,因为版本不同。 -**A**:文本检测的结果有置信度,由于推理过程中没有使用,所以没有显示的返回到最终结果中。如果需要文本检测结果的置信度,可以在[文本检测DB的后处理代码](../../ppocr/postprocess/db_postprocess.py)的155行,添加scores信息。这样,在[检测预测代码](../../tools/infer/predict_det.py)的197行,就可以拿到文本检测的scores信息。 +#### Q3.4.32: PaddleOCR的模型支持onnx转换吗? -#### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿? - -**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似与FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。 +**A**:我们目前已经通过Paddle2ONNX来支持各模型套件的转换,PaddleOCR基于PaddlePaddle 2.0的版本(dygraph分支)已经支持导出为ONNX,欢迎关注Paddle2ONNX,了解更多项目的进展: +Paddle2ONNX项目:https://github.com/PaddlePaddle/Paddle2ONNX +Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr) ## 【精选】OCR精选10个问题 @@ -474,9 +475,18 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信 例如识别身份证照片,可以先匹配"姓名","性别"等关键字,根据这些关键字的坐标去推测其他信息的位置,再与识别的结果匹配。 #### Q3.1.36 如何识别竹简上的古文? + **A**:对于字符都是普通的汉字字符的情况,只要标注足够的数据,finetune模型就可以了。如果数据量不足,您可以尝试StyleText工具。 而如果使用的字符是特殊的古文字、甲骨文、象形文字等,那么首先需要构建一个古文字的字典,之后再进行训练。 +#### Q3.1.37: 小语种模型只有识别模型,没有检测模型吗? + +**A**:小语种(包括纯英文数字)的检测模型和中文的检测模型是共用的,在训练中文检测模型时加入了多语言数据。https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/models_list_en.md#1-text-detection-model。 + +#### Q3.1.38: module 'paddle.distributed' has no attribute ‘get_rank’。 + +**A**:Paddle版本问题,请安装2.0版本Paddle:pip install paddlepaddle==2.0.0rc1。 + ### 数据集 @@ -854,3 +864,17 @@ img = cv.imdecode(img_array, -1) #### Q3.4.29: DB文本检测,特征提取网络金字塔构建的部分代码在哪儿? **A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码,其中architectures是文本检测或者文本识别整体流程代码;backbones是骨干网络相关代码;necks是类似与FPN的颈函数代码;heads是提取文本检测或者文本识别预测结果相关的头函数;transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。 + +#### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署? + +**A**:目前Paddle的预测库是支持华为鲲鹏920CPU的,但是OCR还没在这些芯片上测试过,可以自己调试,有问题反馈给我们。 + +#### Q3.4.31: 采用Paddle-Lite进行端侧部署,出现问题,环境没问题。 + +**A**:如果你的预测库是自己编译的,那么你的nb文件也要自己编译,用同一个lite版本。不能直接用下载的nb文件,因为版本不同。 + +#### Q3.4.32: PaddleOCR的模型支持onnx转换吗? + +**A**:我们目前已经通过Paddle2ONNX来支持各模型套件的转换,PaddleOCR基于PaddlePaddle 2.0的版本(dygraph分支)已经支持导出为ONNX,欢迎关注Paddle2ONNX,了解更多项目的进展: +Paddle2ONNX项目:https://github.com/PaddlePaddle/Paddle2ONNX +Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr) \ No newline at end of file diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 8cebce3a..59d1bc8c 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -14,11 +14,10 @@ PaddleOCR开源的文本检测算法列表: - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] 在ICDAR2015文本检测公开数据集上,算法效果如下: - |模型|骨干网络|precision|recall|Hmean|下载链接| | --- | --- | --- | --- | --- | --- | -|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| -|EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| +|EAST|ResNet50_vd|85.80%|86.71%|86.25%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|MobileNetV3|79.42%|80.64%|80.03%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| @@ -40,17 +39,19 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训 PaddleOCR基于动态图开源的文本识别算法列表: - [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐) - [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] -- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon +- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] - [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon - [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon 参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: |模型|骨干网络|Avg Accuracy|模型存储命名|下载链接| -|-|-|-|-|-| +|---|---|---|---|---| |Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)| |Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)| |CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| |CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| +|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)| +|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)| PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。 diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index 7e372c64..c4601e15 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -352,10 +352,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982] ``` # 使用方向分类器 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true # 不使用方向分类器 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false ``` @@ -364,7 +364,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model 执行命令后,识别结果图像如下: -![](../imgs_results/2.jpg) +![](../imgs_results/system_res_00018069.jpg) ### 2. 其他模型推理 @@ -381,4 +381,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d 执行命令后,识别结果图像如下: -(coming soon) +![](../imgs_results/img_10_east_starnet.jpg) diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md index b2b07b9d..fbfb3838 100644 --- a/doc/doc_ch/models_list.md +++ b/doc/doc_ch/models_list.md @@ -1,4 +1,4 @@ -## OCR模型列表(V2.0,2020年12月12日更新) +## OCR模型列表(V2.0,2021年1月20日更新) **说明** :2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。 - [一、文本检测模型](#文本检测模型) @@ -22,7 +22,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 |模型名称|模型简介|配置文件|推理模型大小|下载地址| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon)| +|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / 训练模型 (coming soon)| |ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)| |ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)| @@ -35,7 +35,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 |模型名称|模型简介|配置文件|推理模型大小|下载地址| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon) | +|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) | |ch_ppocr_mobile_v2.0_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | @@ -46,7 +46,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 |模型名称|模型简介|配置文件|推理模型大小|下载地址| | --- | --- | --- | --- | --- | -|en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | 推理模型 (coming soon) / slim模型 (coming soon) | +|en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) | |en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | @@ -55,7 +55,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 **说明:** 新增的多语言模型的配置文件通过代码方式生成,您可以通过`--help`参数查看当前PaddleOCR支持生成哪些多语言的配置文件: ```bash # 该代码需要在指定目录运行 -cd PaddleOCR/configs/rec/multi_language/ +cd {your/path/}PaddleOCR/configs/rec/multi_language/ python3 generate_multi_language_configs.py --help ``` 下面以生成意大利语配置文件为例: @@ -64,7 +64,7 @@ python3 generate_multi_language_configs.py --help 如果您仅仅想用配置文件测试PaddleOCR提供的多语言模型可以通过下面命令生成默认的配置文件,使用PaddleOCR提供的小语种字典进行预测。 ```bash # 该代码需要在指定目录运行 -cd PaddleOCR/configs/rec/multi_language/ +cd {your/path/}PaddleOCR/configs/rec/multi_language/ # 通过-l或者--language参数设置需要生成的语种的配置文件,该命令会将默认参数写入配置文件 python3 generate_multi_language_configs.py -l it ``` @@ -77,6 +77,8 @@ python3 generate_multi_language_configs.py -l it 使用以下命令生成配置文件: ```bash +# 该代码需要在指定目录运行 +cd {your/path/}PaddleOCR/configs/rec/multi_language/ # -l或者--language字段是必须的 # --train修改训练集,--val修改验证集,--data_dir修改数据集目录,-o修改对应默认参数 # --dict命令改变字典路径,示例使用默认字典路径则该参数可不填 @@ -112,7 +114,7 @@ python3 generate_multi_language_configs.py -l it \ | uk_mobile_v2.0_rec |乌克兰文识别|rec_uk_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) | | be_mobile_v2.0_rec |白俄罗斯文识别|rec_be_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) | | te_mobile_v2.0_rec |泰卢固文识别|rec_te_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | -| ka_mobile_v2.0_rec |卡纳达文识别|[rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | +| ka_mobile_v2.0_rec |卡纳达文识别|rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | | ta_mobile_v2.0_rec |泰米尔文识别|rec_ta_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | @@ -121,5 +123,5 @@ python3 generate_multi_language_configs.py -l it \ |模型名称|模型简介|配置文件|推理模型大小|下载地址| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |推理模型 (coming soon) / 训练模型 / slim模型 | +|ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) | |ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index f2349a1c..68bfd529 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -19,8 +19,8 @@ On the ICDAR2015 dataset, the text detection result is as follows: |Model|Backbone|precision|recall|Hmean|Download link| | --- | --- | --- | --- | --- | --- | -|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| -|EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| +|EAST|ResNet50_vd|85.80%|86.71%|86.25%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|MobileNetV3|79.42%|80.64%|80.03%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| @@ -41,17 +41,19 @@ For the training guide and use of PaddleOCR text detection algorithms, please re PaddleOCR open-source text recognition algorithms list: - [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7] - [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] -- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon +- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] - [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon - [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow: |Model|Backbone|Avg Accuracy|Module combination|Download link| -|-|-|-|-|-| +|---|---|---|---|---| |Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)| |Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)| |CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| |CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| +|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)| +|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)| Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md) diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index 4cafb61d..d2913707 100755 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -366,15 +366,15 @@ When performing prediction, you need to specify the path of a single image or a ``` # use direction classifier -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true # not use use direction classifier -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" ``` After executing the command, the recognition result image is as follows: -![](../imgs_results/2.jpg) +![](../imgs_results/system_res_00018069.jpg) ### 2. OTHER MODELS @@ -391,4 +391,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d After executing the command, the recognition result image is as follows: -(coming soon) +![](../imgs_results/img_10_east_starnet.jpg) diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index 578badc1..3eb0cd23 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -33,7 +33,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |inference model (coming soon) / slim model (coming soon) | +|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) | |ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | @@ -45,7 +45,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |inference model (coming soon ) / slim model (coming soon) | +|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) | |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | @@ -54,6 +54,8 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine **Note:** The configuration file of the new multi language model is generated by code. You can use the `--help` parameter to check which multi language are supported by current PaddleOCR. ```bash +# The code needs to run in the specified directory +cd {your/path/}PaddleOCR/configs/rec/multi_language/ python3 generate_multi_language_configs.py --help ``` @@ -62,7 +64,7 @@ Take the Italian configuration file as an example: you can generate the default configuration file through the following command, and use the default language dictionary provided by paddleocr for prediction. ```bash # The code needs to run in the specified directory -cd PaddleOCR/configs/rec/multi_language/ +cd {your/path/}PaddleOCR/configs/rec/multi_language/ # Set the required language configuration file through -l or --language parameter # This command will write the default parameter to the configuration file. python3 generate_multi_language_configs.py -l it @@ -74,6 +76,8 @@ If you want to train your own model, you can prepare the training set file, veri - Use the default dictionary provided by paddleocr:{your/path/}PaddleOCR/ppocr/utils/dict/it_dict.txt - Training data path:{your/path/}PaddleOCR/train_data ```bash +# The code needs to run in the specified directory +cd {your/path/}PaddleOCR/configs/rec/multi_language/ # The -l or --language parameter is required # --train modify train_list path # --val modify eval_list path @@ -92,27 +96,27 @@ python3 generate_multi_language_configs.py -l it \ | german_mobile_v2.0_rec |Lightweight model for French recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) | | korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) | | japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) | -| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) | -| xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) | -| pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) | -| ru_mobile_v2.0_rec |Lightweight model for Russia recognition|rec_ru_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) | -| ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) | -| hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) | -| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) | -| ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) | -| fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) | -| ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) | -| rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) | -| oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) | -| mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) | -| ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) | -| rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) | -| bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) | -| uk_mobile_v2.0_rec |Lightweight model for Ukranian recognition|rec_uk_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) | -| be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) | -| te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | -| ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|[rec_ka_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | -| ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | +| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) | +| xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) | +| pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) | +| ru_mobile_v2.0_rec |Lightweight model for Russia recognition|rec_ru_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) | +| ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) | +| hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) | +| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) | +| ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) | +| fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) | +| ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) | +| rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) | +| oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) | +| mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) | +| ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) | +| rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) | +| bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) | +| uk_mobile_v2.0_rec |Lightweight model for Ukranian recognition|rec_uk_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) | +| be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) | +| te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | +| ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | +| ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | @@ -120,6 +124,5 @@ python3 generate_multi_language_configs.py -l it \ |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |inference model (coming soon) / trained model / slim model| +|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_train.tar) | |ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | - diff --git a/doc/fonts/arabic.ttf b/doc/fonts/arabic.ttf new file mode 100644 index 00000000..064b6041 Binary files /dev/null and b/doc/fonts/arabic.ttf differ diff --git a/doc/fonts/chinese_cht.TTF b/doc/fonts/chinese_cht.TTF new file mode 100644 index 00000000..fae13bc2 Binary files /dev/null and b/doc/fonts/chinese_cht.TTF differ diff --git a/doc/fonts/cyrillic.ttf b/doc/fonts/cyrillic.ttf new file mode 100644 index 00000000..be4bf660 Binary files /dev/null and b/doc/fonts/cyrillic.ttf differ diff --git a/doc/french.ttf b/doc/fonts/french.ttf similarity index 100% rename from doc/french.ttf rename to doc/fonts/french.ttf diff --git a/doc/german.ttf b/doc/fonts/german.ttf similarity index 100% rename from doc/german.ttf rename to doc/fonts/german.ttf diff --git a/doc/fonts/hindi.ttf b/doc/fonts/hindi.ttf new file mode 100644 index 00000000..8b0c36f5 Binary files /dev/null and b/doc/fonts/hindi.ttf differ diff --git a/doc/japan.ttc b/doc/fonts/japan.ttc similarity index 100% rename from doc/japan.ttc rename to doc/fonts/japan.ttc diff --git a/doc/fonts/kannada.ttf b/doc/fonts/kannada.ttf new file mode 100644 index 00000000..43b60d42 Binary files /dev/null and b/doc/fonts/kannada.ttf differ diff --git a/doc/korean.ttf b/doc/fonts/korean.ttf similarity index 100% rename from doc/korean.ttf rename to doc/fonts/korean.ttf diff --git a/doc/fonts/latin.ttf b/doc/fonts/latin.ttf new file mode 100644 index 00000000..e392413a Binary files /dev/null and b/doc/fonts/latin.ttf differ diff --git a/doc/fonts/marathi.ttf b/doc/fonts/marathi.ttf new file mode 100644 index 00000000..a796d3ed Binary files /dev/null and b/doc/fonts/marathi.ttf differ diff --git a/doc/fonts/nepali.ttf b/doc/fonts/nepali.ttf new file mode 100644 index 00000000..8b0c36f5 Binary files /dev/null and b/doc/fonts/nepali.ttf differ diff --git a/doc/fonts/persian.ttf b/doc/fonts/persian.ttf new file mode 100644 index 00000000..bdb1c8d7 Binary files /dev/null and b/doc/fonts/persian.ttf differ diff --git a/doc/simfang.ttf b/doc/fonts/simfang.ttf similarity index 100% rename from doc/simfang.ttf rename to doc/fonts/simfang.ttf diff --git a/doc/fonts/spanish.ttf b/doc/fonts/spanish.ttf new file mode 100644 index 00000000..532353d2 Binary files /dev/null and b/doc/fonts/spanish.ttf differ diff --git a/doc/fonts/tamil.ttf b/doc/fonts/tamil.ttf new file mode 100644 index 00000000..2e9998e8 Binary files /dev/null and b/doc/fonts/tamil.ttf differ diff --git a/doc/fonts/telugu.ttf b/doc/fonts/telugu.ttf new file mode 100644 index 00000000..12c91e41 Binary files /dev/null and b/doc/fonts/telugu.ttf differ diff --git a/doc/fonts/urdu.ttf b/doc/fonts/urdu.ttf new file mode 100644 index 00000000..625feee2 Binary files /dev/null and b/doc/fonts/urdu.ttf differ diff --git a/doc/fonts/uyghur.ttf b/doc/fonts/uyghur.ttf new file mode 100644 index 00000000..625feee2 Binary files /dev/null and b/doc/fonts/uyghur.ttf differ diff --git a/doc/imgs_results/img_10_east_starnet.jpg b/doc/imgs_results/img_10_east_starnet.jpg new file mode 100644 index 00000000..fd8c0392 Binary files /dev/null and b/doc/imgs_results/img_10_east_starnet.jpg differ diff --git a/doc/imgs_results/system_res_00018069.jpg b/doc/imgs_results/system_res_00018069.jpg new file mode 100644 index 00000000..fc06b050 Binary files /dev/null and b/doc/imgs_results/system_res_00018069.jpg differ diff --git a/doc/imgs/arabic_1.jpg b/doc/imgs_words/arabic/ar_1.jpg similarity index 100% rename from doc/imgs/arabic_1.jpg rename to doc/imgs_words/arabic/ar_1.jpg diff --git a/doc/imgs/arabic_2.jpg b/doc/imgs_words/arabic/ar_2.jpg similarity index 100% rename from doc/imgs/arabic_2.jpg rename to doc/imgs_words/arabic/ar_2.jpg diff --git a/doc/imgs/be_1.jpg b/doc/imgs_words/belarusian/be_1.jpg similarity index 100% rename from doc/imgs/be_1.jpg rename to doc/imgs_words/belarusian/be_1.jpg diff --git a/doc/imgs/be_2.jpg b/doc/imgs_words/belarusian/be_2.jpg similarity index 100% rename from doc/imgs/be_2.jpg rename to doc/imgs_words/belarusian/be_2.jpg diff --git a/doc/imgs/bg_1.jpg b/doc/imgs_words/bulgarian/bg_1.jpg similarity index 100% rename from doc/imgs/bg_1.jpg rename to doc/imgs_words/bulgarian/bg_1.jpg diff --git a/doc/imgs/bg_2.jpg b/doc/imgs_words/bulgarian/bg_2.jpg similarity index 100% rename from doc/imgs/bg_2.jpg rename to doc/imgs_words/bulgarian/bg_2.jpg diff --git a/doc/imgs/chinese_cht_1.png b/doc/imgs_words/chinese_traditional/chinese_cht_1.png similarity index 100% rename from doc/imgs/chinese_cht_1.png rename to doc/imgs_words/chinese_traditional/chinese_cht_1.png diff --git a/doc/imgs/chinese_cht_2.png b/doc/imgs_words/chinese_traditional/chinese_cht_2.png similarity index 100% rename from doc/imgs/chinese_cht_2.png rename to doc/imgs_words/chinese_traditional/chinese_cht_2.png diff --git a/doc/imgs/hi_1.jpg b/doc/imgs_words/hindi/hi_1.jpg similarity index 100% rename from doc/imgs/hi_1.jpg rename to doc/imgs_words/hindi/hi_1.jpg diff --git a/doc/imgs/hi_2.jpg b/doc/imgs_words/hindi/hi_2.jpg similarity index 100% rename from doc/imgs/hi_2.jpg rename to doc/imgs_words/hindi/hi_2.jpg diff --git a/doc/imgs/it_1.jpg b/doc/imgs_words/italian/it_1.jpg similarity index 100% rename from doc/imgs/it_1.jpg rename to doc/imgs_words/italian/it_1.jpg diff --git a/doc/imgs/it_2.jpg b/doc/imgs_words/italian/it_2.jpg similarity index 100% rename from doc/imgs/it_2.jpg rename to doc/imgs_words/italian/it_2.jpg diff --git a/doc/imgs/ka_1.jpg b/doc/imgs_words/kannada/ka_1.jpg similarity index 100% rename from doc/imgs/ka_1.jpg rename to doc/imgs_words/kannada/ka_1.jpg diff --git a/doc/imgs/ka_2.jpg b/doc/imgs_words/kannada/ka_2.jpg similarity index 100% rename from doc/imgs/ka_2.jpg rename to doc/imgs_words/kannada/ka_2.jpg diff --git a/doc/imgs/mr_1.jpg b/doc/imgs_words/marathi/mr_1.jpg similarity index 100% rename from doc/imgs/mr_1.jpg rename to doc/imgs_words/marathi/mr_1.jpg diff --git a/doc/imgs/mr_2.jpg b/doc/imgs_words/marathi/mr_2.jpg similarity index 100% rename from doc/imgs/mr_2.jpg rename to doc/imgs_words/marathi/mr_2.jpg diff --git a/doc/imgs/ne_1.jpg b/doc/imgs_words/nepali/ne_1.jpg similarity index 100% rename from doc/imgs/ne_1.jpg rename to doc/imgs_words/nepali/ne_1.jpg diff --git a/doc/imgs/ne_2.jpg b/doc/imgs_words/nepali/ne_2.jpg similarity index 100% rename from doc/imgs/ne_2.jpg rename to doc/imgs_words/nepali/ne_2.jpg diff --git a/doc/imgs/oc_1.jpg b/doc/imgs_words/occitan/oc_1.jpg similarity index 100% rename from doc/imgs/oc_1.jpg rename to doc/imgs_words/occitan/oc_1.jpg diff --git a/doc/imgs/oc_2.jpg b/doc/imgs_words/occitan/oc_2.jpg similarity index 100% rename from doc/imgs/oc_2.jpg rename to doc/imgs_words/occitan/oc_2.jpg diff --git a/doc/imgs/fa_1.jpg b/doc/imgs_words/persian/fa_1.jpg similarity index 100% rename from doc/imgs/fa_1.jpg rename to doc/imgs_words/persian/fa_1.jpg diff --git a/doc/imgs/fa_2.jpg b/doc/imgs_words/persian/fa_2.jpg similarity index 100% rename from doc/imgs/fa_2.jpg rename to doc/imgs_words/persian/fa_2.jpg diff --git a/doc/imgs/pu_1.jpg b/doc/imgs_words/portuguese/pu_1.jpg similarity index 100% rename from doc/imgs/pu_1.jpg rename to doc/imgs_words/portuguese/pu_1.jpg diff --git a/doc/imgs/pu_2.jpg b/doc/imgs_words/portuguese/pu_2.jpg similarity index 100% rename from doc/imgs/pu_2.jpg rename to doc/imgs_words/portuguese/pu_2.jpg diff --git a/doc/imgs/ru_1.jpg b/doc/imgs_words/russia/ru_1.jpg similarity index 100% rename from doc/imgs/ru_1.jpg rename to doc/imgs_words/russia/ru_1.jpg diff --git a/doc/imgs/ru_2.jpg b/doc/imgs_words/russia/ru_2.jpg similarity index 100% rename from doc/imgs/ru_2.jpg rename to doc/imgs_words/russia/ru_2.jpg diff --git a/doc/imgs/rsc_1.jpg b/doc/imgs_words/serbian_cyrillic/rsc_1.jpg similarity index 100% rename from doc/imgs/rsc_1.jpg rename to doc/imgs_words/serbian_cyrillic/rsc_1.jpg diff --git a/doc/imgs/rsc_2.jpg b/doc/imgs_words/serbian_cyrillic/rsc_2.jpg similarity index 100% rename from doc/imgs/rsc_2.jpg rename to doc/imgs_words/serbian_cyrillic/rsc_2.jpg diff --git a/doc/imgs/rs_1.jpg b/doc/imgs_words/serbian_latin/rs_1.jpg similarity index 100% rename from doc/imgs/rs_1.jpg rename to doc/imgs_words/serbian_latin/rs_1.jpg diff --git a/doc/imgs/rs_2.jpg b/doc/imgs_words/serbian_latin/rs_2.jpg similarity index 100% rename from doc/imgs/rs_2.jpg rename to doc/imgs_words/serbian_latin/rs_2.jpg diff --git a/doc/imgs/xi_1.jpg b/doc/imgs_words/spanish/xi_1.jpg similarity index 100% rename from doc/imgs/xi_1.jpg rename to doc/imgs_words/spanish/xi_1.jpg diff --git a/doc/imgs/xi_2.jpg b/doc/imgs_words/spanish/xi_2.jpg similarity index 100% rename from doc/imgs/xi_2.jpg rename to doc/imgs_words/spanish/xi_2.jpg diff --git a/doc/imgs/ta_1.jpg b/doc/imgs_words/tamil/ta_1.jpg similarity index 100% rename from doc/imgs/ta_1.jpg rename to doc/imgs_words/tamil/ta_1.jpg diff --git a/doc/imgs/ta_2.jpg b/doc/imgs_words/tamil/ta_2.jpg similarity index 100% rename from doc/imgs/ta_2.jpg rename to doc/imgs_words/tamil/ta_2.jpg diff --git a/doc/imgs/te_1.jpg b/doc/imgs_words/telugu/te_1.jpg similarity index 100% rename from doc/imgs/te_1.jpg rename to doc/imgs_words/telugu/te_1.jpg diff --git a/doc/imgs/te_2.jpg b/doc/imgs_words/telugu/te_2.jpg similarity index 100% rename from doc/imgs/te_2.jpg rename to doc/imgs_words/telugu/te_2.jpg diff --git a/doc/imgs/uk_1.jpg b/doc/imgs_words/ukranian/uk_1.jpg similarity index 100% rename from doc/imgs/uk_1.jpg rename to doc/imgs_words/ukranian/uk_1.jpg diff --git a/doc/imgs/uk_2.jpg b/doc/imgs_words/ukranian/uk_2.jpg similarity index 100% rename from doc/imgs/uk_2.jpg rename to doc/imgs_words/ukranian/uk_2.jpg diff --git a/doc/imgs/ur_1.jpg b/doc/imgs_words/urdu/ur_1.jpg similarity index 100% rename from doc/imgs/ur_1.jpg rename to doc/imgs_words/urdu/ur_1.jpg diff --git a/doc/imgs/ur_2.jpg b/doc/imgs_words/urdu/ur_2.jpg similarity index 100% rename from doc/imgs/ur_2.jpg rename to doc/imgs_words/urdu/ur_2.jpg diff --git a/doc/imgs/ug_1.jpg b/doc/imgs_words/uyghur/ug_1.jpg similarity index 100% rename from doc/imgs/ug_1.jpg rename to doc/imgs_words/uyghur/ug_1.jpg diff --git a/doc/imgs/ug_2.jpg b/doc/imgs_words/uyghur/ug_2.jpg similarity index 100% rename from doc/imgs/ug_2.jpg rename to doc/imgs_words/uyghur/ug_2.jpg diff --git a/doc/joinus.PNG b/doc/joinus.PNG index 317877a2..ee505a0c 100644 Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/paddleocr.py b/paddleocr.py index 3c3c47ab..db24aa59 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -290,7 +290,9 @@ class PaddleOCR(predict_system.TextSystem): image_file = img img, flag = check_and_read_gif(image_file) if not flag: - img = cv2.imread(image_file) + with open(image_file, 'rb') as f: + np_arr = np.frombuffer(f.read(), dtype=np.uint8) + img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) if img is None: logger.error("error in loading image:{}".format(image_file)) return None diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py index 4e1ff0ae..ea27a865 100644 --- a/ppocr/data/__init__.py +++ b/ppocr/data/__init__.py @@ -51,7 +51,7 @@ signal.signal(signal.SIGINT, term_mp) signal.signal(signal.SIGTERM, term_mp) -def build_dataloader(config, mode, device, logger): +def build_dataloader(config, mode, device, logger, seed=None): config = copy.deepcopy(config) support_dict = ['SimpleDataSet', 'LMDBDateSet'] @@ -61,7 +61,7 @@ def build_dataloader(config, mode, device, logger): assert mode in ['Train', 'Eval', 'Test' ], "Mode should be Train, Eval or Test." - dataset = eval(module_name)(config, mode, logger) + dataset = eval(module_name)(config, mode, logger, seed) loader_config = config[mode]['loader'] batch_size = loader_config['batch_size_per_card'] drop_last = loader_config['drop_last'] diff --git a/ppocr/data/imaug/sast_process.py b/ppocr/data/imaug/sast_process.py index b8d6ff89..1536dceb 100644 --- a/ppocr/data/imaug/sast_process.py +++ b/ppocr/data/imaug/sast_process.py @@ -24,11 +24,11 @@ __all__ = ['SASTProcessTrain'] class SASTProcessTrain(object): def __init__(self, - image_shape = [512, 512], - min_crop_size = 24, - min_crop_side_ratio = 0.3, - min_text_size = 10, - max_text_size = 512, + image_shape=[512, 512], + min_crop_size=24, + min_crop_side_ratio=0.3, + min_text_size=10, + max_text_size=512, **kwargs): self.input_size = image_shape[1] self.min_crop_size = min_crop_size @@ -42,12 +42,10 @@ class SASTProcessTrain(object): :param poly: :return: """ - edge = [ - (poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1]) - ] + edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), + (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), + (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), + (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] return np.sum(edge) / 2. def gen_quad_from_poly(self, poly): @@ -57,7 +55,8 @@ class SASTProcessTrain(object): point_num = poly.shape[0] min_area_quad = np.zeros((4, 2), dtype=np.float32) if True: - rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation) + rect = cv2.minAreaRect(poly.astype( + np.int32)) # (center (x,y), (width, height), angle of rotation) center_point = rect[0] box = np.array(cv2.boxPoints(rect)) @@ -102,23 +101,33 @@ class SASTProcessTrain(object): if p_area > 0: if tag == False: print('poly in wrong direction') - tag = True # reversed cases should be ignore - poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1), :] + tag = True # reversed cases should be ignore + poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 1), :] quad = quad[(0, 3, 2, 1), :] - len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - quad[2]) - len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2]) + len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - + quad[2]) + len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - + quad[2]) hv_tag = 1 - - if len_w * 2.0 < len_h: + + if len_w * 2.0 < len_h: hv_tag = 0 validated_polys.append(poly) validated_tags.append(tag) hv_tags.append(hv_tag) - return np.array(validated_polys), np.array(validated_tags), np.array(hv_tags) + return np.array(validated_polys), np.array(validated_tags), np.array( + hv_tags) - def crop_area(self, im, polys, tags, hv_tags, crop_background=False, max_tries=25): + def crop_area(self, + im, + polys, + tags, + hv_tags, + crop_background=False, + max_tries=25): """ make random crop from the input image :param im: @@ -137,10 +146,10 @@ class SASTProcessTrain(object): poly = np.round(poly, decimals=0).astype(np.int32) minx = np.min(poly[:, 0]) maxx = np.max(poly[:, 0]) - w_array[minx + pad_w: maxx + pad_w] = 1 + w_array[minx + pad_w:maxx + pad_w] = 1 miny = np.min(poly[:, 1]) maxy = np.max(poly[:, 1]) - h_array[miny + pad_h: maxy + pad_h] = 1 + h_array[miny + pad_h:maxy + pad_h] = 1 # ensure the cropped area not across a text h_axis = np.where(h_array == 0)[0] w_axis = np.where(w_array == 0)[0] @@ -166,17 +175,18 @@ class SASTProcessTrain(object): if polys.shape[0] != 0: poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) - selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0] + selected_polys = np.where( + np.sum(poly_axis_in_area, axis=1) == 4)[0] else: selected_polys = [] if len(selected_polys) == 0: # no text in this area if crop_background: return im[ymin : ymax + 1, xmin : xmax + 1, :], \ - polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts + polys[selected_polys], tags[selected_polys], hv_tags[selected_polys] else: continue - im = im[ymin: ymax + 1, xmin: xmax + 1, :] + im = im[ymin:ymax + 1, xmin:xmax + 1, :] polys = polys[selected_polys] tags = tags[selected_polys] hv_tags = hv_tags[selected_polys] @@ -192,18 +202,28 @@ class SASTProcessTrain(object): width_list = [] height_list = [] for quad in poly_quads: - quad_w = (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) / 2.0 - quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0 + quad_w = (np.linalg.norm(quad[0] - quad[1]) + + np.linalg.norm(quad[2] - quad[3])) / 2.0 + quad_h = (np.linalg.norm(quad[0] - quad[3]) + + np.linalg.norm(quad[2] - quad[1])) / 2.0 width_list.append(quad_w) height_list.append(quad_h) - norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0) + norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0) average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0) for quad in poly_quads: - direct_vector_full = ((quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 - direct_vector = direct_vector_full / (np.linalg.norm(direct_vector_full) + 1e-6) * norm_width - direction_label = tuple(map(float, [direct_vector[0], direct_vector[1], 1.0 / (average_height + 1e-6)])) - cv2.fillPoly(direction_map, quad.round().astype(np.int32)[np.newaxis, :, :], direction_label) + direct_vector_full = ( + (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 + direct_vector = direct_vector_full / ( + np.linalg.norm(direct_vector_full) + 1e-6) * norm_width + direction_label = tuple( + map(float, [ + direct_vector[0], direct_vector[1], 1.0 / (average_height + + 1e-6) + ])) + cv2.fillPoly(direction_map, + quad.round().astype(np.int32)[np.newaxis, :, :], + direction_label) return direction_map def calculate_average_height(self, poly_quads): @@ -211,13 +231,19 @@ class SASTProcessTrain(object): """ height_list = [] for quad in poly_quads: - quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0 + quad_h = (np.linalg.norm(quad[0] - quad[3]) + + np.linalg.norm(quad[2] - quad[1])) / 2.0 height_list.append(quad_h) average_height = max(sum(height_list) / len(height_list), 1.0) return average_height - def generate_tcl_label(self, hw, polys, tags, ds_ratio, - tcl_ratio=0.3, shrink_ratio_of_width=0.15): + def generate_tcl_label(self, + hw, + polys, + tags, + ds_ratio, + tcl_ratio=0.3, + shrink_ratio_of_width=0.15): """ Generate polygon. """ @@ -225,21 +251,30 @@ class SASTProcessTrain(object): h, w = int(h * ds_ratio), int(w * ds_ratio) polys = polys * ds_ratio - score_map = np.zeros((h, w,), dtype=np.float32) + score_map = np.zeros( + ( + h, + w, ), dtype=np.float32) tbo_map = np.zeros((h, w, 5), dtype=np.float32) - training_mask = np.ones((h, w,), dtype=np.float32) - direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape([1, 1, 3]).astype(np.float32) + training_mask = np.ones( + ( + h, + w, ), dtype=np.float32) + direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape( + [1, 1, 3]).astype(np.float32) for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] + poly = poly_tag[0] tag = poly_tag[1] # generate min_area_quad min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) + min_area_quad_h = 0.5 * ( + np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + + np.linalg.norm(min_area_quad[1] - min_area_quad[2])) + min_area_quad_w = 0.5 * ( + np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + + np.linalg.norm(min_area_quad[2] - min_area_quad[3])) if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: @@ -247,25 +282,37 @@ class SASTProcessTrain(object): if tag: # continue - cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0.15) + cv2.fillPoly(training_mask, + poly.astype(np.int32)[np.newaxis, :, :], 0.15) else: tcl_poly = self.poly2tcl(poly, tcl_ratio) tcl_quads = self.poly2quads(tcl_poly) poly_quads = self.poly2quads(poly) # stcl map - stcl_quads, quad_index = self.shrink_poly_along_width(tcl_quads, shrink_ratio_of_width=shrink_ratio_of_width, - expand_height_ratio=1.0 / tcl_ratio) + stcl_quads, quad_index = self.shrink_poly_along_width( + tcl_quads, + shrink_ratio_of_width=shrink_ratio_of_width, + expand_height_ratio=1.0 / tcl_ratio) # generate tcl map - cv2.fillPoly(score_map, np.round(stcl_quads).astype(np.int32), 1.0) + cv2.fillPoly(score_map, + np.round(stcl_quads).astype(np.int32), 1.0) # generate tbo map for idx, quad in enumerate(stcl_quads): quad_mask = np.zeros((h, w), dtype=np.float32) - quad_mask = cv2.fillPoly(quad_mask, np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) - tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], quad_mask, tbo_map) + quad_mask = cv2.fillPoly( + quad_mask, + np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) + tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], + quad_mask, tbo_map) return score_map, tbo_map, training_mask - def generate_tvo_and_tco(self, hw, polys, tags, tcl_ratio=0.3, ds_ratio=0.25): + def generate_tvo_and_tco(self, + hw, + polys, + tags, + tcl_ratio=0.3, + ds_ratio=0.25): """ Generate tcl map, tvo map and tbo map. """ @@ -297,35 +344,44 @@ class SASTProcessTrain(object): # generate min_area_quad min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) + min_area_quad_h = 0.5 * ( + np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + + np.linalg.norm(min_area_quad[1] - min_area_quad[2])) + min_area_quad_w = 0.5 * ( + np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + + np.linalg.norm(min_area_quad[2] - min_area_quad[3])) # generate tcl map and text, 128 * 128 tcl_poly = self.poly2tcl(poly, tcl_ratio) # generate poly_tv_xy_map for idx in range(4): - cv2.fillPoly(poly_tv_xy_map[2 * idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 0], 0), w))) - cv2.fillPoly(poly_tv_xy_map[2 * idx + 1], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 1], 0), h))) + cv2.fillPoly( + poly_tv_xy_map[2 * idx], + np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), + float(min(max(min_area_quad[idx, 0], 0), w))) + cv2.fillPoly( + poly_tv_xy_map[2 * idx + 1], + np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), + float(min(max(min_area_quad[idx, 1], 0), h))) # generate poly_tc_xy_map for idx in range(2): - cv2.fillPoly(poly_tc_xy_map[idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), float(center_point[idx])) + cv2.fillPoly( + poly_tc_xy_map[idx], + np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), + float(center_point[idx])) # generate poly_short_edge_map - cv2.fillPoly(poly_short_edge_map, - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(max(min(min_area_quad_h, min_area_quad_w), 1.0))) + cv2.fillPoly( + poly_short_edge_map, + np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), + float(max(min(min_area_quad_h, min_area_quad_w), 1.0))) # generate poly_mask and training_mask - cv2.fillPoly(poly_mask, np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), 1) + cv2.fillPoly(poly_mask, + np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), + 1) tvo_map *= poly_mask tvo_map[:8] -= poly_tv_xy_map @@ -356,7 +412,8 @@ class SASTProcessTrain(object): elif point_num > 4: vector_1 = poly[0] - poly[1] vector_2 = poly[1] - poly[2] - cos_theta = np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) + cos_theta = np.dot(vector_1, vector_2) / ( + np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) theta = np.arccos(np.round(cos_theta, decimals=4)) if abs(theta) > (70 / 180 * math.pi): @@ -374,7 +431,8 @@ class SASTProcessTrain(object): min_area_quad = poly center_point = np.sum(poly, axis=0) / 4 else: - rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation) + rect = cv2.minAreaRect(poly.astype( + np.int32)) # (center (x,y), (width, height), angle of rotation) center_point = rect[0] box = np.array(cv2.boxPoints(rect)) @@ -394,16 +452,23 @@ class SASTProcessTrain(object): return min_area_quad, center_point - def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.): + def shrink_quad_along_width(self, + quad, + begin_width_ratio=0., + end_width_ratio=1.): """ Generate shrink_quad_along_width. """ - ratio_pair = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32) + ratio_pair = np.array( + [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - def shrink_poly_along_width(self, quads, shrink_ratio_of_width, expand_height_ratio=1.0): + def shrink_poly_along_width(self, + quads, + shrink_ratio_of_width, + expand_height_ratio=1.0): """ shrink poly with given length. """ @@ -421,22 +486,28 @@ class SASTProcessTrain(object): upper_edge_list.append(upper_edge_len) # length of left edge and right edge. - left_length = np.linalg.norm(quads[0][0] - quads[0][3]) * expand_height_ratio - right_length = np.linalg.norm(quads[-1][1] - quads[-1][2]) * expand_height_ratio + left_length = np.linalg.norm(quads[0][0] - quads[0][ + 3]) * expand_height_ratio + right_length = np.linalg.norm(quads[-1][1] - quads[-1][ + 2]) * expand_height_ratio - shrink_length = min(left_length, right_length, sum(upper_edge_list)) * shrink_ratio_of_width + shrink_length = min(left_length, right_length, + sum(upper_edge_list)) * shrink_ratio_of_width # shrinking length upper_len_left = shrink_length upper_len_right = sum(upper_edge_list) - shrink_length left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) - left_quad = self.shrink_quad_along_width(quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) + left_quad = self.shrink_quad_along_width( + quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) - right_quad = self.shrink_quad_along_width(quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) - + right_quad = self.shrink_quad_along_width( + quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) + out_quad_list = [] if left_idx == right_idx: - out_quad_list.append([left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) + out_quad_list.append( + [left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) else: out_quad_list.append(left_quad) for idx in range(left_idx + 1, right_idx): @@ -500,7 +571,8 @@ class SASTProcessTrain(object): """ Generate center line by poly clock-wise point. (4, 2) """ - ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) + ratio_pair = np.array( + [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) @@ -509,12 +581,14 @@ class SASTProcessTrain(object): """ Generate center line by poly clock-wise point. """ - ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) + ratio_pair = np.array( + [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) tcl_poly = np.zeros_like(poly) point_num = poly.shape[0] for idx in range(point_num // 2): - point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx]) * ratio_pair + point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx] + ) * ratio_pair tcl_poly[idx] = point_pair[0] tcl_poly[point_num - 1 - idx] = point_pair[1] return tcl_poly @@ -527,8 +601,10 @@ class SASTProcessTrain(object): up_line = self.line_cross_two_point(quad[0], quad[1]) lower_line = self.line_cross_two_point(quad[3], quad[2]) - quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2])) - quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) + quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + + np.linalg.norm(quad[1] - quad[2])) + quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + + np.linalg.norm(quad[2] - quad[3])) # average angle of left and right line. angle = self.average_angle(quad) @@ -565,7 +641,8 @@ class SASTProcessTrain(object): quad_num = point_num // 2 - 1 for idx in range(quad_num): # reshape and adjust to clock-wise - quad_list.append((np.array(point_pair_list)[[idx, idx + 1]]).reshape(4, 2)[[0, 2, 3, 1]]) + quad_list.append((np.array(point_pair_list)[[idx, idx + 1]] + ).reshape(4, 2)[[0, 2, 3, 1]]) return np.array(quad_list) @@ -579,7 +656,8 @@ class SASTProcessTrain(object): return None h, w, _ = im.shape - text_polys, text_tags, hv_tags = self.check_and_validate_polys(text_polys, text_tags, (h, w)) + text_polys, text_tags, hv_tags = self.check_and_validate_polys( + text_polys, text_tags, (h, w)) if text_polys.shape[0] == 0: return None @@ -591,7 +669,7 @@ class SASTProcessTrain(object): if np.random.rand() < 0.5: asp_scale = 1.0 / asp_scale asp_scale = math.sqrt(asp_scale) - + asp_wx = asp_scale asp_hy = 1.0 / asp_scale im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) @@ -610,7 +688,7 @@ class SASTProcessTrain(object): #no background im, text_polys, text_tags, hv_tags = self.crop_area(im, \ text_polys, text_tags, hv_tags, crop_background=False) - + if text_polys.shape[0] == 0: return None #continue for all ignore case @@ -621,17 +699,18 @@ class SASTProcessTrain(object): return None #resize image std_ratio = float(self.input_size) / max(new_w, new_h) - rand_scales = np.array([0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) + rand_scales = np.array( + [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) rz_scale = std_ratio * np.random.choice(rand_scales) im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) text_polys[:, :, 0] *= rz_scale text_polys[:, :, 1] *= rz_scale - + #add gaussian blur if np.random.rand() < 0.1 * 0.5: ks = np.random.permutation(5)[0] + 1 - ks = int(ks/2)*2 + 1 - im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) + ks = int(ks / 2) * 2 + 1 + im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) #add brighter if np.random.rand() < 0.1 * 0.5: im = im * (1.0 + np.random.rand() * 0.5) @@ -640,13 +719,14 @@ class SASTProcessTrain(object): if np.random.rand() < 0.1 * 0.5: im = im * (1.0 - np.random.rand() * 0.5) im = np.clip(im, 0.0, 255.0) - + # Padding the im to [input_size, input_size] new_h, new_w, _ = im.shape if min(new_w, new_h) < self.input_size * 0.5: return None - im_padded = np.ones((self.input_size, self.input_size, 3), dtype=np.float32) + im_padded = np.ones( + (self.input_size, self.input_size, 3), dtype=np.float32) im_padded[:, :, 2] = 0.485 * 255 im_padded[:, :, 1] = 0.456 * 255 im_padded[:, :, 0] = 0.406 * 255 @@ -661,24 +741,29 @@ class SASTProcessTrain(object): sw = int(np.random.rand() * del_w) # Padding - im_padded[sh: sh + new_h, sw: sw + new_w, :] = im.copy() + im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy() text_polys[:, :, 0] += sw text_polys[:, :, 1] += sh - score_map, border_map, training_mask = self.generate_tcl_label((self.input_size, self.input_size), - text_polys, text_tags, 0.25) - + score_map, border_map, training_mask = self.generate_tcl_label( + (self.input_size, self.input_size), text_polys, text_tags, 0.25) + # SAST head - tvo_map, tco_map = self.generate_tvo_and_tco((self.input_size, self.input_size), text_polys, text_tags, tcl_ratio=0.3, ds_ratio=0.25) + tvo_map, tco_map = self.generate_tvo_and_tco( + (self.input_size, self.input_size), + text_polys, + text_tags, + tcl_ratio=0.3, + ds_ratio=0.25) # print("test--------tvo_map shape:", tvo_map.shape) im_padded[:, :, 2] -= 0.485 * 255 im_padded[:, :, 1] -= 0.456 * 255 im_padded[:, :, 0] -= 0.406 * 255 - im_padded[:, :, 2] /= (255.0 * 0.229) - im_padded[:, :, 1] /= (255.0 * 0.224) - im_padded[:, :, 0] /= (255.0 * 0.225) - im_padded = im_padded.transpose((2, 0, 1)) + im_padded[:, :, 2] /= (255.0 * 0.229) + im_padded[:, :, 1] /= (255.0 * 0.224) + im_padded[:, :, 0] /= (255.0 * 0.225) + im_padded = im_padded.transpose((2, 0, 1)) data['image'] = im_padded[::-1, :, :] data['score_map'] = score_map[np.newaxis, :, :] @@ -686,4 +771,4 @@ class SASTProcessTrain(object): data['training_mask'] = training_mask[np.newaxis, :, :] data['tvo_map'] = tvo_map.transpose((2, 0, 1)) data['tco_map'] = tco_map.transpose((2, 0, 1)) - return data \ No newline at end of file + return data diff --git a/ppocr/data/lmdb_dataset.py b/ppocr/data/lmdb_dataset.py index e7bb6dd3..bd0630f6 100644 --- a/ppocr/data/lmdb_dataset.py +++ b/ppocr/data/lmdb_dataset.py @@ -21,7 +21,7 @@ from .imaug import transform, create_operators class LMDBDateSet(Dataset): - def __init__(self, config, mode, logger): + def __init__(self, config, mode, logger, seed=None): super(LMDBDateSet, self).__init__() global_config = config['Global'] diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index ab17dd1a..d2a86b0f 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -20,7 +20,7 @@ from .imaug import transform, create_operators class SimpleDataSet(Dataset): - def __init__(self, config, mode, logger): + def __init__(self, config, mode, logger, seed=None): super(SimpleDataSet, self).__init__() self.logger = logger @@ -41,6 +41,7 @@ class SimpleDataSet(Dataset): self.data_dir = dataset_config['data_dir'] self.do_shuffle = loader_config['shuffle'] + self.seed = seed logger.info("Initialize indexs of datasets:%s" % label_file_list) self.data_lines = self.get_image_info_list(label_file_list, ratio_list) self.data_idx_order_list = list(range(len(self.data_lines))) @@ -55,6 +56,7 @@ class SimpleDataSet(Dataset): for idx, file in enumerate(file_list): with open(file, "rb") as f: lines = f.readlines() + random.seed(self.seed) lines = random.sample(lines, round(len(lines) * ratio_list[idx])) data_lines.extend(lines) @@ -62,6 +64,7 @@ class SimpleDataSet(Dataset): def shuffle_data_random(self): if self.do_shuffle: + random.seed(self.seed) random.shuffle(self.data_lines) return diff --git a/ppocr/modeling/transforms/tps.py b/ppocr/modeling/transforms/tps.py index 3de25193..78338edf 100644 --- a/ppocr/modeling/transforms/tps.py +++ b/ppocr/modeling/transforms/tps.py @@ -213,16 +213,14 @@ class GridGenerator(nn.Layer): def build_P_paddle(self, I_r_size): I_r_height, I_r_width = I_r_size - I_r_grid_x = paddle.divide( - paddle.arange( - -I_r_width, I_r_width, 2, dtype='float64') + 1.0, - paddle.to_tensor( - I_r_width, dtype='float64')) - I_r_grid_y = paddle.divide( - paddle.arange( - -I_r_height, I_r_height, 2, dtype='float64') + 1.0, - paddle.to_tensor( - I_r_height, dtype='float64')) # self.I_r_height + I_r_grid_x = (paddle.arange( + -I_r_width, I_r_width, 2, dtype='float64') + 1.0 + ) / paddle.to_tensor(np.array([I_r_width])) + + I_r_grid_y = (paddle.arange( + -I_r_height, I_r_height, 2, dtype='float64') + 1.0 + ) / paddle.to_tensor(np.array([I_r_height])) + # P: self.I_r_width x self.I_r_height x 2 P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2) P = paddle.transpose(P, perm=[1, 0, 2]) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 1c72863c..4d078994 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -109,7 +109,7 @@ class CTCLabelDecode(BaseRecLabelDecode): preds_idx = preds.argmax(axis=2) preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) if label is None: return text label = self.decode(label) diff --git a/tools/program.py b/tools/program.py index c2915426..cbca715a 100755 --- a/tools/program.py +++ b/tools/program.py @@ -182,8 +182,8 @@ def train(config, start_epoch = 1 for epoch in range(start_epoch, epoch_num + 1): - if epoch > 0: - train_dataloader = build_dataloader(config, 'Train', device, logger) + train_dataloader = build_dataloader( + config, 'Train', device, logger, seed=epoch) train_batch_cost = 0.0 train_reader_cost = 0.0 batch_sum = 0