Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into multi_languages

This commit is contained in:
tink2123 2021-01-26 15:06:40 +08:00
commit 5e9fb50db5
84 changed files with 334 additions and 207 deletions

View File

@ -24,6 +24,7 @@ import sys
from functools import partial from functools import partial
from collections import defaultdict from collections import defaultdict
import json import json
import cv2
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
@ -1242,10 +1243,13 @@ class MainWindow(QMainWindow, WindowMixin):
# if unicodeFilePath in self.mImgList: # if unicodeFilePath in self.mImgList:
if unicodeFilePath and os.path.exists(unicodeFilePath): if unicodeFilePath and os.path.exists(unicodeFilePath):
self.imageData = read(unicodeFilePath, None)
self.canvas.verified = False self.canvas.verified = False
image = QImage.fromData(self.imageData) cvimg = cv2.imdecode(np.fromfile(unicodeFilePath, dtype=np.uint8), 1)
height, width, depth = cvimg.shape
cvimg = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB)
image = QImage(cvimg.data, width, height, width * depth, QImage.Format_RGB888)
if image.isNull(): if image.isNull():
self.errorMessage(u'Error opening file', self.errorMessage(u'Error opening file',
u"<p>Make sure <i>%s</i> is a valid image file." % unicodeFilePath) u"<p>Make sure <i>%s</i> is a valid image file." % unicodeFilePath)

View File

@ -7,6 +7,8 @@ except ImportError:
from PyQt4.QtCore import * from PyQt4.QtCore import *
import json import json
import cv2
import numpy as np
from libs.utils import newIcon from libs.utils import newIcon
@ -34,11 +36,16 @@ class Worker(QThread):
if self.handle == 0: if self.handle == 0:
self.listValue.emit(Imgpath) self.listValue.emit(Imgpath)
if self.model == 'paddle': if self.model == 'paddle':
self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True) h, w, _ = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1).shape
if h > 32 and w > 32:
self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True)
else:
print('The size of', Imgpath, 'is too small to be recognised')
self.result_dic = None
# 结果保存 # 结果保存
if self.result_dic is None or len(self.result_dic) == 0: if self.result_dic is None or len(self.result_dic) == 0:
print('Can not recognise file is : ', Imgpath) print('Can not recognise file', Imgpath)
pass pass
else: else:
strs = '' strs = ''

View File

@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本develop分支 - 静态图版本develop分支
**近期更新** **近期更新**
- 2021.1.18 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题总数152个,每周一都会更新,欢迎大家持续关注。 - 2021.1.25 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题总数157个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md)辅助开发者高效完成标注任务输出格式与PP-OCR训练任务完美衔接。 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md)辅助开发者高效完成标注任务输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章https://arxiv.org/abs/2009.09941 - 2020.9.22 更新PP-OCR技术文章https://arxiv.org/abs/2009.09941

View File

@ -1,11 +1,9 @@
# Version: 1.0.0 # Version: 2.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0rc1
# PaddleOCR base on Python3.7 # PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
RUN python3.7 -m pip install paddlepaddle==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
@ -15,15 +13,15 @@ WORKDIR /PaddleOCR
RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
RUN mkdir -p /PaddleOCR/inference/ RUN mkdir -p /PaddleOCR/inference/
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py # Download ocr detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py # Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py # Download ocr recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/

View File

@ -1,11 +1,9 @@
# Version: 1.0.0 # Version: 2.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev FROM registry.baidubce.com/paddlepaddle/paddle:2.0.0rc1-gpu-cuda10.0-cudnn7
# PaddleOCR base on Python3.7 # PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
RUN python3.7 -m pip install paddlepaddle-gpu==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
@ -15,15 +13,15 @@ WORKDIR /PaddleOCR
RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
RUN mkdir -p /PaddleOCR/inference/ RUN mkdir -p /PaddleOCR/inference/
# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py # Download ocr detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v2.0_det_infer to ch_ppocr_server_v2.0_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py # Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_cls_infer to ch_ppocr_mobile_v2.0_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py # Download ocr recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v2.0_rec_infer to ch_ppocr_server_v2.0_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py
ADD {link} /PaddleOCR/inference/ ADD {link} /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/ RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/

View File

@ -9,42 +9,43 @@
## PaddleOCR常见问题汇总(持续更新) ## PaddleOCR常见问题汇总(持续更新)
* [近期更新2021.1.18](#近期更新) * [近期更新2021.1.25](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题) * [【精选】OCR精选10个问题](#OCR精选10个问题)
* [【理论篇】OCR通用32个问题](#OCR通用问题) * [【理论篇】OCR通用32个问题](#OCR通用问题)
* [基础知识7题](#基础知识) * [基础知识7题](#基础知识)
* [数据集7题](#数据集2) * [数据集7题](#数据集2)
* [模型训练调优18题](#模型训练调优2) * [模型训练调优18题](#模型训练调优2)
* [【实战篇】PaddleOCR实战110个问题](#PaddleOCR实战问题) * [【实战篇】PaddleOCR实战115个问题](#PaddleOCR实战问题)
* [使用咨询36题](#使用咨询) * [使用咨询38题](#使用咨询)
* [数据集17题](#数据集3) * [数据集17题](#数据集3)
* [模型训练调优28题](#模型训练调优3) * [模型训练调优28题](#模型训练调优3)
* [预测部署29题](#预测部署3) * [预测部署32题](#预测部署3)
<a name="近期更新"></a> <a name="近期更新"></a>
## 近期更新2021.1.18 ## 近期更新2021.1.25
#### Q3.1.37: 小语种模型只有识别模型,没有检测模型吗?
#### Q2.3.18: 在PP-OCR系统中文本检测的骨干网络为什么没有使用SE模块 **A**小语种包括纯英文数字的检测模型和中文的检测模型是共用的在训练中文检测模型时加入了多语言数据。https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/models_list_en.md#1-text-detection-model。
**A**SE模块是MobileNetV3网络一个重要模块目的是估计特征图每个特征通道重要性给特征图每个特征分配权重提高网络的表达能力。但是对于文本检测输入网络的分辨率比较大一般是640\*640利用SE模块估计特征图每个特征通道重要性比较困难网络提升能力有限但是该模块又比较耗时因此在PP-OCR系统中文本检测的骨干网络没有使用SE模块。实验也表明当去掉SE模块超轻量模型大小可以减小40%文本检测效果基本不受影响。详细可以参考PP-OCR技术文章https://arxiv.org/abs/2009.09941. #### Q3.1.38: module 'paddle.distributed' has no attribute get_rank
#### Q3.3.27: PaddleOCR关于文本识别模型的训练支持的数据增强方式有哪些 **A**Paddle版本问题请安装2.0版本Paddlepip install paddlepaddle==2.0.0rc1。
**A**文本识别支持的数据增强方式有随机小幅度裁剪、图像平衡、添加白噪声、颜色漂移、图像反色和Text Image AugmentationTIA变换等。可以参考[代码](../../ppocr/data/imaug/rec_img_aug.py)中的warp函数。 #### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署
#### Q3.3.28: 关于dygraph分支中文本识别模型训练要使用数据增强应该如何设置 **A**目前Paddle的预测库是支持华为鲲鹏920CPU的但是OCR还没在这些芯片上测试过可以自己调试有问题反馈给我们。
**A**:可以参考[配置文件](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)在Train['dataset']['transforms']添加RecAug字段使数据增强生效。可以通过添加对aug_prob设置表示每种数据增强采用的概率。aug_prob默认是0.4.由于tia数据增强特殊性默认不采用可以通过添加use_tia设置使tia数据增强生效。详细设置可以参考[ISSUE 1744](https://github.com/PaddlePaddle/PaddleOCR/issues/1744) #### Q3.4.31: 采用Paddle-Lite进行端侧部署出现问题环境没问题
#### Q3.4.28: PP-OCR系统中文本检测的结果有置信度吗 **A**如果你的预测库是自己编译的那么你的nb文件也要自己编译用同一个lite版本。不能直接用下载的nb文件因为版本不同。
**A**:文本检测的结果有置信度,由于推理过程中没有使用,所以没有显示的返回到最终结果中。如果需要文本检测结果的置信度,可以在[文本检测DB的后处理代码](../../ppocr/postprocess/db_postprocess.py)的155行添加scores信息。这样在[检测预测代码](../../tools/infer/predict_det.py)的197行就可以拿到文本检测的scores信息。 #### Q3.4.32: PaddleOCR的模型支持onnx转换吗
#### Q3.4.29: DB文本检测特征提取网络金字塔构建的部分代码在哪儿 **A**我们目前已经通过Paddle2ONNX来支持各模型套件的转换PaddleOCR基于PaddlePaddle 2.0的版本dygraph分支已经支持导出为ONNX欢迎关注Paddle2ONNX了解更多项目的进展
Paddle2ONNX项目https://github.com/PaddlePaddle/Paddle2ONNX
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码其中architectures是文本检测或者文本识别整体流程代码backbones是骨干网络相关代码necks是类似与FPN的颈函数代码heads是提取文本检测或者文本识别预测结果相关的头函数transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。 Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr)
<a name="OCR精选10个问题"></a> <a name="OCR精选10个问题"></a>
## 【精选】OCR精选10个问题 ## 【精选】OCR精选10个问题
@ -474,9 +475,18 @@ StyleText的用途主要是提取style_image中的字体、背景等style信
例如识别身份证照片,可以先匹配"姓名""性别"等关键字,根据这些关键字的坐标去推测其他信息的位置,再与识别的结果匹配。 例如识别身份证照片,可以先匹配"姓名""性别"等关键字,根据这些关键字的坐标去推测其他信息的位置,再与识别的结果匹配。
#### Q3.1.36 如何识别竹简上的古文? #### Q3.1.36 如何识别竹简上的古文?
**A**对于字符都是普通的汉字字符的情况只要标注足够的数据finetune模型就可以了。如果数据量不足您可以尝试StyleText工具。 **A**对于字符都是普通的汉字字符的情况只要标注足够的数据finetune模型就可以了。如果数据量不足您可以尝试StyleText工具。
而如果使用的字符是特殊的古文字、甲骨文、象形文字等,那么首先需要构建一个古文字的字典,之后再进行训练。 而如果使用的字符是特殊的古文字、甲骨文、象形文字等,那么首先需要构建一个古文字的字典,之后再进行训练。
#### Q3.1.37: 小语种模型只有识别模型,没有检测模型吗?
**A**小语种包括纯英文数字的检测模型和中文的检测模型是共用的在训练中文检测模型时加入了多语言数据。https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/models_list_en.md#1-text-detection-model。
#### Q3.1.38: module 'paddle.distributed' has no attribute get_rank
**A**Paddle版本问题请安装2.0版本Paddlepip install paddlepaddle==2.0.0rc1。
<a name="数据集3"></a> <a name="数据集3"></a>
### 数据集 ### 数据集
@ -854,3 +864,17 @@ img = cv.imdecode(img_array, -1)
#### Q3.4.29: DB文本检测特征提取网络金字塔构建的部分代码在哪儿 #### Q3.4.29: DB文本检测特征提取网络金字塔构建的部分代码在哪儿
**A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码其中architectures是文本检测或者文本识别整体流程代码backbones是骨干网络相关代码necks是类似与FPN的颈函数代码heads是提取文本检测或者文本识别预测结果相关的头函数transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。 **A**:特征提取网络金字塔构建的部分:[代码位置](../../ppocr/modeling/necks/db_fpn.py)。ppocr/modeling文件夹里面是组网相关的代码其中architectures是文本检测或者文本识别整体流程代码backbones是骨干网络相关代码necks是类似与FPN的颈函数代码heads是提取文本检测或者文本识别预测结果相关的头函数transforms是类似于TPS特征预处理模块。更多的信息可以参考[代码组织结构](./tree.md)。
#### Q3.4.30: PaddleOCR是否支持在华为鲲鹏920CPU上部署
**A**目前Paddle的预测库是支持华为鲲鹏920CPU的但是OCR还没在这些芯片上测试过可以自己调试有问题反馈给我们。
#### Q3.4.31: 采用Paddle-Lite进行端侧部署出现问题环境没问题。
**A**如果你的预测库是自己编译的那么你的nb文件也要自己编译用同一个lite版本。不能直接用下载的nb文件因为版本不同。
#### Q3.4.32: PaddleOCR的模型支持onnx转换吗
**A**我们目前已经通过Paddle2ONNX来支持各模型套件的转换PaddleOCR基于PaddlePaddle 2.0的版本dygraph分支已经支持导出为ONNX欢迎关注Paddle2ONNX了解更多项目的进展
Paddle2ONNX项目https://github.com/PaddlePaddle/Paddle2ONNX
Paddle2ONNX支持转换的[模型列表](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/docs/zh/model_zoo.md#%E5%9B%BE%E5%83%8Focr)

View File

@ -14,11 +14,10 @@ PaddleOCR开源的文本检测算法列表
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
在ICDAR2015文本检测公开数据集上算法效果如下 在ICDAR2015文本检测公开数据集上算法效果如下
|模型|骨干网络|precision|recall|Hmean|下载链接| |模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| |EAST|ResNet50_vd|85.80%|86.71%|86.25%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |EAST|MobileNetV3|79.42%|80.64%|80.03%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
@ -40,17 +39,19 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
PaddleOCR基于动态图开源的文本识别算法列表 PaddleOCR基于动态图开源的文本识别算法列表
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]ppocr推荐 - [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]ppocr推荐
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] - [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon - [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon - [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon - [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程使用MJSynth和SynthText两个文字识别数据集训练在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估算法效果如下 参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程使用MJSynth和SynthText两个文字识别数据集训练在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估算法效果如下
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接| |模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|-|-|-|-|-| |---|---|---|---|---|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)| |Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)| |Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| |CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| |CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。 PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。

View File

@ -352,10 +352,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
``` ```
# 使用方向分类器 # 使用方向分类器
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
# 不使用方向分类器 # 不使用方向分类器
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=false
``` ```
@ -364,7 +364,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model
执行命令后,识别结果图像如下: 执行命令后,识别结果图像如下:
![](../imgs_results/2.jpg) ![](../imgs_results/system_res_00018069.jpg)
<a name="其他模型推理"></a> <a name="其他模型推理"></a>
### 2. 其他模型推理 ### 2. 其他模型推理
@ -381,4 +381,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
执行命令后,识别结果图像如下: 执行命令后,识别结果图像如下:
(coming soon) ![](../imgs_results/img_10_east_starnet.jpg)

View File

@ -1,4 +1,4 @@
## OCR模型列表V2.02020年12月12日更新) ## OCR模型列表V2.02021年1月20日更新)
**说明** 2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。 **说明** 2.0版模型和[1.1版模型](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/models_list.md)的主要区别在于动态图训练vs.静态图训练,模型性能上无明显差距。
- [一、文本检测模型](#文本检测模型) - [一、文本检测模型](#文本检测模型)
@ -22,7 +22,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon)| |ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |推理模型 (coming soon) / 训练模型 (coming soon)|
|ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)| |ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)| |ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
@ -35,7 +35,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |推理模型 (coming soon) / slim模型 (coming soon) | |ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
|ch_ppocr_mobile_v2.0_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_mobile_v2.0_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v2.0_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
@ -46,7 +46,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | 推理模型 (coming soon) / slim模型 (coming soon) | |en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) |
|en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | |en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="多语言识别模型"></a> <a name="多语言识别模型"></a>
@ -55,7 +55,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
**说明:** 新增的多语言模型的配置文件通过代码方式生成,您可以通过`--help`参数查看当前PaddleOCR支持生成哪些多语言的配置文件 **说明:** 新增的多语言模型的配置文件通过代码方式生成,您可以通过`--help`参数查看当前PaddleOCR支持生成哪些多语言的配置文件
```bash ```bash
# 该代码需要在指定目录运行 # 该代码需要在指定目录运行
cd PaddleOCR/configs/rec/multi_language/ cd {your/path/}PaddleOCR/configs/rec/multi_language/
python3 generate_multi_language_configs.py --help python3 generate_multi_language_configs.py --help
``` ```
下面以生成意大利语配置文件为例: 下面以生成意大利语配置文件为例:
@ -64,7 +64,7 @@ python3 generate_multi_language_configs.py --help
如果您仅仅想用配置文件测试PaddleOCR提供的多语言模型可以通过下面命令生成默认的配置文件使用PaddleOCR提供的小语种字典进行预测。 如果您仅仅想用配置文件测试PaddleOCR提供的多语言模型可以通过下面命令生成默认的配置文件使用PaddleOCR提供的小语种字典进行预测。
```bash ```bash
# 该代码需要在指定目录运行 # 该代码需要在指定目录运行
cd PaddleOCR/configs/rec/multi_language/ cd {your/path/}PaddleOCR/configs/rec/multi_language/
# 通过-l或者--language参数设置需要生成的语种的配置文件该命令会将默认参数写入配置文件 # 通过-l或者--language参数设置需要生成的语种的配置文件该命令会将默认参数写入配置文件
python3 generate_multi_language_configs.py -l it python3 generate_multi_language_configs.py -l it
``` ```
@ -77,6 +77,8 @@ python3 generate_multi_language_configs.py -l it
使用以下命令生成配置文件: 使用以下命令生成配置文件:
```bash ```bash
# 该代码需要在指定目录运行
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# -l或者--language字段是必须的 # -l或者--language字段是必须的
# --train修改训练集--val修改验证集--data_dir修改数据集目录-o修改对应默认参数 # --train修改训练集--val修改验证集--data_dir修改数据集目录-o修改对应默认参数
# --dict命令改变字典路径示例使用默认字典路径则该参数可不填 # --dict命令改变字典路径示例使用默认字典路径则该参数可不填
@ -112,7 +114,7 @@ python3 generate_multi_language_configs.py -l it \
| uk_mobile_v2.0_rec |乌克兰文识别|rec_uk_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) | | uk_mobile_v2.0_rec |乌克兰文识别|rec_uk_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
| be_mobile_v2.0_rec |白俄罗斯文识别|rec_be_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) | | be_mobile_v2.0_rec |白俄罗斯文识别|rec_be_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |泰卢固文识别|rec_te_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | | te_mobile_v2.0_rec |泰卢固文识别|rec_te_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |卡纳达文识别|[rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | | ka_mobile_v2.0_rec |卡纳达文识别|rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |泰米尔文识别|rec_ta_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | | ta_mobile_v2.0_rec |泰米尔文识别|rec_ta_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
@ -121,5 +123,5 @@ python3 generate_multi_language_configs.py -l it \
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |推理模型 (coming soon) / 训练模型 / slim模型 | |ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_train.tar) |
|ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | |ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |

View File

@ -19,8 +19,8 @@ On the ICDAR2015 dataset, the text detection result is as follows:
|Model|Backbone|precision|recall|Hmean|Download link| |Model|Backbone|precision|recall|Hmean|Download link|
| --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|88.76%|81.36%|84.90%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| |EAST|ResNet50_vd|85.80%|86.71%|86.25%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |EAST|MobileNetV3|79.42%|80.64%|80.03%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
@ -41,17 +41,19 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
PaddleOCR open-source text recognition algorithms list: PaddleOCR open-source text recognition algorithms list:
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7] - [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] - [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon - [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon - [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon - [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
Following [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation results of the above text recognition algorithms (trained on MJSynth and SynthText, evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) are as follows: Following [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation results of the above text recognition algorithms (trained on MJSynth and SynthText, evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) are as follows:
|Model|Backbone|Avg Accuracy|Module combination|Download link| |Model|Backbone|Avg Accuracy|Module combination|Download link|
|-|-|-|-|-| |---|---|---|---|---|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)| |Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar)|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)| |Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar)|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)| |CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)| |CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar)|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar)|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar)|
Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md) Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md)

View File

@ -366,15 +366,15 @@ When performing prediction, you need to specify the path of a single image or a
``` ```
# use direction classifier # use direction classifier
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls=true
# do not use direction classifier # do not use direction classifier
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/" python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/"
``` ```
After executing the command, the recognition result image is as follows: After executing the command, the recognition result image is as follows:
![](../imgs_results/2.jpg) ![](../imgs_results/system_res_00018069.jpg)
<a name="OTHER_MODELS"></a> <a name="OTHER_MODELS"></a>
### 2. OTHER MODELS ### 2. OTHER MODELS
@ -391,4 +391,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
After executing the command, the recognition result image is as follows: After executing the command, the recognition result image is as follows:
(coming soon) ![](../imgs_results/img_10_east_starnet.jpg)

View File

@ -33,7 +33,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|model name|description|config|model size|download| |model name|description|config|model size|download|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |inference model (coming soon) / slim model (coming soon) | |ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
|ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
@ -45,7 +45,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|model name|description|config|model size|download| |model name|description|config|model size|download|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |inference model (coming soon ) / slim model (coming soon) | |en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) |
|en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="Multilingual"></a> <a name="Multilingual"></a>
@ -54,6 +54,8 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
**Note** The configuration file of the new multi-language model is generated by code. You can use the `--help` parameter to check which languages are currently supported by PaddleOCR. **Note** The configuration file of the new multi-language model is generated by code. You can use the `--help` parameter to check which languages are currently supported by PaddleOCR.
```bash ```bash
# The code needs to run in the specified directory
cd {your/path/}PaddleOCR/configs/rec/multi_language/
python3 generate_multi_language_configs.py --help python3 generate_multi_language_configs.py --help
``` ```
@ -62,7 +64,7 @@ Take the Italian configuration file as an example
you can generate the default configuration file through the following command, and use the default language dictionary provided by paddleocr for prediction. you can generate the default configuration file through the following command, and use the default language dictionary provided by paddleocr for prediction.
```bash ```bash
# The code needs to run in the specified directory # The code needs to run in the specified directory
cd PaddleOCR/configs/rec/multi_language/ cd {your/path/}PaddleOCR/configs/rec/multi_language/
# Set the required language configuration file through -l or --language parameter # Set the required language configuration file through -l or --language parameter
# This command will write the default parameter to the configuration file. # This command will write the default parameter to the configuration file.
python3 generate_multi_language_configs.py -l it python3 generate_multi_language_configs.py -l it
@ -74,6 +76,8 @@ If you want to train your own model, you can prepare the training set file, veri
- Use the default dictionary provided by paddleocr:{your/path/}PaddleOCR/ppocr/utils/dict/it_dict.txt - Use the default dictionary provided by paddleocr:{your/path/}PaddleOCR/ppocr/utils/dict/it_dict.txt
- Training data path:{your/path/}PaddleOCR/train_data - Training data path:{your/path/}PaddleOCR/train_data
```bash ```bash
# The code needs to run in the specified directory
cd {your/path/}PaddleOCR/configs/rec/multi_language/
# The -l or --language parameter is required # The -l or --language parameter is required
# --train modify train_list path # --train modify train_list path
# --val modify eval_list path # --val modify eval_list path
@ -92,27 +96,27 @@ python3 generate_multi_language_configs.py -l it \
| german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) | | german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) | | korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) | | japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) | | it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
| xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) | | xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) |
| pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) | | pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) |
| ru_mobile_v2.0_rec |Lightweight model for Russia recognition|rec_ru_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) | | ru_mobile_v2.0_rec |Lightweight model for Russian recognition|rec_ru_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) |
| ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) | | ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) |
| hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) | | hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) |
| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) | | chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
| ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) | | ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) |
| fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) | | fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) |
| ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) | | ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) |
| rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) | | rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) |
| oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) | | oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) |
| mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) | | mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) |
| ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) | | ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) |
| rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) | | rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) |
| bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) | | bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) |
| uk_mobile_v2.0_rec |Lightweight model for Ukranian recognition|rec_uk_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) | | uk_mobile_v2.0_rec |Lightweight model for Ukrainian recognition|rec_uk_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
| be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) | | be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | | te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|[rec_ka_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | | ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained mode](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | | ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
<a name="Angle"></a> <a name="Angle"></a>
@ -120,6 +124,5 @@ python3 generate_multi_language_configs.py -l it \
|model name|description|config|model size|download| |model name|description|config|model size|download|
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |inference model (coming soon) / trained model / slim model| |ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_slim_train.tar) |
|ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | |ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |

BIN
doc/fonts/arabic.ttf Normal file

Binary file not shown.

BIN
doc/fonts/chinese_cht.TTF Normal file

Binary file not shown.

BIN
doc/fonts/cyrillic.ttf Normal file

Binary file not shown.

BIN
doc/fonts/hindi.ttf Normal file

Binary file not shown.

BIN
doc/fonts/kannada.ttf Normal file

Binary file not shown.

BIN
doc/fonts/latin.ttf Normal file

Binary file not shown.

BIN
doc/fonts/marathi.ttf Normal file

Binary file not shown.

BIN
doc/fonts/nepali.ttf Normal file

Binary file not shown.

BIN
doc/fonts/persian.ttf Normal file

Binary file not shown.

BIN
doc/fonts/spanish.ttf Normal file

Binary file not shown.

BIN
doc/fonts/tamil.ttf Normal file

Binary file not shown.

BIN
doc/fonts/telugu.ttf Normal file

Binary file not shown.

BIN
doc/fonts/urdu.ttf Normal file

Binary file not shown.

BIN
doc/fonts/uyghur.ttf Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 352 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

View File

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 3.6 KiB

View File

Before

Width:  |  Height:  |  Size: 6.4 KiB

After

Width:  |  Height:  |  Size: 6.4 KiB

View File

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View File

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 65 KiB

View File

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 73 KiB

View File

Before

Width:  |  Height:  |  Size: 5.7 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

View File

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View File

Before

Width:  |  Height:  |  Size: 9.4 KiB

After

Width:  |  Height:  |  Size: 9.4 KiB

View File

Before

Width:  |  Height:  |  Size: 6.7 KiB

After

Width:  |  Height:  |  Size: 6.7 KiB

View File

Before

Width:  |  Height:  |  Size: 7.8 KiB

After

Width:  |  Height:  |  Size: 7.8 KiB

View File

Before

Width:  |  Height:  |  Size: 4.4 KiB

After

Width:  |  Height:  |  Size: 4.4 KiB

View File

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

Before

Width:  |  Height:  |  Size: 2.7 KiB

After

Width:  |  Height:  |  Size: 2.7 KiB

View File

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

Before

Width:  |  Height:  |  Size: 3.9 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

View File

Before

Width:  |  Height:  |  Size: 5.3 KiB

After

Width:  |  Height:  |  Size: 5.3 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View File

Before

Width:  |  Height:  |  Size: 6.0 KiB

After

Width:  |  Height:  |  Size: 6.0 KiB

View File

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

Before

Width:  |  Height:  |  Size: 6.6 KiB

After

Width:  |  Height:  |  Size: 6.6 KiB

View File

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

Before

Width:  |  Height:  |  Size: 4.4 KiB

After

Width:  |  Height:  |  Size: 4.4 KiB

View File

Before

Width:  |  Height:  |  Size: 8.5 KiB

After

Width:  |  Height:  |  Size: 8.5 KiB

View File

Before

Width:  |  Height:  |  Size: 7.0 KiB

After

Width:  |  Height:  |  Size: 7.0 KiB

View File

Before

Width:  |  Height:  |  Size: 6.1 KiB

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 5.2 KiB

View File

Before

Width:  |  Height:  |  Size: 8.2 KiB

After

Width:  |  Height:  |  Size: 8.2 KiB

View File

Before

Width:  |  Height:  |  Size: 6.0 KiB

After

Width:  |  Height:  |  Size: 6.0 KiB

View File

Before

Width:  |  Height:  |  Size: 4.4 KiB

After

Width:  |  Height:  |  Size: 4.4 KiB

View File

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

Before

Width:  |  Height:  |  Size: 5.0 KiB

After

Width:  |  Height:  |  Size: 5.0 KiB

View File

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

Before

Width:  |  Height:  |  Size: 5.6 KiB

After

Width:  |  Height:  |  Size: 5.6 KiB

View File

Before

Width:  |  Height:  |  Size: 4.8 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 109 KiB

After

Width:  |  Height:  |  Size: 107 KiB

View File

@ -290,7 +290,9 @@ class PaddleOCR(predict_system.TextSystem):
image_file = img image_file = img
img, flag = check_and_read_gif(image_file) img, flag = check_and_read_gif(image_file)
if not flag: if not flag:
img = cv2.imread(image_file) with open(image_file, 'rb') as f:
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
if img is None: if img is None:
logger.error("error in loading image:{}".format(image_file)) logger.error("error in loading image:{}".format(image_file))
return None return None

View File

@ -51,7 +51,7 @@ signal.signal(signal.SIGINT, term_mp)
signal.signal(signal.SIGTERM, term_mp) signal.signal(signal.SIGTERM, term_mp)
def build_dataloader(config, mode, device, logger): def build_dataloader(config, mode, device, logger, seed=None):
config = copy.deepcopy(config) config = copy.deepcopy(config)
support_dict = ['SimpleDataSet', 'LMDBDateSet'] support_dict = ['SimpleDataSet', 'LMDBDateSet']
@ -61,7 +61,7 @@ def build_dataloader(config, mode, device, logger):
assert mode in ['Train', 'Eval', 'Test' assert mode in ['Train', 'Eval', 'Test'
], "Mode should be Train, Eval or Test." ], "Mode should be Train, Eval or Test."
dataset = eval(module_name)(config, mode, logger) dataset = eval(module_name)(config, mode, logger, seed)
loader_config = config[mode]['loader'] loader_config = config[mode]['loader']
batch_size = loader_config['batch_size_per_card'] batch_size = loader_config['batch_size_per_card']
drop_last = loader_config['drop_last'] drop_last = loader_config['drop_last']

View File

@ -24,11 +24,11 @@ __all__ = ['SASTProcessTrain']
class SASTProcessTrain(object): class SASTProcessTrain(object):
def __init__(self, def __init__(self,
image_shape = [512, 512], image_shape=[512, 512],
min_crop_size = 24, min_crop_size=24,
min_crop_side_ratio = 0.3, min_crop_side_ratio=0.3,
min_text_size = 10, min_text_size=10,
max_text_size = 512, max_text_size=512,
**kwargs): **kwargs):
self.input_size = image_shape[1] self.input_size = image_shape[1]
self.min_crop_size = min_crop_size self.min_crop_size = min_crop_size
@ -42,12 +42,10 @@ class SASTProcessTrain(object):
:param poly: :param poly:
:return: :return:
""" """
edge = [ edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
(poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
(poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
(poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])
]
return np.sum(edge) / 2. return np.sum(edge) / 2.
def gen_quad_from_poly(self, poly): def gen_quad_from_poly(self, poly):
@ -57,7 +55,8 @@ class SASTProcessTrain(object):
point_num = poly.shape[0] point_num = poly.shape[0]
min_area_quad = np.zeros((4, 2), dtype=np.float32) min_area_quad = np.zeros((4, 2), dtype=np.float32)
if True: if True:
rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation) rect = cv2.minAreaRect(poly.astype(
np.int32)) # (center (x,y), (width, height), angle of rotation)
center_point = rect[0] center_point = rect[0]
box = np.array(cv2.boxPoints(rect)) box = np.array(cv2.boxPoints(rect))
@ -102,23 +101,33 @@ class SASTProcessTrain(object):
if p_area > 0: if p_area > 0:
if tag == False: if tag == False:
print('poly in wrong direction') print('poly in wrong direction')
tag = True # reversed cases should be ignore tag = True # reversed cases should be ignore
poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1), :] poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
1), :]
quad = quad[(0, 3, 2, 1), :] quad = quad[(0, 3, 2, 1), :]
len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - quad[2]) len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] -
len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2]) quad[2])
len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] -
quad[2])
hv_tag = 1 hv_tag = 1
if len_w * 2.0 < len_h: if len_w * 2.0 < len_h:
hv_tag = 0 hv_tag = 0
validated_polys.append(poly) validated_polys.append(poly)
validated_tags.append(tag) validated_tags.append(tag)
hv_tags.append(hv_tag) hv_tags.append(hv_tag)
return np.array(validated_polys), np.array(validated_tags), np.array(hv_tags) return np.array(validated_polys), np.array(validated_tags), np.array(
hv_tags)
def crop_area(self, im, polys, tags, hv_tags, crop_background=False, max_tries=25): def crop_area(self,
im,
polys,
tags,
hv_tags,
crop_background=False,
max_tries=25):
""" """
make random crop from the input image make random crop from the input image
:param im: :param im:
@ -137,10 +146,10 @@ class SASTProcessTrain(object):
poly = np.round(poly, decimals=0).astype(np.int32) poly = np.round(poly, decimals=0).astype(np.int32)
minx = np.min(poly[:, 0]) minx = np.min(poly[:, 0])
maxx = np.max(poly[:, 0]) maxx = np.max(poly[:, 0])
w_array[minx + pad_w: maxx + pad_w] = 1 w_array[minx + pad_w:maxx + pad_w] = 1
miny = np.min(poly[:, 1]) miny = np.min(poly[:, 1])
maxy = np.max(poly[:, 1]) maxy = np.max(poly[:, 1])
h_array[miny + pad_h: maxy + pad_h] = 1 h_array[miny + pad_h:maxy + pad_h] = 1
# ensure the cropped area not across a text # ensure the cropped area not across a text
h_axis = np.where(h_array == 0)[0] h_axis = np.where(h_array == 0)[0]
w_axis = np.where(w_array == 0)[0] w_axis = np.where(w_array == 0)[0]
@ -166,17 +175,18 @@ class SASTProcessTrain(object):
if polys.shape[0] != 0: if polys.shape[0] != 0:
poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
& (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0] selected_polys = np.where(
np.sum(poly_axis_in_area, axis=1) == 4)[0]
else: else:
selected_polys = [] selected_polys = []
if len(selected_polys) == 0: if len(selected_polys) == 0:
# no text in this area # no text in this area
if crop_background: if crop_background:
return im[ymin : ymax + 1, xmin : xmax + 1, :], \ return im[ymin : ymax + 1, xmin : xmax + 1, :], \
polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts polys[selected_polys], tags[selected_polys], hv_tags[selected_polys]
else: else:
continue continue
im = im[ymin: ymax + 1, xmin: xmax + 1, :] im = im[ymin:ymax + 1, xmin:xmax + 1, :]
polys = polys[selected_polys] polys = polys[selected_polys]
tags = tags[selected_polys] tags = tags[selected_polys]
hv_tags = hv_tags[selected_polys] hv_tags = hv_tags[selected_polys]
@ -192,18 +202,28 @@ class SASTProcessTrain(object):
width_list = [] width_list = []
height_list = [] height_list = []
for quad in poly_quads: for quad in poly_quads:
quad_w = (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) / 2.0 quad_w = (np.linalg.norm(quad[0] - quad[1]) +
quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0 np.linalg.norm(quad[2] - quad[3])) / 2.0
quad_h = (np.linalg.norm(quad[0] - quad[3]) +
np.linalg.norm(quad[2] - quad[1])) / 2.0
width_list.append(quad_w) width_list.append(quad_w)
height_list.append(quad_h) height_list.append(quad_h)
norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0) norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0)
average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0) average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0)
for quad in poly_quads: for quad in poly_quads:
direct_vector_full = ((quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 direct_vector_full = (
direct_vector = direct_vector_full / (np.linalg.norm(direct_vector_full) + 1e-6) * norm_width (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0
direction_label = tuple(map(float, [direct_vector[0], direct_vector[1], 1.0 / (average_height + 1e-6)])) direct_vector = direct_vector_full / (
cv2.fillPoly(direction_map, quad.round().astype(np.int32)[np.newaxis, :, :], direction_label) np.linalg.norm(direct_vector_full) + 1e-6) * norm_width
direction_label = tuple(
map(float, [
direct_vector[0], direct_vector[1], 1.0 / (average_height +
1e-6)
]))
cv2.fillPoly(direction_map,
quad.round().astype(np.int32)[np.newaxis, :, :],
direction_label)
return direction_map return direction_map
def calculate_average_height(self, poly_quads): def calculate_average_height(self, poly_quads):
@ -211,13 +231,19 @@ class SASTProcessTrain(object):
""" """
height_list = [] height_list = []
for quad in poly_quads: for quad in poly_quads:
quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0 quad_h = (np.linalg.norm(quad[0] - quad[3]) +
np.linalg.norm(quad[2] - quad[1])) / 2.0
height_list.append(quad_h) height_list.append(quad_h)
average_height = max(sum(height_list) / len(height_list), 1.0) average_height = max(sum(height_list) / len(height_list), 1.0)
return average_height return average_height
def generate_tcl_label(self, hw, polys, tags, ds_ratio, def generate_tcl_label(self,
tcl_ratio=0.3, shrink_ratio_of_width=0.15): hw,
polys,
tags,
ds_ratio,
tcl_ratio=0.3,
shrink_ratio_of_width=0.15):
""" """
Generate polygon. Generate polygon.
""" """
@ -225,21 +251,30 @@ class SASTProcessTrain(object):
h, w = int(h * ds_ratio), int(w * ds_ratio) h, w = int(h * ds_ratio), int(w * ds_ratio)
polys = polys * ds_ratio polys = polys * ds_ratio
score_map = np.zeros((h, w,), dtype=np.float32) score_map = np.zeros(
(
h,
w, ), dtype=np.float32)
tbo_map = np.zeros((h, w, 5), dtype=np.float32) tbo_map = np.zeros((h, w, 5), dtype=np.float32)
training_mask = np.ones((h, w,), dtype=np.float32) training_mask = np.ones(
direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape([1, 1, 3]).astype(np.float32) (
h,
w, ), dtype=np.float32)
direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape(
[1, 1, 3]).astype(np.float32)
for poly_idx, poly_tag in enumerate(zip(polys, tags)): for poly_idx, poly_tag in enumerate(zip(polys, tags)):
poly = poly_tag[0] poly = poly_tag[0]
tag = poly_tag[1] tag = poly_tag[1]
# generate min_area_quad # generate min_area_quad
min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + min_area_quad_h = 0.5 * (
np.linalg.norm(min_area_quad[1] - min_area_quad[2])) np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
np.linalg.norm(min_area_quad[2] - min_area_quad[3])) min_area_quad_w = 0.5 * (
np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
@ -247,25 +282,37 @@ class SASTProcessTrain(object):
if tag: if tag:
# continue # continue
cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0.15) cv2.fillPoly(training_mask,
poly.astype(np.int32)[np.newaxis, :, :], 0.15)
else: else:
tcl_poly = self.poly2tcl(poly, tcl_ratio) tcl_poly = self.poly2tcl(poly, tcl_ratio)
tcl_quads = self.poly2quads(tcl_poly) tcl_quads = self.poly2quads(tcl_poly)
poly_quads = self.poly2quads(poly) poly_quads = self.poly2quads(poly)
# stcl map # stcl map
stcl_quads, quad_index = self.shrink_poly_along_width(tcl_quads, shrink_ratio_of_width=shrink_ratio_of_width, stcl_quads, quad_index = self.shrink_poly_along_width(
expand_height_ratio=1.0 / tcl_ratio) tcl_quads,
shrink_ratio_of_width=shrink_ratio_of_width,
expand_height_ratio=1.0 / tcl_ratio)
# generate tcl map # generate tcl map
cv2.fillPoly(score_map, np.round(stcl_quads).astype(np.int32), 1.0) cv2.fillPoly(score_map,
np.round(stcl_quads).astype(np.int32), 1.0)
# generate tbo map # generate tbo map
for idx, quad in enumerate(stcl_quads): for idx, quad in enumerate(stcl_quads):
quad_mask = np.zeros((h, w), dtype=np.float32) quad_mask = np.zeros((h, w), dtype=np.float32)
quad_mask = cv2.fillPoly(quad_mask, np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) quad_mask = cv2.fillPoly(
tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], quad_mask, tbo_map) quad_mask,
np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]],
quad_mask, tbo_map)
return score_map, tbo_map, training_mask return score_map, tbo_map, training_mask
def generate_tvo_and_tco(self, hw, polys, tags, tcl_ratio=0.3, ds_ratio=0.25): def generate_tvo_and_tco(self,
hw,
polys,
tags,
tcl_ratio=0.3,
ds_ratio=0.25):
""" """
Generate tcl map, tvo map and tbo map. Generate tcl map, tvo map and tbo map.
""" """
@ -297,35 +344,44 @@ class SASTProcessTrain(object):
# generate min_area_quad # generate min_area_quad
min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + min_area_quad_h = 0.5 * (
np.linalg.norm(min_area_quad[1] - min_area_quad[2])) np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
np.linalg.norm(min_area_quad[2] - min_area_quad[3])) min_area_quad_w = 0.5 * (
np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
# generate tcl map and text, 128 * 128 # generate tcl map and text, 128 * 128
tcl_poly = self.poly2tcl(poly, tcl_ratio) tcl_poly = self.poly2tcl(poly, tcl_ratio)
# generate poly_tv_xy_map # generate poly_tv_xy_map
for idx in range(4): for idx in range(4):
cv2.fillPoly(poly_tv_xy_map[2 * idx], cv2.fillPoly(
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), poly_tv_xy_map[2 * idx],
float(min(max(min_area_quad[idx, 0], 0), w))) np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
cv2.fillPoly(poly_tv_xy_map[2 * idx + 1], float(min(max(min_area_quad[idx, 0], 0), w)))
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), cv2.fillPoly(
float(min(max(min_area_quad[idx, 1], 0), h))) poly_tv_xy_map[2 * idx + 1],
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
float(min(max(min_area_quad[idx, 1], 0), h)))
# generate poly_tc_xy_map # generate poly_tc_xy_map
for idx in range(2): for idx in range(2):
cv2.fillPoly(poly_tc_xy_map[idx], cv2.fillPoly(
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), float(center_point[idx])) poly_tc_xy_map[idx],
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
float(center_point[idx]))
# generate poly_short_edge_map # generate poly_short_edge_map
cv2.fillPoly(poly_short_edge_map, cv2.fillPoly(
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), poly_short_edge_map,
float(max(min(min_area_quad_h, min_area_quad_w), 1.0))) np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
float(max(min(min_area_quad_h, min_area_quad_w), 1.0)))
# generate poly_mask and training_mask # generate poly_mask and training_mask
cv2.fillPoly(poly_mask, np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), 1) cv2.fillPoly(poly_mask,
np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
1)
tvo_map *= poly_mask tvo_map *= poly_mask
tvo_map[:8] -= poly_tv_xy_map tvo_map[:8] -= poly_tv_xy_map
@ -356,7 +412,8 @@ class SASTProcessTrain(object):
elif point_num > 4: elif point_num > 4:
vector_1 = poly[0] - poly[1] vector_1 = poly[0] - poly[1]
vector_2 = poly[1] - poly[2] vector_2 = poly[1] - poly[2]
cos_theta = np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) cos_theta = np.dot(vector_1, vector_2) / (
np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6)
theta = np.arccos(np.round(cos_theta, decimals=4)) theta = np.arccos(np.round(cos_theta, decimals=4))
if abs(theta) > (70 / 180 * math.pi): if abs(theta) > (70 / 180 * math.pi):
@ -374,7 +431,8 @@ class SASTProcessTrain(object):
min_area_quad = poly min_area_quad = poly
center_point = np.sum(poly, axis=0) / 4 center_point = np.sum(poly, axis=0) / 4
else: else:
rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation) rect = cv2.minAreaRect(poly.astype(
np.int32)) # (center (x,y), (width, height), angle of rotation)
center_point = rect[0] center_point = rect[0]
box = np.array(cv2.boxPoints(rect)) box = np.array(cv2.boxPoints(rect))
@ -394,16 +452,23 @@ class SASTProcessTrain(object):
return min_area_quad, center_point return min_area_quad, center_point
def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.): def shrink_quad_along_width(self,
quad,
begin_width_ratio=0.,
end_width_ratio=1.):
""" """
Generate shrink_quad_along_width. Generate shrink_quad_along_width.
""" """
ratio_pair = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32) ratio_pair = np.array(
[[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
def shrink_poly_along_width(self, quads, shrink_ratio_of_width, expand_height_ratio=1.0): def shrink_poly_along_width(self,
quads,
shrink_ratio_of_width,
expand_height_ratio=1.0):
""" """
shrink poly with given length. shrink poly with given length.
""" """
@ -421,22 +486,28 @@ class SASTProcessTrain(object):
upper_edge_list.append(upper_edge_len) upper_edge_list.append(upper_edge_len)
# length of left edge and right edge. # length of left edge and right edge.
left_length = np.linalg.norm(quads[0][0] - quads[0][3]) * expand_height_ratio left_length = np.linalg.norm(quads[0][0] - quads[0][
right_length = np.linalg.norm(quads[-1][1] - quads[-1][2]) * expand_height_ratio 3]) * expand_height_ratio
right_length = np.linalg.norm(quads[-1][1] - quads[-1][
2]) * expand_height_ratio
shrink_length = min(left_length, right_length, sum(upper_edge_list)) * shrink_ratio_of_width shrink_length = min(left_length, right_length,
sum(upper_edge_list)) * shrink_ratio_of_width
# shrinking length # shrinking length
upper_len_left = shrink_length upper_len_left = shrink_length
upper_len_right = sum(upper_edge_list) - shrink_length upper_len_right = sum(upper_edge_list) - shrink_length
left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left)
left_quad = self.shrink_quad_along_width(quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) left_quad = self.shrink_quad_along_width(
quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1)
right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right)
right_quad = self.shrink_quad_along_width(quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) right_quad = self.shrink_quad_along_width(
quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio)
out_quad_list = [] out_quad_list = []
if left_idx == right_idx: if left_idx == right_idx:
out_quad_list.append([left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) out_quad_list.append(
[left_quad[0], right_quad[1], right_quad[2], left_quad[3]])
else: else:
out_quad_list.append(left_quad) out_quad_list.append(left_quad)
for idx in range(left_idx + 1, right_idx): for idx in range(left_idx + 1, right_idx):
@ -500,7 +571,8 @@ class SASTProcessTrain(object):
""" """
Generate center line by poly clock-wise point. (4, 2) Generate center line by poly clock-wise point. (4, 2)
""" """
ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) ratio_pair = np.array(
[[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair
p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair
return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]])
@ -509,12 +581,14 @@ class SASTProcessTrain(object):
""" """
Generate center line by poly clock-wise point. Generate center line by poly clock-wise point.
""" """
ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) ratio_pair = np.array(
[[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
tcl_poly = np.zeros_like(poly) tcl_poly = np.zeros_like(poly)
point_num = poly.shape[0] point_num = poly.shape[0]
for idx in range(point_num // 2): for idx in range(point_num // 2):
point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx]) * ratio_pair point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx]
) * ratio_pair
tcl_poly[idx] = point_pair[0] tcl_poly[idx] = point_pair[0]
tcl_poly[point_num - 1 - idx] = point_pair[1] tcl_poly[point_num - 1 - idx] = point_pair[1]
return tcl_poly return tcl_poly
@ -527,8 +601,10 @@ class SASTProcessTrain(object):
up_line = self.line_cross_two_point(quad[0], quad[1]) up_line = self.line_cross_two_point(quad[0], quad[1])
lower_line = self.line_cross_two_point(quad[3], quad[2]) lower_line = self.line_cross_two_point(quad[3], quad[2])
quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2])) quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) +
quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) np.linalg.norm(quad[1] - quad[2]))
quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) +
np.linalg.norm(quad[2] - quad[3]))
# average angle of left and right line. # average angle of left and right line.
angle = self.average_angle(quad) angle = self.average_angle(quad)
@ -565,7 +641,8 @@ class SASTProcessTrain(object):
quad_num = point_num // 2 - 1 quad_num = point_num // 2 - 1
for idx in range(quad_num): for idx in range(quad_num):
# reshape and adjust to clock-wise # reshape and adjust to clock-wise
quad_list.append((np.array(point_pair_list)[[idx, idx + 1]]).reshape(4, 2)[[0, 2, 3, 1]]) quad_list.append((np.array(point_pair_list)[[idx, idx + 1]]
).reshape(4, 2)[[0, 2, 3, 1]])
return np.array(quad_list) return np.array(quad_list)
@ -579,7 +656,8 @@ class SASTProcessTrain(object):
return None return None
h, w, _ = im.shape h, w, _ = im.shape
text_polys, text_tags, hv_tags = self.check_and_validate_polys(text_polys, text_tags, (h, w)) text_polys, text_tags, hv_tags = self.check_and_validate_polys(
text_polys, text_tags, (h, w))
if text_polys.shape[0] == 0: if text_polys.shape[0] == 0:
return None return None
@ -591,7 +669,7 @@ class SASTProcessTrain(object):
if np.random.rand() < 0.5: if np.random.rand() < 0.5:
asp_scale = 1.0 / asp_scale asp_scale = 1.0 / asp_scale
asp_scale = math.sqrt(asp_scale) asp_scale = math.sqrt(asp_scale)
asp_wx = asp_scale asp_wx = asp_scale
asp_hy = 1.0 / asp_scale asp_hy = 1.0 / asp_scale
im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
@ -610,7 +688,7 @@ class SASTProcessTrain(object):
#no background #no background
im, text_polys, text_tags, hv_tags = self.crop_area(im, \ im, text_polys, text_tags, hv_tags = self.crop_area(im, \
text_polys, text_tags, hv_tags, crop_background=False) text_polys, text_tags, hv_tags, crop_background=False)
if text_polys.shape[0] == 0: if text_polys.shape[0] == 0:
return None return None
#continue for all ignore case #continue for all ignore case
@ -621,17 +699,18 @@ class SASTProcessTrain(object):
return None return None
#resize image #resize image
std_ratio = float(self.input_size) / max(new_w, new_h) std_ratio = float(self.input_size) / max(new_w, new_h)
rand_scales = np.array([0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) rand_scales = np.array(
[0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
rz_scale = std_ratio * np.random.choice(rand_scales) rz_scale = std_ratio * np.random.choice(rand_scales)
im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
text_polys[:, :, 0] *= rz_scale text_polys[:, :, 0] *= rz_scale
text_polys[:, :, 1] *= rz_scale text_polys[:, :, 1] *= rz_scale
#add gaussian blur #add gaussian blur
if np.random.rand() < 0.1 * 0.5: if np.random.rand() < 0.1 * 0.5:
ks = np.random.permutation(5)[0] + 1 ks = np.random.permutation(5)[0] + 1
ks = int(ks/2)*2 + 1 ks = int(ks / 2) * 2 + 1
im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
#add brighter #add brighter
if np.random.rand() < 0.1 * 0.5: if np.random.rand() < 0.1 * 0.5:
im = im * (1.0 + np.random.rand() * 0.5) im = im * (1.0 + np.random.rand() * 0.5)
@ -640,13 +719,14 @@ class SASTProcessTrain(object):
if np.random.rand() < 0.1 * 0.5: if np.random.rand() < 0.1 * 0.5:
im = im * (1.0 - np.random.rand() * 0.5) im = im * (1.0 - np.random.rand() * 0.5)
im = np.clip(im, 0.0, 255.0) im = np.clip(im, 0.0, 255.0)
# Padding the im to [input_size, input_size] # Padding the im to [input_size, input_size]
new_h, new_w, _ = im.shape new_h, new_w, _ = im.shape
if min(new_w, new_h) < self.input_size * 0.5: if min(new_w, new_h) < self.input_size * 0.5:
return None return None
im_padded = np.ones((self.input_size, self.input_size, 3), dtype=np.float32) im_padded = np.ones(
(self.input_size, self.input_size, 3), dtype=np.float32)
im_padded[:, :, 2] = 0.485 * 255 im_padded[:, :, 2] = 0.485 * 255
im_padded[:, :, 1] = 0.456 * 255 im_padded[:, :, 1] = 0.456 * 255
im_padded[:, :, 0] = 0.406 * 255 im_padded[:, :, 0] = 0.406 * 255
@ -661,24 +741,29 @@ class SASTProcessTrain(object):
sw = int(np.random.rand() * del_w) sw = int(np.random.rand() * del_w)
# Padding # Padding
im_padded[sh: sh + new_h, sw: sw + new_w, :] = im.copy() im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy()
text_polys[:, :, 0] += sw text_polys[:, :, 0] += sw
text_polys[:, :, 1] += sh text_polys[:, :, 1] += sh
score_map, border_map, training_mask = self.generate_tcl_label((self.input_size, self.input_size), score_map, border_map, training_mask = self.generate_tcl_label(
text_polys, text_tags, 0.25) (self.input_size, self.input_size), text_polys, text_tags, 0.25)
# SAST head # SAST head
tvo_map, tco_map = self.generate_tvo_and_tco((self.input_size, self.input_size), text_polys, text_tags, tcl_ratio=0.3, ds_ratio=0.25) tvo_map, tco_map = self.generate_tvo_and_tco(
(self.input_size, self.input_size),
text_polys,
text_tags,
tcl_ratio=0.3,
ds_ratio=0.25)
# print("test--------tvo_map shape:", tvo_map.shape) # print("test--------tvo_map shape:", tvo_map.shape)
im_padded[:, :, 2] -= 0.485 * 255 im_padded[:, :, 2] -= 0.485 * 255
im_padded[:, :, 1] -= 0.456 * 255 im_padded[:, :, 1] -= 0.456 * 255
im_padded[:, :, 0] -= 0.406 * 255 im_padded[:, :, 0] -= 0.406 * 255
im_padded[:, :, 2] /= (255.0 * 0.229) im_padded[:, :, 2] /= (255.0 * 0.229)
im_padded[:, :, 1] /= (255.0 * 0.224) im_padded[:, :, 1] /= (255.0 * 0.224)
im_padded[:, :, 0] /= (255.0 * 0.225) im_padded[:, :, 0] /= (255.0 * 0.225)
im_padded = im_padded.transpose((2, 0, 1)) im_padded = im_padded.transpose((2, 0, 1))
data['image'] = im_padded[::-1, :, :] data['image'] = im_padded[::-1, :, :]
data['score_map'] = score_map[np.newaxis, :, :] data['score_map'] = score_map[np.newaxis, :, :]
@ -686,4 +771,4 @@ class SASTProcessTrain(object):
data['training_mask'] = training_mask[np.newaxis, :, :] data['training_mask'] = training_mask[np.newaxis, :, :]
data['tvo_map'] = tvo_map.transpose((2, 0, 1)) data['tvo_map'] = tvo_map.transpose((2, 0, 1))
data['tco_map'] = tco_map.transpose((2, 0, 1)) data['tco_map'] = tco_map.transpose((2, 0, 1))
return data return data

View File

@ -21,7 +21,7 @@ from .imaug import transform, create_operators
class LMDBDateSet(Dataset): class LMDBDateSet(Dataset):
def __init__(self, config, mode, logger): def __init__(self, config, mode, logger, seed=None):
super(LMDBDateSet, self).__init__() super(LMDBDateSet, self).__init__()
global_config = config['Global'] global_config = config['Global']

View File

@ -20,7 +20,7 @@ from .imaug import transform, create_operators
class SimpleDataSet(Dataset): class SimpleDataSet(Dataset):
def __init__(self, config, mode, logger): def __init__(self, config, mode, logger, seed=None):
super(SimpleDataSet, self).__init__() super(SimpleDataSet, self).__init__()
self.logger = logger self.logger = logger
@ -41,6 +41,7 @@ class SimpleDataSet(Dataset):
self.data_dir = dataset_config['data_dir'] self.data_dir = dataset_config['data_dir']
self.do_shuffle = loader_config['shuffle'] self.do_shuffle = loader_config['shuffle']
self.seed = seed
logger.info("Initialize indexs of datasets:%s" % label_file_list) logger.info("Initialize indexs of datasets:%s" % label_file_list)
self.data_lines = self.get_image_info_list(label_file_list, ratio_list) self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
self.data_idx_order_list = list(range(len(self.data_lines))) self.data_idx_order_list = list(range(len(self.data_lines)))
@ -55,6 +56,7 @@ class SimpleDataSet(Dataset):
for idx, file in enumerate(file_list): for idx, file in enumerate(file_list):
with open(file, "rb") as f: with open(file, "rb") as f:
lines = f.readlines() lines = f.readlines()
random.seed(self.seed)
lines = random.sample(lines, lines = random.sample(lines,
round(len(lines) * ratio_list[idx])) round(len(lines) * ratio_list[idx]))
data_lines.extend(lines) data_lines.extend(lines)
@ -62,6 +64,7 @@ class SimpleDataSet(Dataset):
def shuffle_data_random(self): def shuffle_data_random(self):
if self.do_shuffle: if self.do_shuffle:
random.seed(self.seed)
random.shuffle(self.data_lines) random.shuffle(self.data_lines)
return return

View File

@ -213,16 +213,14 @@ class GridGenerator(nn.Layer):
def build_P_paddle(self, I_r_size): def build_P_paddle(self, I_r_size):
I_r_height, I_r_width = I_r_size I_r_height, I_r_width = I_r_size
I_r_grid_x = paddle.divide( I_r_grid_x = (paddle.arange(
paddle.arange( -I_r_width, I_r_width, 2, dtype='float64') + 1.0
-I_r_width, I_r_width, 2, dtype='float64') + 1.0, ) / paddle.to_tensor(np.array([I_r_width]))
paddle.to_tensor(
I_r_width, dtype='float64')) I_r_grid_y = (paddle.arange(
I_r_grid_y = paddle.divide( -I_r_height, I_r_height, 2, dtype='float64') + 1.0
paddle.arange( ) / paddle.to_tensor(np.array([I_r_height]))
-I_r_height, I_r_height, 2, dtype='float64') + 1.0,
paddle.to_tensor(
I_r_height, dtype='float64')) # self.I_r_height
# P: self.I_r_width x self.I_r_height x 2 # P: self.I_r_width x self.I_r_height x 2
P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2) P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
P = paddle.transpose(P, perm=[1, 0, 2]) P = paddle.transpose(P, perm=[1, 0, 2])

View File

@ -109,7 +109,7 @@ class CTCLabelDecode(BaseRecLabelDecode):
preds_idx = preds.argmax(axis=2) preds_idx = preds.argmax(axis=2)
preds_prob = preds.max(axis=2) preds_prob = preds.max(axis=2)
text = self.decode(preds_idx, preds_prob) text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
if label is None: if label is None:
return text return text
label = self.decode(label) label = self.decode(label)

View File

@ -182,8 +182,8 @@ def train(config,
start_epoch = 1 start_epoch = 1
for epoch in range(start_epoch, epoch_num + 1): for epoch in range(start_epoch, epoch_num + 1):
if epoch > 0: train_dataloader = build_dataloader(
train_dataloader = build_dataloader(config, 'Train', device, logger) config, 'Train', device, logger, seed=epoch)
train_batch_cost = 0.0 train_batch_cost = 0.0
train_reader_cost = 0.0 train_reader_cost = 0.0
batch_sum = 0 batch_sum = 0