diff --git a/configs/rec/ch_ppocr_v2.1/rec_chinese_lite_train_distillation_v2.1.yml b/configs/rec/ch_ppocr_v2.1/rec_chinese_lite_train_distillation_v2.1.yml
index 6b60ae08..791b34cf 100644
--- a/configs/rec/ch_ppocr_v2.1/rec_chinese_lite_train_distillation_v2.1.yml
+++ b/configs/rec/ch_ppocr_v2.1/rec_chinese_lite_train_distillation_v2.1.yml
@@ -52,9 +52,10 @@ Architecture:
Neck:
name: SequenceEncoder
encoder_type: rnn
- hidden_size: 48
+ hidden_size: 64
Head:
name: CTCHead
+ mid_channels: 96
fc_decay: 0.00001
Teacher:
pretrained:
@@ -71,9 +72,10 @@ Architecture:
Neck:
name: SequenceEncoder
encoder_type: rnn
- hidden_size: 48
+ hidden_size: 64
Head:
name: CTCHead
+ mid_channels: 96
fc_decay: 0.00001
diff --git a/doc/joinus.PNG b/doc/joinus.PNG
index 4a274e63..7c67fceb 100644
Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ
diff --git a/doc/table/PaddleDetection_config.png b/doc/table/PaddleDetection_config.png
new file mode 100644
index 00000000..d18932b6
Binary files /dev/null and b/doc/table/PaddleDetection_config.png differ
diff --git a/doc/table/paper-image.jpg b/doc/table/paper-image.jpg
new file mode 100644
index 00000000..db7246b3
Binary files /dev/null and b/doc/table/paper-image.jpg differ
diff --git a/doc/table/result_all.jpg b/doc/table/result_all.jpg
new file mode 100644
index 00000000..3dd98406
Binary files /dev/null and b/doc/table/result_all.jpg differ
diff --git a/doc/table/result_text.jpg b/doc/table/result_text.jpg
new file mode 100644
index 00000000..94c9bce4
Binary files /dev/null and b/doc/table/result_text.jpg differ
diff --git a/ppocr/modeling/heads/rec_ctc_head.py b/ppocr/modeling/heads/rec_ctc_head.py
index 86e81677..9c38d31f 100755
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@@ -33,19 +33,47 @@ def get_para_bias_attr(l2_decay, k):
class CTCHead(nn.Layer):
- def __init__(self, in_channels, out_channels, fc_decay=0.0004, **kwargs):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ fc_decay=0.0004,
+ mid_channels=None,
+ **kwargs):
super(CTCHead, self).__init__()
- weight_attr, bias_attr = get_para_bias_attr(
- l2_decay=fc_decay, k=in_channels)
- self.fc = nn.Linear(
- in_channels,
- out_channels,
- weight_attr=weight_attr,
- bias_attr=bias_attr)
+ if mid_channels is None:
+ weight_attr, bias_attr = get_para_bias_attr(
+ l2_decay=fc_decay, k=in_channels)
+ self.fc = nn.Linear(
+ in_channels,
+ out_channels,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr)
+ else:
+ weight_attr1, bias_attr1 = get_para_bias_attr(
+ l2_decay=fc_decay, k=in_channels)
+ self.fc1 = nn.Linear(
+ in_channels,
+ mid_channels,
+ weight_attr=weight_attr1,
+ bias_attr=bias_attr1)
+
+ weight_attr2, bias_attr2 = get_para_bias_attr(
+ l2_decay=fc_decay, k=mid_channels)
+ self.fc2 = nn.Linear(
+ mid_channels,
+ out_channels,
+ weight_attr=weight_attr2,
+ bias_attr=bias_attr2)
self.out_channels = out_channels
+ self.mid_channels = mid_channels
def forward(self, x, targets=None):
- predicts = self.fc(x)
+ if self.mid_channels is None:
+ predicts = self.fc(x)
+ else:
+ predicts = self.fc1(x)
+ predicts = self.fc2(predicts)
+
if not self.training:
predicts = F.softmax(predicts, axis=2)
return predicts
diff --git a/test/MANIFEST.in b/test1/MANIFEST.in
similarity index 100%
rename from test/MANIFEST.in
rename to test1/MANIFEST.in
diff --git a/test/__init__.py b/test1/__init__.py
similarity index 100%
rename from test/__init__.py
rename to test1/__init__.py
diff --git a/test/api.md b/test1/api.md
similarity index 92%
rename from test/api.md
rename to test1/api.md
index 60cf18f4..7283595d 100644
--- a/test/api.md
+++ b/test1/api.md
@@ -1,5 +1,11 @@
# PaddleStructure
+install layoutparser
+```sh
+wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip3 install layoutparser-0.0.0-py3-none-any.whl
+```
+
## 1. Introduction to pipeline
PaddleStructure is a toolkit for complex layout text OCR, the process is as follows
diff --git a/test/api_ch.md b/test1/api_ch.md
similarity index 91%
rename from test/api_ch.md
rename to test1/api_ch.md
index c3a09a3d..7fafe700 100644
--- a/test/api_ch.md
+++ b/test1/api_ch.md
@@ -1,5 +1,11 @@
# PaddleStructure
+安装layoutparser
+```sh
+wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip3 install layoutparser-0.0.0-py3-none-any.whl
+```
+
## 1. pipeline介绍
PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下
@@ -18,6 +24,7 @@ PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下
## 2. LayoutParser
+[文档](layout/README.md)
## 3. Table OCR
diff --git a/test1/layout/README.md b/test1/layout/README.md
new file mode 100644
index 00000000..274a8c63
--- /dev/null
+++ b/test1/layout/README.md
@@ -0,0 +1,133 @@
+# 版面分析使用说明
+
+* [1. 安装whl包](#安装whl包)
+* [2. 使用](#使用)
+* [3. 后处理](#后处理)
+* [4. 指标](#指标)
+* [5. 训练版面分析模型](#训练版面分析模型)
+
+
+
+## 1. 安装whl包
+```bash
+wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip install -U layoutparser-0.0.0-py3-none-any.whl
+```
+
+
+
+## 2. 使用
+
+使用layoutparser识别给定文档的布局:
+
+```python
+import layoutparser as lp
+image = cv2.imread("images/paper-image.jpg")
+image = image[..., ::-1]
+
+# 加载模型
+model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
+ threshold=0.5,
+ label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},
+ enforce_cpu=False,
+ enable_mkldnn=True)
+# 检测
+layout = model.detect(image)
+
+# 显示结果
+lp.draw_box(image, layout, box_width=3, show_element_type=True)
+```
+
+下图展示了结果,不同颜色的检测框表示不同的类别,并通过`show_element_type`在框的左上角显示具体类别:
+
+
+
+
+
+`PaddleDetectionLayoutModel`函数参数说明如下:
+
+| 参数 | 含义 | 默认值 | 备注 |
+| :------------: | :-------------------------: | :---------: | :----------------------------------------------------------: |
+| config_path | 模型配置路径 | None | 指定config_path会自动下载模型(仅第一次,之后模型存在,不会再下载) |
+| model_path | 模型路径 | None | 本地模型路径,config_path和model_path必须设置一个,不能同时为None |
+| threshold | 预测得分的阈值 | 0.5 | \ |
+| input_shape | reshape之后图片尺寸 | [3,640,640] | \ |
+| batch_size | 测试batch size | 1 | \ |
+| label_map | 类别映射表 | None | 设置config_path时,可以为None,根据数据集名称自动获取label_map |
+| enforce_cpu | 代码是否使用CPU运行 | False | 设置为False表示使用GPU,True表示强制使用CPU |
+| enable_mkldnn  | CPU预测中是否开启MKLDNN加速 |    True     |                              \                               |
+| thread_num | 设置CPU线程数 | 10 | \ |
+
+目前支持以下几种模型配置和label map,您可以通过修改参数 `config_path`和 `label_map`使用这些模型,从而检测不同类型的内容:
+
+| dataset | config_path | label_map |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- |
+| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"} |
+| TableBank latex | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"} |
+| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} |
+
+* TableBank word和TableBank latex分别在word文档、latex文档数据集训练;
+* 下载TableBank数据集同时包含word和latex。
+
+
+
+## 3. 后处理
+
+版面分析检测包含多个类别,如果只想获取指定类别(如"Text"类别)的检测框,可以使用下述代码:
+
+```python
+# 首先过滤特定文本类型的区域
+text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
+figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
+
+# 因为在图像区域内可能检测到文本区域,所以只需要删除它们
+text_blocks = lp.Layout([b for b in text_blocks \
+ if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
+
+# 对文本区域排序并分配id
+h, w = image.shape[:2]
+
+left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
+
+left_blocks = text_blocks.filter_by(left_interval, center=True)
+left_blocks.sort(key = lambda b:b.coordinates[1])
+
+right_blocks = [b for b in text_blocks if b not in left_blocks]
+right_blocks.sort(key = lambda b:b.coordinates[1])
+
+# 最终合并两个列表,并按顺序添加索引
+text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
+
+# 显示结果
+lp.draw_box(image, text_blocks,
+ box_width=3,
+ show_element_id=True)
+```
+
+显示只有"Text"类别的结果:
+
+
+
+
+
+
+
+## 4. 指标
+
+| Dataset | mAP | CPU time cost | GPU time cost |
+| --------- | ---- | ------------- | ------------- |
+| PubLayNet | 93.6 | 1713.7ms | 66.6ms |
+| TableBank | 96.2 | 1968.4ms | 65.1ms |
+
+**Environment:**
+
+ **CPU:** Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz,24core
+
+ **GPU:** a single NVIDIA Tesla P40
+
+
+
+## 5. 训练版面分析模型
+
+上述模型基于[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) 训练,如果您想训练自己的版面分析模型,请参考:[train_layoutparser_model](train_layoutparser_model.md)
+
diff --git a/test1/layout/train_layoutparser_model.md b/test1/layout/train_layoutparser_model.md
new file mode 100644
index 00000000..0a4554e1
--- /dev/null
+++ b/test1/layout/train_layoutparser_model.md
@@ -0,0 +1,188 @@
+# 训练版面分析
+
+* [1. 安装](#安装)
+ * [1.1 环境要求](#环境要求)
+ * [1.2 安装PaddleDetection](#安装PaddleDetection)
+* [2. 准备数据](#准备数据)
+* [3. 配置文件改动和说明](#配置文件改动和说明)
+* [4. PaddleDetection训练](#训练)
+* [5. PaddleDetection预测](#预测)
+* [6. 预测部署](#预测部署)
+ * [6.1 模型导出](#模型导出)
+ * [6.2 layout parser预测](#layout_parser预测)
+
+
+
+## 1. 安装
+
+
+
+### 1.1 环境要求
+
+- PaddlePaddle 2.1
+- OS 64 bit
+- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64 bit
+- pip/pip3(9.0.1+), 64 bit
+- CUDA >= 10.1
+- cuDNN >= 7.6
+
+
+
+### 1.2 安装PaddleDetection
+
+```bash
+# 克隆PaddleDetection仓库
+cd
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+
+cd PaddleDetection
+# 安装其他依赖
+pip install -r requirements.txt
+```
+
+更多安装教程,请参考: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
+
+
+
+## 2. 准备数据
+
+下载 [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) 数据集:
+
+```bash
+cd PaddleDetection/dataset/
+mkdir publaynet
+# 执行命令,下载
+wget -O publaynet.tar.gz "https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733"
+# 解压
+tar -xvf publaynet.tar.gz
+```
+
+解压之后PubLayNet目录结构:
+
+| File or Folder | Description | num |
+| :------------- | :----------------------------------------------- | ------- |
+| `train/` | Images in the training subset | 335,703 |
+| `val/` | Images in the validation subset | 11,245 |
+| `test/` | Images in the testing subset | 11,405 |
+| `train.json` | Annotations for training images | |
+| `val.json` | Annotations for validation images | |
+| `LICENSE.txt` | Plaintext version of the CDLA-Permissive license | |
+| `README.txt` | Text file with the file names and description | |
+
+如果使用其它数据集,请参考[准备训练数据](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md)
+
+
+
+## 3. 配置文件改动和说明
+
+我们使用 `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml`配置进行训练,配置文件摘要如下:
+
+
+
+
+
+从上图看到 `ppyolov2_r50vd_dcn_365e_coco.yml` 配置需要依赖其他的配置文件,在该例子中需要依赖:
+
+```
+coco_detection.yml:主要说明了训练数据和验证数据的路径
+
+runtime.yml:主要说明了公共的运行参数,比如是否使用GPU、每多少个epoch存储checkpoint等
+
+optimizer_365e.yml:主要说明了学习率和优化器的配置
+
+ppyolov2_r50vd_dcn.yml:主要说明模型和主干网络的情况
+
+ppyolov2_reader.yml:主要说明数据读取器配置,如batch size,并发加载子进程数等,同时包含读取后预处理操作,如resize、数据增强等等
+```
+
+根据实际情况,修改上述文件,比如数据集路径、batch size等。
+
+
+
+## 4. PaddleDetection训练
+
+PaddleDetection提供了单卡/多卡训练模式,满足用户多种训练需求
+
+* GPU 单卡训练
+
+```bash
+export CUDA_VISIBLE_DEVICES=0 #windows和Mac下不需要执行该命令
+python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
+```
+
+* GPU多卡训练
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval
+```
+
+--eval:表示边训练边验证
+
+* 模型恢复训练
+
+在日常训练过程中,有的用户由于一些原因导致训练中断,用户可以使用-r的命令恢复训练:
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000
+```
+
+注意:如果遇到 "`Out of memory error`" 问题, 尝试在 `ppyolov2_reader.yml` 文件中调小`batch_size`
+
+
+
+## 5. PaddleDetection预测
+
+设置参数,使用PaddleDetection预测:
+
+```bash
+export CUDA_VISIBLE_DEVICES=0
+python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=True
+```
+
+`--draw_threshold` 是个可选参数。根据 [NMS](https://ieeexplore.ieee.org/document/1699659) 的计算,不同阈值会产生不同的结果。`keep_top_k`表示设置输出目标的最大数量,默认值为100,用户可以根据自己的实际情况进行设定。
+
+
+
+## 6. 预测部署
+
+在layout parser中使用自己训练好的模型,需要先导出模型,再通过`model_path`加载:
+
+
+
+### 6.1 模型导出
+
+在模型训练过程中保存的模型文件包含前向预测和反向传播的过程,而实际的工业部署不需要反向传播,因此需要将模型导出成部署需要的模型格式。在PaddleDetection中提供了 `tools/export_model.py`脚本来导出模型。
+
+导出模型名称默认是`model.*`,layout parser代码模型名称是`inference.*`, 所以修改[PaddleDetection/ppdet/engine/trainer.py](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py#L512) (点开链接查看详细代码行),将`model`改为`inference`即可。
+
+执行导出模型脚本:
+
+```bash
+python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams
+```
+
+预测模型会导出到`inference/ppyolov2_r50vd_dcn_365e_coco`目录下,分别为`infer_cfg.yml`(预测不需要), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel` 。
+
+更多模型导出教程,请参考:[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md)
+
+
+
+### 6.2 layout_parser预测
+
+`model_path`指定训练好的模型路径,使用layout parser进行预测:
+
+```python
+import layoutparser as lp
+model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True)
+```
+
+
+
+***
+
+更多PaddleDetection训练教程,请参考:[PaddleDetection训练](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md)
+
+***
+
diff --git a/test1/layoutparser-0.0.0-py3-none-any.whl b/test1/layoutparser-0.0.0-py3-none-any.whl
new file mode 100644
index 00000000..e3fa17fc
Binary files /dev/null and b/test1/layoutparser-0.0.0-py3-none-any.whl differ
diff --git a/test/paddlestructure.py b/test1/paddlestructure.py
similarity index 97%
rename from test/paddlestructure.py
rename to test1/paddlestructure.py
index 67fd85cf..171d55b3 100644
--- a/test/paddlestructure.py
+++ b/test1/paddlestructure.py
@@ -24,9 +24,9 @@ import numpy as np
from pathlib import Path
from ppocr.utils.logging import get_logger
-from test.predict_system import OCRSystem, save_res
-from test.table.predict_table import to_excel
-from test.utility import init_args, draw_result
+from test1.predict_system import OCRSystem, save_res
+from test1.table.predict_table import to_excel
+from test1.utility import init_args, draw_result
logger = get_logger()
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
diff --git a/test/predict_system.py b/test1/predict_system.py
similarity index 97%
rename from test/predict_system.py
rename to test1/predict_system.py
index 94ad4d80..9e99a48c 100644
--- a/test/predict_system.py
+++ b/test1/predict_system.py
@@ -31,8 +31,8 @@ import layoutparser as lp
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
from tools.infer.predict_system import TextSystem
-from test.table.predict_table import TableSystem, to_excel
-from test.utility import parse_args, draw_result
+from test1.table.predict_table import TableSystem, to_excel
+from test1.utility import parse_args, draw_result
logger = get_logger()
diff --git a/test/setup.py b/test1/setup.py
similarity index 93%
rename from test/setup.py
rename to test1/setup.py
index 7a2c3676..0b092c49 100644
--- a/test/setup.py
+++ b/test1/setup.py
@@ -30,9 +30,9 @@ def readme():
return README
-shutil.copytree('/table', './test/table')
-shutil.copyfile('/predict_system.py', './test/predict_system.py')
-shutil.copyfile('/utility.py', './test/utility.py')
+shutil.copytree('./table', './test1/table')
+shutil.copyfile('./predict_system.py', './test1/predict_system.py')
+shutil.copyfile('./utility.py', './test1/utility.py')
shutil.copytree('../ppocr', './ppocr')
shutil.copytree('../tools', './tools')
shutil.copyfile('../LICENSE', './LICENSE')
@@ -68,5 +68,5 @@ setup(
shutil.rmtree('ppocr')
shutil.rmtree('tools')
-shutil.rmtree('test')
+shutil.rmtree('test1')
os.remove('LICENSE')
diff --git a/test/table/README.md b/test1/table/README.md
similarity index 100%
rename from test/table/README.md
rename to test1/table/README.md
diff --git a/test/table/README_ch.md b/test1/table/README_ch.md
similarity index 100%
rename from test/table/README_ch.md
rename to test1/table/README_ch.md
diff --git a/test/table/__init__.py b/test1/table/__init__.py
similarity index 100%
rename from test/table/__init__.py
rename to test1/table/__init__.py
diff --git a/test/table/eval_table.py b/test1/table/eval_table.py
similarity index 94%
rename from test/table/eval_table.py
rename to test1/table/eval_table.py
index a027a45f..dc63e34e 100755
--- a/test/table/eval_table.py
+++ b/test1/table/eval_table.py
@@ -20,9 +20,9 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
import cv2
import json
from tqdm import tqdm
-from test.table.table_metric import TEDS
-from test.table.predict_table import TableSystem
-from test.utility import init_args
+from test1.table.table_metric import TEDS
+from test1.table.predict_table import TableSystem
+from test1.utility import init_args
from ppocr.utils.logging import get_logger
logger = get_logger()
diff --git a/test/table/matcher.py b/test1/table/matcher.py
similarity index 100%
rename from test/table/matcher.py
rename to test1/table/matcher.py
diff --git a/test/table/predict_structure.py b/test1/table/predict_structure.py
similarity index 99%
rename from test/table/predict_structure.py
rename to test1/table/predict_structure.py
index eacf2577..455bf7e7 100755
--- a/test/table/predict_structure.py
+++ b/test1/table/predict_structure.py
@@ -32,7 +32,7 @@ from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
-from test.utility import parse_args
+from test1.utility import parse_args
logger = get_logger()
diff --git a/test/table/predict_table.py b/test1/table/predict_table.py
similarity index 98%
rename from test/table/predict_table.py
rename to test1/table/predict_table.py
index 66c0895d..b06a4f4d 100644
--- a/test/table/predict_table.py
+++ b/test1/table/predict_table.py
@@ -30,9 +30,9 @@ import tools.infer.predict_rec as predict_rec
import tools.infer.predict_det as predict_det
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
-from test.table.matcher import distance, compute_iou
-from test.utility import parse_args
-import test.table.predict_structure as predict_strture
+from test1.table.matcher import distance, compute_iou
+from test1.utility import parse_args
+import test1.table.predict_structure as predict_strture
logger = get_logger()
diff --git a/test/table/table_metric/__init__.py b/test1/table/table_metric/__init__.py
similarity index 100%
rename from test/table/table_metric/__init__.py
rename to test1/table/table_metric/__init__.py
diff --git a/test/table/table_metric/parallel.py b/test1/table/table_metric/parallel.py
similarity index 100%
rename from test/table/table_metric/parallel.py
rename to test1/table/table_metric/parallel.py
diff --git a/test/table/table_metric/table_metric.py b/test1/table/table_metric/table_metric.py
similarity index 100%
rename from test/table/table_metric/table_metric.py
rename to test1/table/table_metric/table_metric.py
diff --git a/test/table/tablepyxl/__init__.py b/test1/table/tablepyxl/__init__.py
similarity index 100%
rename from test/table/tablepyxl/__init__.py
rename to test1/table/tablepyxl/__init__.py
diff --git a/test/table/tablepyxl/style.py b/test1/table/tablepyxl/style.py
similarity index 100%
rename from test/table/tablepyxl/style.py
rename to test1/table/tablepyxl/style.py
diff --git a/test/table/tablepyxl/tablepyxl.py b/test1/table/tablepyxl/tablepyxl.py
similarity index 100%
rename from test/table/tablepyxl/tablepyxl.py
rename to test1/table/tablepyxl/tablepyxl.py
diff --git a/test/utility.py b/test1/utility.py
similarity index 100%
rename from test/utility.py
rename to test1/utility.py
diff --git a/tools/eval.py b/tools/eval.py
index 155dc498..c1315805 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -44,8 +44,15 @@ def main():
# build model
# for rec algorithm
if hasattr(post_process_class, 'character'):
- config['Architecture']["Head"]['out_channels'] = len(
- getattr(post_process_class, 'character'))
+ char_num = len(getattr(post_process_class, 'character'))
+ if config['Architecture']["algorithm"] in ["Distillation",
+ ]: # distillation model
+ for key in config['Architecture']["Models"]:
+ config['Architecture']["Models"][key]["Head"][
+ 'out_channels'] = char_num
+ else: # base rec model
+ config['Architecture']["Head"]['out_channels'] = char_num
+
model = build_model(config['Architecture'])
use_srn = config['Architecture']['algorithm'] == "SRN"
model_type = config['Architecture']['model_type']
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index baa89be1..c5e25903 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -31,7 +31,7 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process
-import tools.infer.benchmark_utils as benchmark_utils
+# import tools.infer.benchmark_utils as benchmark_utils
logger = get_logger()
@@ -100,8 +100,6 @@ class TextDetector(object):
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
args, 'det', logger)
- self.det_times = utility.Timer()
-
def order_points_clockwise(self, pts):
"""
reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
@@ -158,8 +156,8 @@ class TextDetector(object):
def __call__(self, img):
ori_im = img.copy()
data = {'image': img}
- self.det_times.total_time.start()
- self.det_times.preprocess_time.start()
+
+ st = time.time()
data = transform(data, self.preprocess_op)
img, shape_list = data
if img is None:
@@ -168,16 +166,12 @@ class TextDetector(object):
shape_list = np.expand_dims(shape_list, axis=0)
img = img.copy()
- self.det_times.preprocess_time.end()
- self.det_times.inference_time.start()
-
self.input_tensor.copy_from_cpu(img)
self.predictor.run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
- self.det_times.inference_time.end()
preds = {}
if self.det_algorithm == "EAST":
@@ -193,8 +187,6 @@ class TextDetector(object):
else:
raise NotImplementedError
- self.det_times.postprocess_time.start()
-
self.predictor.try_shrink_memory()
post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points']
@@ -203,10 +195,8 @@ class TextDetector(object):
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
- self.det_times.postprocess_time.end()
- self.det_times.total_time.end()
- self.det_times.img_num += 1
- return dt_boxes, self.det_times.total_time.value()
+ et = time.time()
+ return dt_boxes, et - st
if __name__ == "__main__":
@@ -216,12 +206,13 @@ if __name__ == "__main__":
count = 0
total_time = 0
draw_img_save = "./inference_results"
- cpu_mem, gpu_mem, gpu_util = 0, 0, 0
- # warmup 10 times
- fake_img = np.random.uniform(-1, 1, [640, 640, 3]).astype(np.float32)
- for i in range(10):
- dt_boxes, _ = text_detector(fake_img)
+ if args.warmup:
+ img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+ for i in range(10):
+ res = text_detector(img)
+
+ cpu_mem, gpu_mem, gpu_util = 0, 0, 0
if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save)
@@ -239,12 +230,6 @@ if __name__ == "__main__":
total_time += elapse
count += 1
- if args.benchmark:
- cm, gm, gu = utility.get_current_memory_mb(0)
- cpu_mem += cm
- gpu_mem += gm
- gpu_util += gu
-
logger.info("Predict time of {}: {}".format(image_file, elapse))
src_im = utility.draw_text_det_res(dt_boxes, image_file)
img_name_pure = os.path.split(image_file)[-1]
@@ -252,36 +237,3 @@ if __name__ == "__main__":
"det_res_{}".format(img_name_pure))
logger.info("The visualized image saved in {}".format(img_path))
- # print the information about memory and time-spent
- if args.benchmark:
- mems = {
- 'cpu_rss_mb': cpu_mem / count,
- 'gpu_rss_mb': gpu_mem / count,
- 'gpu_util': gpu_util * 100 / count
- }
- else:
- mems = None
- logger.info("The predict time about detection module is as follows: ")
- det_time_dict = text_detector.det_times.report(average=True)
- det_model_name = args.det_model_dir
-
- if args.benchmark:
- # construct log information
- model_info = {
- 'model_name': args.det_model_dir.split('/')[-1],
- 'precision': args.precision
- }
- data_info = {
- 'batch_size': 1,
- 'shape': 'dynamic_shape',
- 'data_num': det_time_dict['img_num']
- }
- perf_info = {
- 'preprocess_time_s': det_time_dict['preprocess_time'],
- 'inference_time_s': det_time_dict['inference_time'],
- 'postprocess_time_s': det_time_dict['postprocess_time'],
- 'total_time_s': det_time_dict['total_time']
- }
- benchmark_log = benchmark_utils.PaddleInferBenchmark(
- text_detector.config, model_info, data_info, perf_info, mems)
- benchmark_log("Det")
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 2eeb39b2..0d847046 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -257,13 +257,15 @@ def main(args):
text_recognizer = TextRecognizer(args)
valid_image_file_list = []
img_list = []
- cpu_mem, gpu_mem, gpu_util = 0, 0, 0
- count = 0
# warmup 10 times
- fake_img = np.random.uniform(-1, 1, [1, 32, 320, 3]).astype(np.float32)
- for i in range(10):
- dt_boxes, _ = text_recognizer(fake_img)
+ if args.warmup:
+ img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
+ for i in range(10):
+ res = text_recognizer([img])
+
+ cpu_mem, gpu_mem, gpu_util = 0, 0, 0
+ count = 0
for image_file in image_file_list:
img, flag = check_and_read_gif(image_file)
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index 7361d27f..c008f967 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -13,6 +13,7 @@
# limitations under the License.
import os
import sys
+import subprocess
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
@@ -93,7 +94,6 @@ class TextSystem(object):
dt_boxes, elapse = self.text_detector(img)
logger.debug("dt_boxes num : {}, elapse : {}".format(
-
len(dt_boxes), elapse))
if dt_boxes is None:
return None, None
@@ -147,15 +147,24 @@ def sorted_boxes(dt_boxes):
def main(args):
image_file_list = get_image_file_list(args.image_dir)
+ image_file_list = image_file_list[args.process_id::args.total_process_num]
text_sys = TextSystem(args)
is_visualize = True
font_path = args.vis_font_path
drop_score = args.drop_score
+
+ # warm up 10 times
+ if args.warmup:
+ img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+ for i in range(10):
+ res = text_sys(img)
+
total_time = 0
cpu_mem, gpu_mem, gpu_util = 0, 0, 0
_st = time.time()
count = 0
for idx, image_file in enumerate(image_file_list):
+
img, flag = check_and_read_gif(image_file)
if not flag:
img = cv2.imread(image_file)
@@ -264,4 +273,18 @@ def main(args):
if __name__ == "__main__":
- main(utility.parse_args())
+ args = utility.parse_args()
+ if args.use_mp:
+ p_list = []
+ total_process_num = args.total_process_num
+ for process_id in range(total_process_num):
+ cmd = [sys.executable, "-u"] + sys.argv + [
+ "--process_id={}".format(process_id),
+ "--use_mp={}".format(False)
+ ]
+ p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
+ p_list.append(p)
+ for p in p_list:
+ p.wait()
+ else:
+ main(args)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 3b43efe4..d491d601 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -105,7 +105,9 @@ def init_args():
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--cpu_threads", type=int, default=10)
parser.add_argument("--use_pdserving", type=str2bool, default=False)
+ parser.add_argument("--warmup", type=str2bool, default=True)
+ # multi-process
parser.add_argument("--use_mp", type=str2bool, default=False)
parser.add_argument("--total_process_num", type=int, default=1)
parser.add_argument("--process_id", type=int, default=0)
@@ -113,7 +115,6 @@ def init_args():
parser.add_argument("--benchmark", type=bool, default=False)
parser.add_argument("--save_log_path", type=str, default="./log_output/")
-
parser.add_argument("--show_log", type=str2bool, default=True)
return parser