Merge remote-tracking branch 'upstream/dygraph' into dy3
This commit is contained in:
commit
43ccde7d88
|
@ -1,130 +0,0 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 1200
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 2
|
||||
save_model_dir: ./output/det_r50_vd/
|
||||
save_epoch_step: 1200
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: 8
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: True
|
||||
infer_img: doc/imgs_en/img_10.jpg
|
||||
save_res_path: ./output/det_db/predicts_db.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
learning_rate:
|
||||
lr: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
Architecture:
|
||||
type: det
|
||||
algorithm: DB
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 50
|
||||
Neck:
|
||||
name: FPN
|
||||
out_channels: 256
|
||||
Head:
|
||||
name: DBHead
|
||||
k: 50
|
||||
|
||||
Loss:
|
||||
name: DBLoss
|
||||
balance_loss: true
|
||||
main_loss_type: DiceLoss
|
||||
alpha: 5
|
||||
beta: 10
|
||||
ohem_ratio: 3
|
||||
|
||||
PostProcess:
|
||||
name: DBPostProcess
|
||||
thresh: 0.3
|
||||
box_thresh: 0.6
|
||||
max_candidates: 1000
|
||||
unclip_ratio: 1.5
|
||||
|
||||
Metric:
|
||||
name: DetMetric
|
||||
main_indicator: hmean
|
||||
|
||||
TRAIN:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./detection/
|
||||
file_list:
|
||||
- ./detection/train_icdar2015_label.txt # dataset1
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- IaaAugment:
|
||||
augmenter_args:
|
||||
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
|
||||
- { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
|
||||
- { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
|
||||
- EastRandomCropData:
|
||||
size: [ 640,640 ]
|
||||
max_tries: 50
|
||||
keep_ratio: true
|
||||
- MakeBorderMap:
|
||||
shrink_ratio: 0.4
|
||||
thresh_min: 0.3
|
||||
thresh_max: 0.7
|
||||
- MakeShrinkMap:
|
||||
shrink_ratio: 0.4
|
||||
min_text_size: 8
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [ 0.485, 0.456, 0.406 ]
|
||||
std: [ 0.229, 0.224, 0.225 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- keepKeys:
|
||||
keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
batch_size: 16
|
||||
num_workers: 8
|
||||
|
||||
EVAL:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./detection/
|
||||
file_list:
|
||||
- ./detection/test_icdar2015_label.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- DetLabelEncode: # Class handling label
|
||||
- DetResizeForTest:
|
||||
image_shape: [736,1280]
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [ 0.485, 0.456, 0.406 ]
|
||||
std: [ 0.229, 0.224, 0.225 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- keepKeys:
|
||||
keep_keys: ['image','shape','polys','ignore_tags']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size: 1 # must be 1
|
||||
num_workers: 8
|
|
@ -1,106 +0,0 @@
|
|||
Global:
|
||||
use_gpu: false
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
|
||||
save_epoch_step: 500
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: 127
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
max_text_length: 80
|
||||
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
|
||||
character_type: 'ch'
|
||||
use_space_char: False
|
||||
infer_mode: False
|
||||
use_tps: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
learning_rate:
|
||||
lr: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00001
|
||||
|
||||
Architecture:
|
||||
type: rec
|
||||
algorithm: CRNN
|
||||
Transform:
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride: [ 1, 2, 2, 2 ]
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: fc
|
||||
hidden_size: 96
|
||||
Head:
|
||||
name: CTC
|
||||
fc_decay: 0.00001
|
||||
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
TRAIN:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/train.txt # dataset1
|
||||
ratio_list: [ 0.4,0.6 ]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecAug:
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
batch_size: 256
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
EVAL:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/val.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size: 256
|
||||
num_workers: 8
|
|
@ -1,104 +0,0 @@
|
|||
Global:
|
||||
use_gpu: false
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/res34_none_bilstm_ctc/
|
||||
save_epoch_step: 500
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: 127
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
max_text_length: 80
|
||||
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
|
||||
character_type: 'ch'
|
||||
use_space_char: False
|
||||
infer_mode: False
|
||||
use_tps: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
learning_rate:
|
||||
lr: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00001
|
||||
|
||||
Architecture:
|
||||
type: rec
|
||||
algorithm: CRNN
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 34
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: fc
|
||||
hidden_size: 96
|
||||
Head:
|
||||
name: CTC
|
||||
fc_decay: 0.00001
|
||||
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
TRAIN:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/train.txt # dataset1
|
||||
ratio_list: [ 0.4,0.6 ]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecAug:
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
batch_size: 256
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
EVAL:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/val.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size: 256
|
||||
num_workers: 8
|
|
@ -1,103 +0,0 @@
|
|||
Global:
|
||||
use_gpu: false
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec/res34_none_none_ctc/
|
||||
save_epoch_step: 500
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: 127
|
||||
# if pretrained_model is saved in static mode, load_static_weights must set to True
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: True
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
# for data or label process
|
||||
max_text_length: 80
|
||||
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
|
||||
character_type: 'ch'
|
||||
use_space_char: False
|
||||
infer_mode: False
|
||||
use_tps: False
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
learning_rate:
|
||||
lr: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0.00001
|
||||
|
||||
Architecture:
|
||||
type: rec
|
||||
algorithm: CRNN
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 34
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: reshape
|
||||
Head:
|
||||
name: CTC
|
||||
fc_decay: 0.00001
|
||||
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
|
||||
TRAIN:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/train.txt # dataset1
|
||||
ratio_list: [ 0.4,0.6 ]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecAug:
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
batch_size: 256
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
|
||||
EVAL:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./rec
|
||||
file_list:
|
||||
- ./rec/val.txt
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- CTCLabelEncode: # Class handling label
|
||||
- RecResizeImg:
|
||||
image_shape: [ 3,32,320 ]
|
||||
- keepKeys:
|
||||
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size: 256
|
||||
num_workers: 8
|
|
@ -41,7 +41,7 @@ inference 模型(`paddle.jit.save`保存的模型)
|
|||
|
||||
下载超轻量级中文检测模型:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/
|
||||
```
|
||||
上述模型是以MobileNetV3为backbone训练的DB算法,将训练好的模型转换成inference模型只需要运行如下命令:
|
||||
```
|
||||
|
@ -51,9 +51,9 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar
|
|||
# Global.load_static_weights 参数需要设置为 False。
|
||||
# Global.save_inference_dir参数设置转换的模型将保存的地址。
|
||||
|
||||
python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/
|
||||
python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/
|
||||
```
|
||||
转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.checkpoints`参数,其指向训练中保存的模型参数文件。
|
||||
转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.pretrained_model`参数,其指向训练中保存的模型参数文件。
|
||||
转换成功后,在模型保存目录下有三个文件:
|
||||
```
|
||||
inference/det_db/
|
||||
|
@ -67,7 +67,7 @@ inference/det_db/
|
|||
|
||||
下载超轻量中文识别模型:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/
|
||||
```
|
||||
|
||||
识别模型转inference模型与检测的方式相同,如下:
|
||||
|
@ -78,7 +78,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar
|
|||
# Global.load_static_weights 参数需要设置为 False。
|
||||
# Global.save_inference_dir参数设置转换的模型将保存的地址。
|
||||
|
||||
python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/
|
||||
python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/
|
||||
```
|
||||
|
||||
**注意:**如果您是在自己的数据集上训练的模型,并且调整了中文字符的字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。
|
||||
|
@ -96,7 +96,7 @@ python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_trai
|
|||
|
||||
下载方向分类模型:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/
|
||||
```
|
||||
|
||||
方向分类模型转inference模型与检测的方式相同,如下:
|
||||
|
@ -107,7 +107,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar
|
|||
# Global.load_static_weights 参数需要设置为 False。
|
||||
# Global.save_inference_dir参数设置转换的模型将保存的地址。
|
||||
|
||||
python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/
|
||||
python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/
|
||||
```
|
||||
|
||||
转换成功后,在目录下有三个文件:
|
||||
|
@ -152,10 +152,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di
|
|||
<a name="DB文本检测模型推理"></a>
|
||||
### 2. DB文本检测模型推理
|
||||
|
||||
首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换:
|
||||
首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar) ),可以使用如下命令进行转换:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db
|
||||
```
|
||||
|
||||
DB文本检测模型推理,可以执行如下命令:
|
||||
|
@ -173,10 +173,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img_10.jpg" --det_
|
|||
<a name="EAST文本检测模型推理"></a>
|
||||
### 3. EAST文本检测模型推理
|
||||
|
||||
首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换:
|
||||
首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址 (coming soon)](link) ),可以使用如下命令进行转换:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east
|
||||
```
|
||||
|
||||
**EAST文本检测模型推理,需要设置参数`--det_algorithm="EAST"`**,可以执行如下命令:
|
||||
|
@ -194,9 +194,9 @@ python3 tools/infer/predict_det.py --det_algorithm="EAST" --image_dir="./doc/img
|
|||
<a name="SAST文本检测模型推理"></a>
|
||||
### 4. SAST文本检测模型推理
|
||||
#### (1). 四边形文本检测模型(ICDAR2015)
|
||||
首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换:
|
||||
首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换:
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15
|
||||
|
||||
```
|
||||
**SAST文本检测模型推理,需要设置参数`--det_algorithm="SAST"`**,可以执行如下命令:
|
||||
|
@ -208,10 +208,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
|
|||
![](../imgs_results/det_res_img_10_sast.jpg)
|
||||
|
||||
#### (2). 弯曲文本检测模型(Total-Text)
|
||||
首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换:
|
||||
首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
|
||||
|
||||
```
|
||||
|
||||
|
@ -254,10 +254,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695]
|
|||
我们以 CRNN 为例,介绍基于CTC损失的识别模型推理。 Rosetta 使用方式类似,不用设置识别算法参数rec_algorithm。
|
||||
|
||||
首先将 Rosetta 文本识别训练过程中保存的模型,转换成inference model。以基于Resnet34_vd骨干网络,使用MJSynth和SynthText两个英文文本识别合成数据集训练
|
||||
的模型为例([模型下载地址](link)),可以使用如下命令进行转换:
|
||||
的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar) ),可以使用如下命令进行转换:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
|
||||
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
|
||||
|
||||
```
|
||||
|
||||
|
@ -313,9 +313,9 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" -
|
|||
|
||||
执行命令后,上图的预测结果为:
|
||||
``` text
|
||||
2020-09-19 16:15:05,076-INFO: index: [205 206 38 39]
|
||||
2020-09-19 16:15:05,077-INFO: word : 바탕으로
|
||||
2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535
|
||||
2020-09-19 16:15:05,076-INFO: index: [205 206 38 39]
|
||||
2020-09-19 16:15:05,077-INFO: word : 바탕으로
|
||||
2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535
|
||||
```
|
||||
|
||||
<a name="方向分类模型推理"></a>
|
||||
|
@ -337,8 +337,7 @@ python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
|
|||
执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下:
|
||||
|
||||
```
|
||||
infer_img: doc/imgs_words/ch/word_1.jpg
|
||||
result: ('0', 0.9998784)
|
||||
Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
|
||||
```
|
||||
|
||||
<a name="文本检测、方向分类和文字识别串联推理"></a>
|
||||
|
|
|
@ -9,12 +9,12 @@
|
|||
|
||||
## 2.inference模型下载
|
||||
|
||||
* 移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR v1.1 系列模型下载](../doc_ch/models_list.md)
|
||||
* 移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR v2.0 系列模型下载](../doc_ch/models_list.md)
|
||||
|
||||
| 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 |
|
||||
| ------------ | --------------- | ----------------|---- | ---------- | -------- |
|
||||
| 中英文超轻量OCR模型(xM) | |移动端&服务器端|[推理模型](link) / [预训练模型](link)|[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) |
|
||||
| 中英文通用OCR模型(xM) | |服务器端 |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}}) |
|
||||
| 中英文超轻量OCR模型(8.1M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| 中英文通用OCR模型(155.1M) | ch_ppocr_server_v2.0_xx |服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
|
||||
|
||||
|
||||
* windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下
|
||||
|
@ -37,28 +37,29 @@ cd ..
|
|||
```
|
||||
mkdir inference && cd inference
|
||||
# 下载超轻量级中文OCR模型的检测模型并解压
|
||||
wget {} && tar xf ch_ppocr_mobile_v1.1_det_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
|
||||
# 下载超轻量级中文OCR模型的识别模型并解压
|
||||
wget {} && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
# 下载超轻量级中文OCR模型的文本方向分类器模型并解压
|
||||
wget {} && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
cd ..
|
||||
```
|
||||
|
||||
解压完毕后应有如下文件结构:
|
||||
|
||||
```
|
||||
|-inference
|
||||
|-ch_ppocr_mobile_v1.1_det_infer
|
||||
|- model
|
||||
|- params
|
||||
|-ch_ppocr_mobile_v1.1_rec_infer
|
||||
|- model
|
||||
|- params
|
||||
|-ch_ppocr_mobile-v1.1_cls_infer
|
||||
|- model
|
||||
|- params
|
||||
...
|
||||
├── ch_ppocr_mobile_v2.0_cls_infer
|
||||
│ ├── inference.pdiparams
|
||||
│ ├── inference.pdiparams.info
|
||||
│ └── inference.pdmodel
|
||||
├── ch_ppocr_mobile_v2.0_det_infer
|
||||
│ ├── inference.pdiparams
|
||||
│ ├── inference.pdiparams.info
|
||||
│ └── inference.pdmodel
|
||||
├── ch_ppocr_mobile_v2.0_rec_infer
|
||||
├── inference.pdiparams
|
||||
├── inference.pdiparams.info
|
||||
└── inference.pdmodel
|
||||
```
|
||||
|
||||
## 3.单张图像或者图像集合预测
|
||||
|
@ -68,13 +69,13 @@ cd ..
|
|||
```bash
|
||||
|
||||
# 预测image_dir指定的单张图像
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
|
||||
# 预测image_dir指定的图像集合
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
|
||||
# 如果想使用CPU进行预测,需设置use_gpu参数为False
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False
|
||||
```
|
||||
|
||||
- 通用中文OCR模型
|
||||
|
@ -83,7 +84,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode
|
|||
|
||||
```bash
|
||||
# 预测image_dir指定的单张图像
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
```
|
||||
|
||||
* 注意:
|
||||
|
|
|
@ -43,21 +43,21 @@ Next, we first introduce how to convert a trained model into an inference model,
|
|||
|
||||
Download the lightweight Chinese detection model:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/
|
||||
```
|
||||
|
||||
The above model is a DB algorithm trained with MobileNetV3 as the backbone. To convert the trained model into an inference model, just run the following command:
|
||||
```
|
||||
# -c Set the training algorithm yml configuration file
|
||||
# -o Set optional parameters
|
||||
# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.load_static_weights needs to be set to False
|
||||
# Global.save_inference_dir Set the address where the converted model will be saved.
|
||||
|
||||
python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/
|
||||
python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/
|
||||
```
|
||||
|
||||
When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.checkpoints` parameter in the configuration file.
|
||||
When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.pretrained_model` parameter in the configuration file.
|
||||
After the conversion is successful, there are three files in the model save directory:
|
||||
```
|
||||
inference/det_db/
|
||||
|
@ -71,18 +71,18 @@ inference/det_db/
|
|||
|
||||
Download the lightweight Chinese recognition model:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/
|
||||
```
|
||||
|
||||
The recognition model is converted to the inference model in the same way as the detection, as follows:
|
||||
```
|
||||
# -c Set the training algorithm yml configuration file
|
||||
# -o Set optional parameters
|
||||
# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.load_static_weights needs to be set to False
|
||||
# Global.save_inference_dir Set the address where the converted model will be saved.
|
||||
|
||||
python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/
|
||||
python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/
|
||||
```
|
||||
|
||||
If you have a model trained on your own dataset with a different dictionary file, please make sure that you modify the `character_dict_path` in the configuration file to your dictionary file path.
|
||||
|
@ -100,18 +100,18 @@ inference/det_db/
|
|||
|
||||
Download the angle classification model:
|
||||
```
|
||||
wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/
|
||||
wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/
|
||||
```
|
||||
|
||||
The angle classification model is converted to the inference model in the same way as the detection, as follows:
|
||||
```
|
||||
# -c Set the training algorithm yml configuration file
|
||||
# -o Set optional parameters
|
||||
# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
|
||||
# Global.load_static_weights needs to be set to False
|
||||
# Global.save_inference_dir Set the address where the converted model will be saved.
|
||||
|
||||
python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/
|
||||
python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/
|
||||
```
|
||||
|
||||
After the conversion is successful, there are two files in the directory:
|
||||
|
@ -158,10 +158,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di
|
|||
<a name="DB_DETECTION"></a>
|
||||
### 2. DB TEXT DETECTION MODEL INFERENCE
|
||||
|
||||
First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert:
|
||||
First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)), you can use the following command to convert:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db
|
||||
```
|
||||
|
||||
DB text detection model inference, you can execute the following command:
|
||||
|
@ -179,10 +179,10 @@ The visualized text detection results are saved to the `./inference_results` fol
|
|||
<a name="EAST_DETECTION"></a>
|
||||
### 3. EAST TEXT DETECTION MODEL INFERENCE
|
||||
|
||||
First, convert the model saved in the EAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert:
|
||||
First, convert the model saved in the EAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east
|
||||
```
|
||||
**For EAST text detection model inference, you need to set the parameter ``--det_algorithm="EAST"``**, run the following command:
|
||||
|
||||
|
@ -200,10 +200,10 @@ The visualized text detection results are saved to the `./inference_results` fol
|
|||
<a name="SAST_DETECTION"></a>
|
||||
### 4. SAST TEXT DETECTION MODEL INFERENCE
|
||||
#### (1). Quadrangle text detection model (ICDAR2015)
|
||||
First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert:
|
||||
First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15
|
||||
```
|
||||
|
||||
**For SAST quadrangle text detection model inference, you need to set the parameter `--det_algorithm="SAST"`**, run the following command:
|
||||
|
@ -217,10 +217,10 @@ The visualized text detection results are saved to the `./inference_results` fol
|
|||
![](../imgs_results/det_res_img_10_sast.jpg)
|
||||
|
||||
#### (2). Curved text detection model (Total-Text)
|
||||
First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert:
|
||||
First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link (coming soon)](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
|
||||
python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
|
||||
```
|
||||
|
||||
**For SAST curved text detection model inference, you need to set the parameter `--det_algorithm="SAST"` and `--det_sast_polygon=True`**, run the following command:
|
||||
|
@ -262,10 +262,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695]
|
|||
|
||||
Taking CRNN as an example, we introduce the recognition model inference based on CTC loss. Rosetta and Star-Net are used in a similar way, No need to set the recognition algorithm parameter rec_algorithm.
|
||||
|
||||
First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](link)). It can be converted as follow:
|
||||
First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)). It can be converted as follow:
|
||||
|
||||
```
|
||||
python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
|
||||
python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
|
||||
```
|
||||
|
||||
For CRNN text recognition model inference, execute the following commands:
|
||||
|
@ -335,7 +335,7 @@ The following will introduce the angle classification model inference.
|
|||
For angle classification model inference, you can execute the following commands:
|
||||
|
||||
```
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
|
||||
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
|
||||
```
|
||||
|
||||
![](../imgs_words_en/word_10.png)
|
||||
|
@ -343,8 +343,7 @@ python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg"
|
|||
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
|
||||
|
||||
```
|
||||
infer_img: doc/imgs_words_en/word_10.png
|
||||
result: ('0', 0.9999995)
|
||||
Predicts of ./doc/imgs_words_en/word_10.png:['0', 0.9999995]
|
||||
```
|
||||
|
||||
<a name="CONCATENATION"></a>
|
||||
|
|
|
@ -9,13 +9,13 @@ Please refer to [quick installation](./installation_en.md) to configure the Padd
|
|||
|
||||
## 2.inference models
|
||||
|
||||
The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v1.1 series model list](../doc_ch/models_list.md)
|
||||
The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v2.0 series model list](../doc_ch/models_list.md)
|
||||
|
||||
|
||||
| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model |
|
||||
| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model |
|
||||
| ------------ | --------------- | ----------------|---- | ---------- | -------- |
|
||||
| Ultra-lightweight Chinese OCR model(xM) | ch_ppocr_mobile_v1.1_xx |Mobile-side/Server-side|[inference model](link) / [pretrained model](link)|[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) |
|
||||
| Universal Chinese OCR model(xM) |ch_ppocr_server_v1.1_xx|Server-side |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) |
|
||||
| Ultra-lightweight Chinese OCR model(8.1M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| Universal Chinese OCR model(155.1M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
|
||||
|
||||
|
||||
* If `wget` is not installed in the windows environment, you can copy the link to the browser to download when downloading the model, then uncompress it and place it in the corresponding directory.
|
||||
|
||||
|
@ -37,28 +37,29 @@ Take the ultra-lightweight model as an example:
|
|||
```
|
||||
mkdir inference && cd inference
|
||||
# Download the detection model of the ultra-lightweight Chinese OCR model and uncompress it
|
||||
wget link && tar xf ch_ppocr_mobile_v1.1_det_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
|
||||
# Download the recognition model of the ultra-lightweight Chinese OCR model and uncompress it
|
||||
wget link && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar
|
||||
# Download the direction classifier model of the ultra-lightweight Chinese OCR model and uncompress it
|
||||
wget link && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
|
||||
# Download the angle classifier model of the ultra-lightweight Chinese OCR model and uncompress it
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||
cd ..
|
||||
```
|
||||
|
||||
After decompression, the file structure should be as follows:
|
||||
|
||||
```
|
||||
|-inference
|
||||
|-ch_ppocr_mobile_v1.1_det_infer
|
||||
|- model
|
||||
|- params
|
||||
|-ch_ppocr_mobile_v1.1_rec_infer
|
||||
|- model
|
||||
|- params
|
||||
|-ch_ppocr_mobile_v1.1_cls_infer
|
||||
|- model
|
||||
|- params
|
||||
...
|
||||
├── ch_ppocr_mobile_v2.0_cls_infer
|
||||
│ ├── inference.pdiparams
|
||||
│ ├── inference.pdiparams.info
|
||||
│ └── inference.pdmodel
|
||||
├── ch_ppocr_mobile_v2.0_det_infer
|
||||
│ ├── inference.pdiparams
|
||||
│ ├── inference.pdiparams.info
|
||||
│ └── inference.pdmodel
|
||||
├── ch_ppocr_mobile_v2.0_rec_infer
|
||||
├── inference.pdiparams
|
||||
├── inference.pdiparams.info
|
||||
└── inference.pdmodel
|
||||
```
|
||||
|
||||
## 3. Single image or image set prediction
|
||||
|
@ -70,13 +71,13 @@ After decompression, the file structure should be as follows:
|
|||
```bash
|
||||
|
||||
# Predict a single image specified by image_dir
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
|
||||
# Predict imageset specified by image_dir
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
|
||||
# If you want to use the CPU for prediction, you need to set the use_gpu parameter to False
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False
|
||||
```
|
||||
|
||||
- Universal Chinese OCR model
|
||||
|
@ -85,7 +86,7 @@ Please follow the above steps to download the corresponding models and update th
|
|||
|
||||
```
|
||||
# Predict a single image specified by image_dir
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True
|
||||
```
|
||||
|
||||
* Note
|
||||
|
|
|
@ -67,6 +67,7 @@ def build_dataloader(config, mode, device, logger):
|
|||
drop_last = loader_config['drop_last']
|
||||
num_workers = loader_config['num_workers']
|
||||
|
||||
use_shared_memory = False
|
||||
if mode == "Train":
|
||||
#Distribute data to multiple cards
|
||||
batch_sampler = DistributedBatchSampler(
|
||||
|
@ -74,6 +75,7 @@ def build_dataloader(config, mode, device, logger):
|
|||
batch_size=batch_size,
|
||||
shuffle=False,
|
||||
drop_last=drop_last)
|
||||
use_shared_memory = True
|
||||
else:
|
||||
#Distribute data to single card
|
||||
batch_sampler = BatchSampler(
|
||||
|
@ -87,6 +89,7 @@ def build_dataloader(config, mode, device, logger):
|
|||
batch_sampler=batch_sampler,
|
||||
places=device,
|
||||
num_workers=num_workers,
|
||||
return_list=True)
|
||||
return_list=True,
|
||||
use_shared_memory=use_shared_memory)
|
||||
|
||||
return data_loader
|
||||
|
|
|
@ -28,7 +28,7 @@ from ppocr.modeling.architectures import build_model
|
|||
from ppocr.postprocess import build_post_process
|
||||
from ppocr.utils.save_load import init_model
|
||||
from ppocr.utils.logging import get_logger
|
||||
from tools.program import load_config, merge_config,ArgsParser
|
||||
from tools.program import load_config, merge_config, ArgsParser
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -36,7 +36,6 @@ def main():
|
|||
config = load_config(FLAGS.config)
|
||||
merge_config(FLAGS.opt)
|
||||
logger = get_logger()
|
||||
print(config)
|
||||
# build post process
|
||||
|
||||
post_process_class = build_post_process(config['PostProcess'],
|
||||
|
|
|
@ -113,7 +113,6 @@ def merge_config(config):
|
|||
global_config.keys(), sub_keys[0])
|
||||
cur = global_config[sub_keys[0]]
|
||||
for idx, sub_key in enumerate(sub_keys[1:]):
|
||||
assert (sub_key in cur)
|
||||
if idx == len(sub_keys) - 2:
|
||||
cur[sub_key] = value
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue