From b5f9a7ec5b09b598d7b35c6aafdc2a4b0cd5e857 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Fri, 11 Dec 2020 18:48:23 +0800 Subject: [PATCH 1/3] update link in doc --- configs/det/bak/det_r50_vd_db.yml | 130 ------------------ .../bak/rec_mv3_none_bilstm_ctc_simple.yml | 106 -------------- .../rec/bak/rec_r34_vd_none_bilstm_ctc.yml | 104 -------------- configs/rec/bak/rec_r34_vd_none_none_ctc.yml | 103 -------------- ...yaml => rec_chinese_common_train_v2.0.yml} | 0 ...0.yaml => rec_chinese_lite_train_v2.0.yml} | 0 doc/doc_ch/inference.md | 43 +++--- doc/doc_ch/quickstart.md | 43 +++--- doc/doc_en/inference_en.md | 45 +++--- doc/doc_en/quickstart_en.md | 49 +++---- tools/export_model.py | 3 +- tools/program.py | 1 - 12 files changed, 91 insertions(+), 536 deletions(-) delete mode 100644 configs/det/bak/det_r50_vd_db.yml delete mode 100644 configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml delete mode 100644 configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml delete mode 100644 configs/rec/bak/rec_r34_vd_none_none_ctc.yml rename configs/rec/ch_ppocr_v2.0/{rec_chinese_common_train_v2.0.yaml => rec_chinese_common_train_v2.0.yml} (100%) rename configs/rec/ch_ppocr_v2.0/{rec_chinese_lite_train_v2.0.yaml => rec_chinese_lite_train_v2.0.yml} (100%) diff --git a/configs/det/bak/det_r50_vd_db.yml b/configs/det/bak/det_r50_vd_db.yml deleted file mode 100644 index a07273b4..00000000 --- a/configs/det/bak/det_r50_vd_db.yml +++ /dev/null @@ -1,130 +0,0 @@ -Global: - use_gpu: true - epoch_num: 1200 - log_smooth_window: 20 - print_batch_step: 2 - save_model_dir: ./output/det_r50_vd/ - save_epoch_step: 1200 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 8 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: False - pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/ - checkpoints: - save_inference_dir: - use_visualdl: True - infer_img: doc/imgs_en/img_10.jpg - save_res_path: ./output/det_db/predicts_db.txt - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0 - -Architecture: - type: det - algorithm: DB - Transform: - Backbone: - name: ResNet - layers: 50 - Neck: - name: FPN - out_channels: 256 - Head: - name: DBHead - k: 50 - -Loss: - name: DBLoss - balance_loss: true - main_loss_type: DiceLoss - alpha: 5 - beta: 10 - ohem_ratio: 3 - -PostProcess: - name: DBPostProcess - thresh: 0.3 - box_thresh: 0.6 - max_candidates: 1000 - unclip_ratio: 1.5 - -Metric: - name: DetMetric - main_indicator: hmean - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./detection/ - file_list: - - ./detection/train_icdar2015_label.txt # dataset1 - ratio_list: [1.0] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - DetLabelEncode: # Class handling label - - IaaAugment: - augmenter_args: - - { 'type': Fliplr, 'args': { 'p': 0.5 } } - - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } } - - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } } - - EastRandomCropData: - size: [ 640,640 ] - max_tries: 50 - keep_ratio: true - - MakeBorderMap: - shrink_ratio: 0.4 - thresh_min: 0.3 - thresh_max: 0.7 - - MakeShrinkMap: - shrink_ratio: 0.4 - min_text_size: 8 - - NormalizeImage: - scale: 1./255. 
- mean: [ 0.485, 0.456, 0.406 ] - std: [ 0.229, 0.224, 0.225 ] - order: 'hwc' - - ToCHWImage: - - keepKeys: - keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # dataloader will return list in this order - loader: - shuffle: True - drop_last: False - batch_size: 16 - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./detection/ - file_list: - - ./detection/test_icdar2015_label.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - DetLabelEncode: # Class handling label - - DetResizeForTest: - image_shape: [736,1280] - - NormalizeImage: - scale: 1./255. - mean: [ 0.485, 0.456, 0.406 ] - std: [ 0.229, 0.224, 0.225 ] - order: 'hwc' - - ToCHWImage: - - keepKeys: - keep_keys: ['image','shape','polys','ignore_tags'] - loader: - shuffle: False - drop_last: False - batch_size: 1 # must be 1 - num_workers: 8 \ No newline at end of file diff --git a/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml b/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml deleted file mode 100644 index 1be7512c..00000000 --- a/configs/rec/bak/rec_mv3_none_bilstm_ctc_simple.yml +++ /dev/null @@ -1,106 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/mv3_none_bilstm_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: small - small_stride: [ 1, 2, 2, 2 ] - Neck: - name: SequenceEncoder - encoder_type: fc - hidden_size: 96 - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml deleted file mode 100644 index 36e3d1c8..00000000 --- 
a/configs/rec/bak/rec_r34_vd_none_bilstm_ctc.yml +++ /dev/null @@ -1,104 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/res34_none_bilstm_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: ResNet - layers: 34 - Neck: - name: SequenceEncoder - encoder_type: fc - hidden_size: 96 - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/bak/rec_r34_vd_none_none_ctc.yml b/configs/rec/bak/rec_r34_vd_none_none_ctc.yml deleted file mode 100644 index 641e855b..00000000 --- a/configs/rec/bak/rec_r34_vd_none_none_ctc.yml +++ /dev/null @@ -1,103 +0,0 @@ -Global: - use_gpu: false - epoch_num: 500 - log_smooth_window: 20 - print_batch_step: 10 - save_model_dir: ./output/rec/res34_none_none_ctc/ - save_epoch_step: 500 - # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 127 - # if pretrained_model is saved in static mode, load_static_weights must set to True - load_static_weights: True - cal_metric_during_train: True - pretrained_model: - checkpoints: - save_inference_dir: - use_visualdl: False - infer_img: doc/imgs_words/ch/word_1.jpg - # for data or label process - max_text_length: 80 - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: 'ch' - use_space_char: False - infer_mode: False - use_tps: False - - -Optimizer: - name: Adam - beta1: 0.9 - beta2: 0.999 - learning_rate: - lr: 0.001 - regularizer: - name: 'L2' - factor: 0.00001 - -Architecture: - type: rec - algorithm: CRNN - Transform: - Backbone: - name: ResNet - layers: 34 - Neck: - name: SequenceEncoder - encoder_type: reshape - Head: - name: CTC - fc_decay: 0.00001 - -Loss: - name: CTCLoss - -PostProcess: - name: CTCLabelDecode - -Metric: - 
name: RecMetric - main_indicator: acc - -TRAIN: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/train.txt # dataset1 - ratio_list: [ 0.4,0.6 ] - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecAug: - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - batch_size: 256 - shuffle: True - drop_last: True - num_workers: 8 - -EVAL: - dataset: - name: SimpleDataSet - data_dir: ./rec - file_list: - - ./rec/val.txt - transforms: - - DecodeImage: # load image - img_mode: BGR - channel_first: False - - CTCLabelEncode: # Class handling label - - RecResizeImg: - image_shape: [ 3,32,320 ] - - keepKeys: - keep_keys: [ 'image','label','length' ] # dataloader will return list in this order - loader: - shuffle: False - drop_last: False - batch_size: 256 - num_workers: 8 diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml similarity index 100% rename from configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml rename to configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml similarity index 100% rename from configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml rename to configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index dfd84ccc..8f4bea07 100644 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -41,7 +41,7 @@ inference 模型(`paddle.jit.save`保存的模型) 下载超轻量级中文检测模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ ``` 上述模型是以MobileNetV3为backbone训练的DB算法,将训练好的模型转换成inference模型只需要运行如下命令: ``` @@ -51,9 +51,9 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar # Global.load_static_weights 参数需要设置为 False。 # Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ +python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ ``` -转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.checkpoints`参数,其指向训练中保存的模型参数文件。 +转inference模型时,使用的配置文件和训练时使用的配置文件相同。另外,还需要设置配置文件中的`Global.pretrained_model`参数,其指向训练中保存的模型参数文件。 转换成功后,在模型保存目录下有三个文件: ``` inference/det_db/ @@ -67,7 +67,7 @@ inference/det_db/ 下载超轻量中文识别模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ ``` 识别模型转inference模型与检测的方式相同,如下: @@ -78,7 +78,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar # Global.load_static_weights 参数需要设置为 False。 # 
Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ ``` **注意:**如果您是在自己的数据集上训练的模型,并且调整了中文字符的字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。 @@ -96,7 +96,7 @@ python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_trai 下载方向分类模型: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ ``` 方向分类模型转inference模型与检测的方式相同,如下: @@ -107,7 +107,7 @@ wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar # Global.load_static_weights 参数需要设置为 False。 # Global.save_inference_dir参数设置转换的模型将保存的地址。 -python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ +python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ ``` 转换成功后,在目录下有三个文件: @@ -152,10 +152,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di ### 2. DB文本检测模型推理 -首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将DB文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db +python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db ``` DB文本检测模型推理,可以执行如下命令: @@ -173,10 +173,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img_10.jpg" --det_ ### 3. EAST文本检测模型推理 -首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将EAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例( [模型下载地址 (coming soon)](link) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east +python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east ``` **EAST文本检测模型推理,需要设置参数`--det_algorithm="EAST"`**,可以执行如下命令: @@ -194,9 +194,9 @@ python3 tools/infer/predict_det.py --det_algorithm="EAST" --image_dir="./doc/img ### 4. SAST文本检测模型推理 #### (1). 
四边形文本检测模型(ICDAR2015) -首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 ``` **SAST文本检测模型推理,需要设置参数`--det_algorithm="SAST"`**,可以执行如下命令: @@ -208,10 +208,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img ![](../imgs_results/det_res_img_10_sast.jpg) #### (2). 弯曲文本检测模型(Total-Text) -首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +首先将SAST文本检测训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址(coming soon)](link)),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt ``` @@ -254,10 +254,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695] 我们以 CRNN 为例,介绍基于CTC损失的识别模型推理。 Rosetta 使用方式类似,不用设置识别算法参数rec_algorithm。 首先将 Rosetta 文本识别训练过程中保存的模型,转换成inference model。以基于Resnet34_vd骨干网络,使用MJSynth和SynthText两个英文文本识别合成数据集训练 -的模型为例([模型下载地址](link)),可以使用如下命令进行转换: +的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar) ),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn +python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn ``` @@ -313,9 +313,9 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" - 执行命令后,上图的预测结果为: ``` text -2020-09-19 16:15:05,076-INFO: index: [205 206 38 39] -2020-09-19 16:15:05,077-INFO: word : 바탕으로 -2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535 +2020-09-19 16:15:05,076-INFO: index: [205 206 38 39] +2020-09-19 16:15:05,077-INFO: word : 바탕으로 +2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535 ``` @@ -337,8 +337,7 @@ python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" 执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下: ``` -infer_img: doc/imgs_words/ch/word_1.jpg - result: ('0', 0.9998784) +Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982] ``` diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index b1025885..a2ab2346 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -9,12 +9,12 @@ ## 2.inference模型下载 -* 移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR 
v1.1 系列模型下载](../doc_ch/models_list.md) +* 移动端和服务器端的检测与识别模型如下,更多模型下载(包括多语言),可以参考[PP-OCR v2.0 系列模型下载](../doc_ch/models_list.md) | 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| 中英文超轻量OCR模型(xM) | |移动端&服务器端|[推理模型](link) / [预训练模型](link)|[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) | -| 中英文通用OCR模型(xM) | |服务器端 |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}) |[推理模型]({}) / [预训练模型]({}}) | +| 中英文超轻量OCR模型(8.6M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| 中英文通用OCR模型(146.4M) | ch_ppocr_server_v2.0_xx |服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | * windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下 @@ -37,28 +37,29 @@ cd .. ``` mkdir inference && cd inference # 下载超轻量级中文OCR模型的检测模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_det_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar # 下载超轻量级中文OCR模型的识别模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar # 下载超轻量级中文OCR模型的文本方向分类器模型并解压 -wget {} && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar cd .. ``` 解压完毕后应有如下文件结构: ``` -|-inference - |-ch_ppocr_mobile_v1.1_det_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_rec_infer - |- model - |- params - |-ch_ppocr_mobile-v1.1_cls_infer - |- model - |- params - ... +├── ch_ppocr_mobile_v2.0_cls_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_det_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_rec_infer + ├── inference.pdiparams + ├── inference.pdiparams.info + └── inference.pdmodel ``` ## 3.单张图像或者图像集合预测 @@ -68,13 +69,13 @@ cd .. 
```bash # 预测image_dir指定的单张图像 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # 预测image_dir指定的图像集合 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # 如果想使用CPU进行预测,需设置use_gpu参数为False -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False ``` - 通用中文OCR模型 @@ -83,7 +84,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode ```bash # 预测image_dir指定的单张图像 -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True ``` * 注意: diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index ac1b634d..ee567451 100644 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -43,21 +43,21 @@ Next, we first introduce how to convert a trained model into an inference model, Download the lightweight Chinese detection model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_det_train.tar -C ./ch_lite/ ``` The above model is a DB algorithm trained with MobileNetV3 as the backbone. 
To convert the trained model into an inference model, just run the following command: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ +python3 tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db/ ``` -When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.checkpoints` parameter in the configuration file. +When converting to an inference model, the configuration file used is the same as the configuration file used during training. In addition, you also need to set the `Global.pretrained_model` parameter in the configuration file. After the conversion is successful, there are three files in the model save directory: ``` inference/det_db/ @@ -71,18 +71,18 @@ inference/det_db/ Download the lightweight Chinese recognition model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_rec_train.tar -C ./ch_lite/ ``` The recognition model is converted to the inference model in the same way as the detection, as follows: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ +python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_rec_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn/ ``` If you have a model trained on your own dataset with a different dictionary file, please make sure that you modify the `character_dict_path` in the configuration file to your dictionary file path. 
@@ -100,18 +100,18 @@ inference/det_db/ Download the angle classification model: ``` -wget -P ./ch_lite/ {link} && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ +wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/ ``` The angle classification model is converted to the inference model in the same way as the detection, as follows: ``` # -c Set the training algorithm yml configuration file # -o Set optional parameters -# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. +# Global.pretrained_model parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams. # Global.load_static_weights needs to be set to False # Global.save_inference_dir Set the address where the converted model will be saved. -python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ +python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/cls/ ``` After the conversion is successful, there are two files in the directory: @@ -158,10 +158,10 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/2.jpg" --det_model_di ### 2. DB TEXT DETECTION MODEL INFERENCE -First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the DB text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.checkpoints=./det_r50_vd_db_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db +python3 tools/export_model.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=./det_r50_vd_db_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_db ``` DB text detection model inference, you can execute the following command: @@ -179,10 +179,10 @@ The visualized text detection results are saved to the `./inference_results` fol ### 3. EAST TEXT DETECTION MODEL INFERENCE -First, convert the model saved in the EAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the EAST text detection training process into an inference model. 
Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.checkpoints=./det_r50_vd_east_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east +python3 tools/export_model.py -c configs/det/det_r50_vd_east.yml -o Global.pretrained_model=./det_r50_vd_east_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_east ``` **For EAST text detection model inference, you need to set the parameter ``--det_algorithm="EAST"``**, run the following command: @@ -200,10 +200,10 @@ The visualized text detection results are saved to the `./inference_results` fol ### 4. SAST TEXT DETECTION MODEL INFERENCE #### (1). Quadrangle text detection model (ICDAR2015) -First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link](link)), you can use the following command to convert: +First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the ICDAR2015 English dataset as an example ([model download link (coming soon)](link)), you can use the following command to convert: ``` -python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.checkpoints=./det_r50_vd_sast_icdar15_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 +python3 tools/export_model.py -c configs/det/det_r50_vd_sast_icdar15.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_ic15 ``` **For SAST quadrangle text detection model inference, you need to set the parameter `--det_algorithm="SAST"`**, run the following command: @@ -217,10 +217,10 @@ The visualized text detection results are saved to the `./inference_results` fol ![](../imgs_results/det_res_img_10_sast.jpg) #### (2). Curved text detection model (Total-Text) -First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert: +First, convert the model saved in the SAST text detection training process into an inference model. 
Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link (coming soon)](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert:
```
-python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.checkpoints=./det_r50_vd_sast_totaltext_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
+python3 tools/export_model.py -c configs/det/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/det_sast_tt
```
**For SAST curved text detection model inference, you need to set the parameter `--det_algorithm="SAST"` and `--det_sast_polygon=True`**, run the following command:
@@ -262,10 +262,10 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['实力活力', 0.89552695]
Taking CRNN as an example, we introduce the recognition model inference based on CTC loss. Rosetta and Star-Net are used in a similar way, No need to set the recognition algorithm parameter rec_algorithm.
-First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](link)). It can be converted as follow:
+First, convert the model saved in the CRNN text recognition training process into an inference model. Taking the model based on Resnet34_vd backbone network, using MJSynth and SynthText (two English text recognition synthetic datasets) for training, as an example ([model download address](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)). It can be converted as follows:
```
-python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o Global.checkpoints=./rec_r34_vd_none_bilstm_ctc_v2.0.train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
+python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
```
For CRNN text recognition model inference, execute the following commands:
@@ -335,7 +335,7 @@ The following will introduce the angle classification model inference.
For angle classification model inference, you can execute the following commands:
```
-python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
```
![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
``` -infer_img: doc/imgs_words_en/word_10.png - result: ('0', 0.9999995) + Predicts of ./doc/imgs_words_en/word_10.png:['0', 0.9999995] ``` diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index 6d4ce95d..05566138 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -9,13 +9,13 @@ Please refer to [quick installation](./installation_en.md) to configure the Padd ## 2.inference models -The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v1.1 series model list](../doc_ch/models_list.md) +The detection and recognition models on the mobile and server sides are as follows. For more models (including multiple languages), please refer to [PP-OCR v2.0 series model list](../doc_ch/models_list.md) - -| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model | +| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| Ultra-lightweight Chinese OCR model(xM) | ch_ppocr_mobile_v1.1_xx |Mobile-side/Server-side|[inference model](link) / [pretrained model](link)|[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) | -| Universal Chinese OCR model(xM) |ch_ppocr_server_v1.1_xx|Server-side |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) |[inference model](link) / [pretrained model](link) | +| Ultra-lightweight Chinese OCR model(8.6M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| Universal Chinese OCR model(146.4M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | + * If `wget` is not installed in the windows environment, you can copy the link to the browser to download when downloading the model, then uncompress it and place it in the corresponding directory. 
@@ -37,28 +37,29 @@ Take the ultra-lightweight model as an example: ``` mkdir inference && cd inference # Download the detection model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_det_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar # Download the recognition model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_rec_infer.tar -# Download the direction classifier model of the ultra-lightweight Chinese OCR model and uncompress it -wget link && tar xf ch_ppocr_mobile_v1.1_cls_infer.tar +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar +# Download the angle classifier model of the ultra-lightweight Chinese OCR model and uncompress it +wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar cd .. ``` After decompression, the file structure should be as follows: ``` -|-inference - |-ch_ppocr_mobile_v1.1_det_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_rec_infer - |- model - |- params - |-ch_ppocr_mobile_v1.1_cls_infer - |- model - |- params - ... +├── ch_ppocr_mobile_v2.0_cls_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_det_infer +│ ├── inference.pdiparams +│ ├── inference.pdiparams.info +│ └── inference.pdmodel +├── ch_ppocr_mobile_v2.0_rec_infer + ├── inference.pdiparams + ├── inference.pdiparams.info + └── inference.pdmodel ``` ## 3. Single image or image set prediction @@ -70,13 +71,13 @@ After decompression, the file structure should be as follows: ```bash # Predict a single image specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # Predict imageset specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True # If you want to use the CPU for prediction, you need to set the use_gpu parameter to False -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False +python3 tools/infer/predict_system.py 
--image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_mobile_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_mobile_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True --use_gpu=False ``` - Universal Chinese OCR model @@ -85,7 +86,7 @@ Please follow the above steps to download the corresponding models and update th ``` # Predict a single image specified by image_dir -python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v1.1_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v1.1_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v1.1_cls_infer/" --use_angle_cls=True --use_space_char=True +python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_ppocr_server_v2.0_det_infer/" --rec_model_dir="./inference/ch_ppocr_server_v2.0_rec_infer/" --cls_model_dir="./inference/ch_ppocr_mobile_v2.0_cls_infer/" --use_angle_cls=True --use_space_char=True ``` * Note diff --git a/tools/export_model.py b/tools/export_model.py index 51c06178..74357d58 100755 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -28,7 +28,7 @@ from ppocr.modeling.architectures import build_model from ppocr.postprocess import build_post_process from ppocr.utils.save_load import init_model from ppocr.utils.logging import get_logger -from tools.program import load_config, merge_config,ArgsParser +from tools.program import load_config, merge_config, ArgsParser def main(): @@ -36,7 +36,6 @@ def main(): config = load_config(FLAGS.config) merge_config(FLAGS.opt) logger = get_logger() - print(config) # build post process post_process_class = build_post_process(config['PostProcess'], diff --git a/tools/program.py b/tools/program.py index 8e84d30e..787a59d4 100755 --- a/tools/program.py +++ b/tools/program.py @@ -113,7 +113,6 @@ def merge_config(config): global_config.keys(), sub_keys[0]) cur = global_config[sub_keys[0]] for idx, sub_key in enumerate(sub_keys[1:]): - assert (sub_key in cur) if idx == len(sub_keys) - 2: cur[sub_key] = value else: From 913e11cbb8b31c870816d64b68deedbcd5d8d7b9 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Fri, 11 Dec 2020 18:54:30 +0800 Subject: [PATCH 2/3] update model size --- doc/doc_ch/quickstart.md | 4 ++-- doc/doc_en/quickstart_en.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index a2ab2346..e3d854eb 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -13,8 +13,8 @@ | 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| 中英文超轻量OCR模型(8.6M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | -| 中英文通用OCR模型(146.4M) | ch_ppocr_server_v2.0_xx |服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / 
[预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | +| 中英文超轻量OCR模型(8.1M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| 中英文通用OCR模型(155.1M) | ch_ppocr_server_v2.0_xx |服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | * windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下 diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index 05566138..6b3f2db0 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -13,8 +13,8 @@ The detection and recognition models on the mobile and server sides are as follo | Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| Ultra-lightweight Chinese OCR model(8.6M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | -| Universal Chinese OCR model(146.4M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained 
model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | +| Ultra-lightweight Chinese OCR model(8.1M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | +| Universal Chinese OCR model(155.1M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | * If `wget` is not installed in the windows environment, you can copy the link to the browser to download when downloading the model, then uncompress it and place it in the corresponding directory. From e55e2241318fdd1b8048b008c3a790cb20cbacda Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 11 Dec 2020 21:07:43 +0800 Subject: [PATCH 3/3] set use_shared_momery as False when eval (#1394) --- ppocr/data/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py index 2f95b377..7b0faf12 100644 --- a/ppocr/data/__init__.py +++ b/ppocr/data/__init__.py @@ -67,6 +67,7 @@ def build_dataloader(config, mode, device, logger): drop_last = loader_config['drop_last'] num_workers = loader_config['num_workers'] + use_shared_memory = False if mode == "Train": #Distribute data to multiple cards batch_sampler = DistributedBatchSampler( @@ -74,6 +75,7 @@ def build_dataloader(config, mode, device, logger): batch_size=batch_size, shuffle=False, drop_last=drop_last) + use_shared_memory = True else: #Distribute data to single card batch_sampler = BatchSampler( @@ -87,6 +89,7 @@ def build_dataloader(config, mode, device, logger): batch_sampler=batch_sampler, places=device, num_workers=num_workers, - return_list=True) + return_list=True, + use_shared_memory=use_shared_memory) return data_loader