PaddleOCR/configs/e2e/e2e_r50_vd_pg.yml

113 lines
3.2 KiB
YAML
Raw Normal View History

# Global: run-wide switches, schedule length, logging cadence and output paths.
Global:
  use_gpu: True
  epoch_num: 600
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/pg_r50_vd_tt/
  save_epoch_step: 10
  # [start_iteration, interval]: evaluation is run every 1000 iterations
  # after the 0th iteration.
  eval_batch_step: [ 0, 1000 ]
  # 1. If pretrained_model is saved in static mode, such as classification pretrained model
  #    from static branch, load_static_weights must be set as True.
  # 2. If you want to finetune the pretrained models we provide in the docs,
  #    you should set load_static_weights as False.
  load_static_weights: True
  cal_metric_during_train: False
  # pretrained_model, checkpoints, save_inference_dir and infer_img carry no
  # value here, so they load as null.
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  save_res_path: ./output/pg_r50_vd_tt/predicts_pg.txt
# Architecture: the e2e PGNet model, described by its Backbone, Neck and Head.
Architecture:
  model_type: e2e
  algorithm: PGNet
  # Transform carries no value (loads as null).
  # NOTE(review): nesting was restored from a flattened copy; Transform is
  # assumed to be an empty sibling of Backbone rather than its parent - confirm.
  Transform:
  Backbone:
    name: ResNet
    layers: 50
  Neck:
    name: PGFPN
    model_name: large
  Head:
    name: PGHead
    model_name: large
# Loss: selects the loss by name; no further options are set here.
Loss:
  name: PGLoss
# Optimizer: Adam with its two beta coefficients, a learning-rate block and a
# regularizer block.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    learning_rate: 0.001
  regularizer:
    name: 'L2'
    # NOTE(review): a factor of 0 presumably makes the L2 term a no-op - confirm
    # this is intended.
    factor: 0
# PostProcess: post-processor selected by name, with its three thresholds.
PostProcess:
  name: PGPostProcess
  score_thresh: 0.8
  cover_thresh: 0.1
  nms_thresh: 0.2
# Metric: evaluation metric selected by name; main_indicator names the score
# reported as the headline result.
Metric:
  name: E2EMetric
  # Character set: the 36 symbols 0-9 and A-Z (same list as in Train and Eval).
  Lexicon_Table: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ]
  main_indicator: f_score_e2e
# Train: training dataset, its per-sample transform pipeline, and loader options.
Train:
  dataset:
    # NOTE(review): spelled `PGDateSet` here but `PGDataSet` in the Eval section
    # of this file - confirm which spelling the dataset registry expects and
    # make both sections agree.
    name: PGDateSet
    label_file_list: [./train_data/total_text/train/]
    ratio_list: [1.0]
    data_format: icdar
    # Transforms are listed in the order they are declared.
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - PGProcessTrain:
          # Same value as loader.batch_size_per_card below.
          batch_size: 14
          min_crop_size: 24
          min_text_size: 4
          max_text_size: 512
          Lexicon_Table: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ]
      - KeepKeys:
          keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: True
    batch_size_per_card: 14
    num_workers: 16
# Eval: evaluation dataset, its per-sample transform pipeline, and loader options.
Eval:
  dataset:
    name: PGDataSet
    data_dir: ./train_data/
    label_file_list: [./train_data/total_text/test/]
    # Transforms are listed in the order they are declared.
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - E2ELabelEncode:
          Lexicon_Table: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ]
          max_len: 50
      - E2EResizeForTest:
          valid_set: totaltext
          max_side_len: 768
      - NormalizeImage:
          # NOTE(review): `1./255.` is not a YAML number, so it loads as the
          # string "1./255." - presumably the consumer evaluates it; confirm.
          scale: 1./255.
          mean: [ 0.485, 0.456, 0.406 ]
          std: [ 0.229, 0.224, 0.225 ]
          order: 'hwc'
      # ToCHWImage takes no options here (value is null).
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags' ]
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2