PaddleOCR/configs/e2e/e2e_r50_vd_pg.yml

117 lines
3.1 KiB
YAML
Raw Normal View History

2021-03-08 14:15:47 +08:00
Global:
use_gpu: True
2021-03-08 14:15:47 +08:00
epoch_num: 600
log_smooth_window: 20
print_batch_step: 10
2021-03-15 13:58:53 +08:00
save_model_dir: ./output/pgnet_r50_vd_totaltext/
save_epoch_step: 10
# evaluation is run every 1000 iterations after the 0th iteration
2021-03-08 14:15:47 +08:00
eval_batch_step: [ 0, 1000 ]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
2021-04-09 16:04:02 +08:00
load_static_weights: False
2021-03-08 14:15:47 +08:00
cal_metric_during_train: False
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
2021-03-19 11:59:35 +08:00
valid_set: totaltext # two modes: totaltext validates curved words, partvgg validates non-curved words
2021-03-15 13:58:53 +08:00
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
2021-03-19 11:59:35 +08:00
character_dict_path: ppocr/utils/ic15_dict.txt
2021-03-15 13:58:53 +08:00
character_type: EN
2021-03-19 11:59:35 +08:00
max_text_length: 50 # the max length in seq
max_text_nums: 30 # the max seq nums in a pic
tcl_len: 64
2021-03-08 14:15:47 +08:00
Architecture:
model_type: e2e
algorithm: PGNet
2021-03-08 14:15:47 +08:00
Transform:
Backbone:
name: ResNet
layers: 50
Neck:
name: PGFPN
Head:
name: PGHead
Loss:
name: PGLoss
2021-03-19 11:59:35 +08:00
tcl_bs: 64
max_text_length: 50 # the same as Global: max_text_length
max_text_nums: 30 # the same as Global: max_text_nums
pad_num: 36 # the length of dict for pad
2021-03-08 14:15:47 +08:00
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
2021-03-08 14:15:47 +08:00
lr:
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0
2021-03-08 14:15:47 +08:00
PostProcess:
name: PGPostProcess
2021-03-19 11:59:35 +08:00
score_thresh: 0.5
2021-04-11 16:40:46 +08:00
mode: fast # two modes: fast or slow
2021-03-08 14:15:47 +08:00
Metric:
name: E2EMetric
2021-04-10 14:44:32 +08:00
gt_mat_dir: # the dir of gt_mat
2021-03-19 11:59:35 +08:00
character_dict_path: ppocr/utils/ic15_dict.txt
2021-03-08 16:09:11 +08:00
main_indicator: f_score_e2e
2021-03-08 14:15:47 +08:00
Train:
dataset:
2021-03-15 13:58:53 +08:00
name: PGDataSet
2021-04-13 13:50:44 +08:00
data_dir: ./train_data/
2021-03-15 13:58:53 +08:00
label_file_list: [.././train_data/total_text/train/]
ratio_list: [1.0]
2021-03-15 13:58:53 +08:00
data_format: icdar # two data formats: icdar/textnet
2021-03-08 14:15:47 +08:00
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
2021-04-13 13:50:44 +08:00
- E2ELabelEncode:
2021-03-08 14:15:47 +08:00
- PGProcessTrain:
2021-03-15 13:58:53 +08:00
batch_size: 14 # same as loader: batch_size_per_card
2021-03-08 14:15:47 +08:00
min_crop_size: 24
min_text_size: 4
max_text_size: 512
- KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader:
shuffle: True
drop_last: True
batch_size_per_card: 14
num_workers: 16
2021-03-08 14:15:47 +08:00
Eval:
dataset:
name: PGDataSet
2021-03-08 14:15:47 +08:00
data_dir: ./train_data/
label_file_list: [./train_data/total_text/test/]
2021-03-08 14:15:47 +08:00
transforms:
- DecodeImage: # load image
2021-04-09 16:04:02 +08:00
img_mode: RGB
2021-03-08 14:15:47 +08:00
channel_first: False
- E2EResizeForTest:
max_side_len: 768
- NormalizeImage:
scale: 1./255.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
2021-04-13 13:50:44 +08:00
keep_keys: [ 'image', 'shape', 'img_id']
2021-03-08 14:15:47 +08:00
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 2