# PaddleOCR/configs/rec/rec_mtb_nrtr.yml
# Run-wide settings: device, schedule length, logging cadence, output
# locations and character-set options.
Global:
  use_gpu: True
  epoch_num: 21
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/nrtr/
  save_epoch_step: 1
  # evaluation is run every 2000 iterations
  eval_batch_step: [0, 2000]
  cal_metric_during_train: True
  # The next three keys are intentionally left empty (parsed as null).
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  # character_dict_path is left empty (null).
  character_dict_path:
  character_type: EN_symbol
  max_text_length: 25
  infer_mode: False
  use_space_char: True
  save_res_path: ./output/rec/predicts_nrtr.txt
# Optimizer selection plus its learning-rate schedule and regularizer.
# `lr` and `regularizer` are nested mappings; each has its own `name`.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.99
  clip_norm: 5.0
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 2
  regularizer:
    name: 'L2'
    # NOTE(review): a factor of 0. presumably disables the L2 penalty - confirm.
    factor: 0.
# Model definition: algorithm selection followed by the Transform,
# Backbone and Head sub-sections.
Architecture:
  model_type: rec
  algorithm: NRTR
  in_channels: 1
  # Transform is left empty (parsed as null).
  Transform:
  Backbone:
    name: MTB
    cnn_num: 2
  Head:
    name: Transformer
    d_model: 512
    num_encoder_layers: 6
    # A beam size greater than 0 enables beam search during evaluation.
    beam_size: 10
# Loss selection and its options.
Loss:
  name: NRTRLoss
  smoothing: True
# Post-processing step applied to model output, selected by name.
PostProcess:
  name: NRTRLabelDecode
# Evaluation metric and the indicator reported as the main score.
Metric:
  name: RecMetric
  main_indicator: acc
# Training data: LMDB dataset location, the ordered transform pipeline,
# and data-loader settings.
Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    # Each list item is a single-key mapping: transform name -> its options.
    transforms:
      - NRTRDecodeImage:  # load image
          img_mode: BGR
          channel_first: False
      - NRTRLabelEncode:  # Class handling label
      - NRTRRecResizeImg:
          image_shape: [100, 32]
          resize_type: PIL  # PIL or OpenCV
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: True
    batch_size_per_card: 512
    drop_last: True
    num_workers: 8
# Evaluation data: LMDB dataset location, the ordered transform pipeline,
# and data-loader settings.
Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    # Each list item is a single-key mapping: transform name -> its options.
    transforms:
      - NRTRDecodeImage:  # load image
          img_mode: BGR
          channel_first: False
      - NRTRLabelEncode:  # Class handling label
      - NRTRRecResizeImg:
          image_shape: [100, 32]
          resize_type: PIL  # PIL or OpenCV
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 1
    use_shared_memory: False