dygraph first commit

2020-10-13 17:13:33 +08:00 · 2020-10-13 17:13:33 +08:00 · aad3093a91
parent 10f7e5192d
commit aad3093a91
148 changed files with 5591 additions and 26826 deletions
--- a/configs/det/det_db_icdar15_reader.yml
+++ b/configs/det/det_db_icdar15_reader.yml
@ -1,22 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.db_process,DBProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.db_process,DBProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  test_image_shape: [736, 1280]
-  
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.db_process,DBProcessTest
-  infer_img:
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  test_image_shape: [736, 1280]
-  do_eval: True
--- a/configs/det/det_east_icdar15_reader.yml
+++ b/configs/det/det_east_icdar15_reader.yml
@ -1,23 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.east_process,EASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-  background_ratio: 0.125
-  min_crop_side_ratio: 0.1
-  min_text_size: 10
-
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.east_process,EASTProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.east_process,EASTProcessTest
-  infer_img:
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  do_eval: True
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@ -1,54 +1,133 @@
 Global:
-  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
-  save_model_dir: ./output/det_db/
-  save_epoch_step: 200
+  save_model_dir: ./output/20201010/
+  save_epoch_step: 1200
  # evaluation is run every 5000 iterations after the 4000th iteration
-  eval_batch_step: [4000, 5000]
-  train_batch_size_per_card: 16
-  test_batch_size_per_card: 16
-  image_shape: [3, 640, 640]
-  reader_yml: ./configs/det/det_db_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
-  checkpoints:
-  save_res_path: ./output/det_db/predicts_db.txt
+  eval_batch_step: 8
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: /home/zhoujun20/pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
  save_inference_dir:
-  
+  use_visualdl: True
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  learning_rate:
+#    name: Cosine
+    lr: 0.001
+#    warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
 Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-
-Head:
-  function: ppocr.modeling.heads.det_db_head,DBHead
-  model_name: large
-  k: 50
-  inner_channels: 96
-  out_channels: 2
+  type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: FPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50

 Loss:
-  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  name: DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-
 PostProcess:
-  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  name: DBPostProcess
  thresh: 0.3
-  box_thresh: 0.7
+  box_thresh: 0.6
  max_candidates: 1000
-  unclip_ratio: 2.0
+  unclip_ratio: 1.5
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+            - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+      - EastRandomCropData:
+          size: [ 640,640 ]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns a list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size: 16
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736,1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','shape','polys','ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 1 # must be 1
+    num_workers: 6
--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
@ -1,45 +0,0 @@
-Global:
-  algorithm: EAST
-  use_gpu: true
-  epoch_num: 100000
-  log_smooth_window: 20
-  print_batch_step: 5
-  save_model_dir: ./output/det_east/
-  save_epoch_step: 200
-  eval_batch_step: [5000, 5000]
-  train_batch_size_per_card: 16
-  test_batch_size_per_card: 16
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_east_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
-  checkpoints:
-  save_res_path: ./output/det_east/predicts_east.txt
-  save_inference_dir:
-  
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-
-Head:
-  function: ppocr.modeling.heads.det_east_head,EASTHead
-  model_name: small
-  
-Loss:
-  function: ppocr.modeling.losses.det_east_loss,EASTLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-
-PostProcess:
-  function: ppocr.postprocess.east_postprocess,EASTPostPocess
-  score_thresh: 0.8
-  cover_thresh: 0.1
-  nms_thresh: 0.2
-  
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
@ -1,53 +1,132 @@
 Global:
-  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
-  save_model_dir: ./output/det_db/
-  save_epoch_step: 200
-  eval_batch_step: [5000, 5000]
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 16
-  image_shape: [3, 640, 640]
-  reader_yml: ./configs/det/det_db_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_db/predicts_db.txt
-  checkpoints:
+  save_model_dir: ./output/20201010/
+  save_epoch_step: 1200
+  # evaluation is run every eval_batch_step iterations (scalar form; older configs used [start_iter, interval])
+  eval_batch_step: 8
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: /home/zhoujun20/pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
  save_inference_dir:
+  use_visualdl: True
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  learning_rate:
+#    name: Cosine
+    lr: 0.001
+#    warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0

 Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
-  layers: 50
-
-Head:
-  function: ppocr.modeling.heads.det_db_head,DBHead
-  model_name: large
-  k: 50
-  inner_channels: 256
-  out_channels: 2
+  type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: FPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50

 Loss:
-  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  name: DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-
 PostProcess:
-  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  name: DBPostProcess
  thresh: 0.3
-  box_thresh: 0.7
+  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
-  
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+            - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+      - EastRandomCropData:
+          size: [ 640,640 ]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns a list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size: 16
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736,1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','shape','polys','ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 1 # must be 1
+    num_workers: 6
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
@ -1,44 +0,0 @@
-Global:
-  algorithm: EAST
-  use_gpu: true
-  epoch_num: 100000
-  log_smooth_window: 20
-  print_batch_step: 5
-  save_model_dir: ./output/det_east/
-  save_epoch_step: 200
-  eval_batch_step: [5000, 5000]
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 16
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_east_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_east/predicts_east.txt
-  checkpoints:
-  save_inference_dir:
-
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
-  layers: 50
-
-Head:
-  function: ppocr.modeling.heads.det_east_head,EASTHead
-  model_name: large
-  
-Loss:
-  function: ppocr.modeling.losses.det_east_loss,EASTLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-
-PostProcess:
-  function: ppocr.postprocess.east_postprocess,EASTPostPocess
-  score_thresh: 0.8
-  cover_thresh: 0.1
-  nms_thresh: 0.2
-  
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
@ -1,50 +0,0 @@
-Global:
-  algorithm: SAST
-  use_gpu: true
-  epoch_num: 2000
-  log_smooth_window: 20
-  print_batch_step: 2
-  save_model_dir: ./output/det_sast/
-  save_epoch_step: 20
-  eval_batch_step: 5000
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 8
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_sast_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_sast/predicts_sast.txt
-  checkpoints: 
-  save_inference_dir:
-
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
-  layers: 50
-
-Head:
-  function: ppocr.modeling.heads.det_sast_head,SASTHead
-  model_name: large
-  only_fpn_up: False
-#   with_cab: False
-  with_cab: True
-
-Loss:
-  function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-
-Optimizer:
-  function: ppocr.optimizer,RMSProp
-  base_lr: 0.001
-  decay:
-    function: piecewise_decay
-    boundaries: [30000, 50000, 80000, 100000, 150000]
-    decay_rate: 0.3
-
-PostProcess:
-  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
-  score_thresh: 0.5
-  sample_pts_num: 2
-  nms_thresh: 0.2
-  expand_scale: 1.0
-  shrink_ratio_of_width: 0.3
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
@ -1,50 +0,0 @@
-Global:
-  algorithm: SAST
-  use_gpu: true
-  epoch_num: 2000
-  log_smooth_window: 20
-  print_batch_step: 2
-  save_model_dir: ./output/det_sast/
-  save_epoch_step: 20
-  eval_batch_step: 5000
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 1
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_sast_totaltext_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_sast/predicts_sast.txt
-  checkpoints:
-  save_inference_dir:
-
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
-  layers: 50
-
-Head:
-  function: ppocr.modeling.heads.det_sast_head,SASTHead
-  model_name: large
-  only_fpn_up: False
-  # with_cab: False
-  with_cab: True
-
-Loss:
-  function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-
-Optimizer:
-  function: ppocr.optimizer,RMSProp
-  base_lr: 0.001
-  decay:
-    function: piecewise_decay
-    boundaries: [30000, 50000, 80000, 100000, 150000]
-    decay_rate: 0.3
-
-PostProcess:
-  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
-  score_thresh: 0.5
-  sample_pts_num: 6
-  nms_thresh: 0.2
-  expand_scale: 1.2
-  shrink_ratio_of_width: 0.2
--- a/configs/det/det_sast_icdar15_reader.yml
+++ b/configs/det/det_sast_icdar15_reader.yml
@ -1,24 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/
-  label_file_path: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
-  data_ratio_list: [0.1, 0.45, 0.3, 0.15]
-  min_crop_side_ratio: 0.3
-  min_crop_size: 24
-  min_text_size: 4
-  max_text_size: 512
-
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  max_side_len: 1536
-  
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  infer_img: ./train_data/icdar2015/text_localization/ch4_test_images/img_11.jpg
-  max_side_len: 1536
--- a/configs/det/det_sast_totaltext_reader.yml
+++ b/configs/det/det_sast_totaltext_reader.yml
@ -1,24 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/
-  label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
-  data_ratio_list: [0.5, 0.5]
-  min_crop_side_ratio: 0.3
-  min_crop_size: 24
-  min_text_size: 4
-  max_text_size: 512
-
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  img_set_dir: ./train_data/
-  label_file_path: ./train_data/total_text_icdar_14pt/test_label_json.txt
-  max_side_len: 768
-  
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  infer_img: ./train_data/afs/total_text/Images/Test/img623.jpg
-  max_side_len: 768
--- a/configs/rec/rec_benchmark_reader.yml
+++ b/configs/rec/rec_benchmark_reader.yml
@ -1,12 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  num_workers: 8
-  lmdb_sets_dir: ./train_data/data_lmdb_release/training/
-  
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  lmdb_sets_dir: ./train_data/data_lmdb_release/validation/
-
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
--- a/configs/rec/rec_chinese_common_train.yml
+++ b/configs/rec/rec_chinese_common_train.yml
@ -1,45 +0,0 @@
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 3000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 128
-  test_batch_size_per_card: 128
-  image_shape: [3, 32, 320]
-  max_text_length: 25
-  character_type: ch
-  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
-  loss_type: ctc
-  distort: false
-  use_space_char: false
-  reader_yml: ./configs/rec/rec_chinese_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
@ -1,46 +0,0 @@
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 3000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 320]
-  max_text_length: 25
-  character_type: ch
-  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
-  loss_type: ctc
-  distort: false
-  use_space_char: false
-  reader_yml: ./configs/rec/rec_chinese_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: small
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 48
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_chinese_reader.yml
+++ b/configs/rec/rec_chinese_reader.yml
@ -1,13 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  num_workers: 8
-  img_set_dir: ./train_data
-  label_file_path: ./train_data/rec_gt_train.txt
-  
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  img_set_dir: ./train_data
-  label_file_path: ./train_data/rec_gt_test.txt
-
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
--- a/configs/rec/rec_icdar15_reader.yml
+++ b/configs/rec/rec_icdar15_reader.yml
@ -1,13 +0,0 @@
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  num_workers: 8
-  img_set_dir: ./train_data/ic15_data
-  label_file_path: ./train_data/ic15_data/rec_gt_train.txt
-  
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  img_set_dir: ./train_data/ic15_data
-  label_file_path: ./train_data/ic15_data/rec_gt_test.txt
-
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@ -1,49 +0,0 @@
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 1000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 300
-  eval_batch_step: 500
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  distort: true
-  debug: false
-  reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
-  decay:
-    function: cosine_decay
-    step_each_epoch: 20
-    total_epoch: 1000
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@ -1,43 +1,108 @@
 Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 72
+  use_gpu: false
+  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
+  save_model_dir: ./output/rec/test/
+  save_epoch_step: 500
+  # evaluation is run every eval_batch_step iterations (scalar form; older configs used [start_iter, interval])
+  eval_batch_step: 127
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: #output/rec/rec_crnn/best_accuracy
  save_inference_dir:
-  infer_img:
-  
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  character_type: 'ch'
+  use_space_char: False
+  infer_mode: False
+  use_tps: False

-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

 Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
+  name: Adam
  beta1: 0.9
  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.001
+    warmup_epoch: 4
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [ 1, 2, 2, 2 ]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: fc
+    hidden_size: 96
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/real_data.txt # dataset1
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/label_val_all.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
@ -0,0 +1,106 @@
+Global:
+  use_gpu: true
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 1
+  save_model_dir: ./output/rec/test/
+  save_epoch_step: 500
+  # evaluation is run every eval_batch_step iterations (scalar form; older configs used [start_iter, interval])
+  eval_batch_step: 1016
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: #output/rec/rec_crnn/best_accuracy
+  save_inference_dir:
+  use_visualdl: True
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: /home/zhoujun20/rec/lmdb/dict.txt
+  character_type: 'ch'
+  use_space_char: True
+  infer_mode: False
+  use_tps: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.0005
+    warmup_epoch: 1
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [ 1, 2, 2, 2 ]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+TRAIN:
+  dataset:
+    name: LMDBDateSet
+    file_list:
+      - /home/zhoujun20/rec/lmdb/train # dataset1
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: LMDBDateSet
+    file_list:
+      - /home/zhoujun20/rec/lmdb/val
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@ -1,41 +0,0 @@
-Global:
-  algorithm: Rosetta
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_Rosetta
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights: 
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: reshape
-  
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml
@ -1,54 +0,0 @@
-Global:
-  algorithm: RARE
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_RARE
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: attention
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: small
-  
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
- 
-Head:
-  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-  Attention:
-    decoder_size: 96
-    word_vector_dim: 96
-  
-Loss:
-  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-  
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@ -1,51 +0,0 @@
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-  
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: small
-  
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@ -1,43 +1,106 @@
 Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 72
+  use_gpu: false
+  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
+  save_model_dir: ./output/rec/test/
+  save_epoch_step: 500
+  # run evaluation every eval_batch_step training iterations (127 here)
+  eval_batch_step: 127
+  # if pretrained_model was saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: #output/rec/rec_crnn/best_accuracy
  save_inference_dir:
-  infer_img:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  character_type: 'ch'
+  use_space_char: False
+  infer_mode: False
+  use_tps: False


-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
 Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
+  name: Adam
  beta1: 0.9
  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.001
+    warmup_epoch: 4
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 200
+  Neck:
+    name: SequenceEncoder
+    encoder_type: fc
+    hidden_size: 96
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/real_data.txt # dataset1
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/label_val_all.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@ -1,40 +0,0 @@
-Global:
-  algorithm: Rosetta
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_Rosetta
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-  
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: reshape
-  
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
@ -1,53 +0,0 @@
-Global:
-  algorithm: RARE
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_RARE
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: attention
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-  Attention:
-    decoder_size: 128
-    word_vector_dim: 128
-  
-Loss:
-  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-  
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@ -1,50 +0,0 @@
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r50fpn_vd_none_srn.yml
+++ b/configs/rec/rec_r50fpn_vd_none_srn.yml
@ -1,49 +0,0 @@
-Global:
-  algorithm: SRN
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_pvam_withrotate
-  save_epoch_step: 1
-  eval_batch_step: 8000
-  train_batch_size_per_card: 64
-  test_batch_size_per_card: 1
-  image_shape: [1, 64, 256]
-  max_text_length: 25
-  character_type: en
-  loss_type: srn
-  num_heads: 8
-  average_window: 0.15
-  max_average_window: 15625
-  min_average_window: 10000
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights: 
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_fpn,ResNet
-  layers: 50
- 
-Head:
-  function: ppocr.modeling.heads.rec_srn_all_head,SRNPredict
-  encoder_type: rnn
-  num_encoder_TUs: 2
-  num_decoder_TUs: 4
-  hidden_dims: 512
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_srn_loss,SRNLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0001
-  beta1: 0.9
-  beta2: 0.999
--- a/ppocr/data/init.py
+++ b/ppocr/data/init.py
@ -11,3 +11,114 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+import paddle
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+
+import copy
+from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
+import paddle.distributed as dist
+
+from ppocr.data.imaug import transform, create_operators
+
+__all__ = ['build_dataloader', 'transform', 'create_operators']
+
+
+def build_dataset(config, global_config):
+    from ppocr.data.dataset import SimpleDataSet, LMDBDateSet
+    support_dict = ['SimpleDataSet', 'LMDBDateSet']
+
+    module_name = config.pop('name')
+    assert module_name in support_dict, Exception(
+        'DataSet only support {}'.format(support_dict))
+
+    dataset = eval(module_name)(config, global_config)
+    return dataset
+
+
+def build_dataloader(config, device, distributed=False, global_config=None):
+    from ppocr.data.dataset import BatchBalancedDataLoader
+
+    config = copy.deepcopy(config)
+    dataset_config = config['dataset']
+
+    _dataset_list = []
+    file_list = dataset_config.pop('file_list')
+    if len(file_list) == 1:
+        ratio_list = [1.0]
+    else:
+        ratio_list = dataset_config.pop('ratio_list')
+    for file in file_list:
+        dataset_config['file_list'] = file
+        _dataset = build_dataset(dataset_config, global_config)
+        _dataset_list.append(_dataset)
+    data_loader = BatchBalancedDataLoader(_dataset_list, ratio_list,
+                                          distributed, device, config['loader'])
+    return data_loader, _dataset.info_dict
+
+
+def test_loader():
+    import time
+    from tools.program import load_config, ArgsParser
+
+    FLAGS = ArgsParser().parse_args()
+    config = load_config(FLAGS.config)
+
+    place = paddle.CPUPlace()
+    paddle.disable_static(place)
+    import time
+
+    data_loader, _ = build_dataloader(
+        config['TRAIN'], place, global_config=config['Global'])
+    start = time.time()
+    print(len(data_loader))
+    for epoch in range(1):
+        print('epoch {} ****************'.format(epoch))
+        for i, batch in enumerate(data_loader):
+            if i > len(data_loader):
+                break
+            t = time.time() - start
+            start = time.time()
+            print('{}, batch : {} ,time {}'.format(i, len(batch[0]), t))
+
+            continue
+            import matplotlib.pyplot as plt
+
+            from matplotlib import pyplot as plt
+            import cv2
+            fig = plt.figure()
+            # # cv2.imwrite('img.jpg',batch[0].numpy()[0].transpose((1,2,0)))
+            # # cv2.imwrite('bmap.jpg',batch[1].numpy()[0])
+            # # cv2.imwrite('bmask.jpg',batch[2].numpy()[0])
+            # # cv2.imwrite('smap.jpg',batch[3].numpy()[0])
+            # # cv2.imwrite('smask.jpg',batch[4].numpy()[0])
+            plt.title('img')
+            plt.imshow(batch[0].numpy()[0].transpose((1, 2, 0)))
+            # plt.figure()
+            # plt.title('bmap')
+            # plt.imshow(batch[1].numpy()[0],cmap='Greys')
+            # plt.figure()
+            # plt.title('bmask')
+            # plt.imshow(batch[2].numpy()[0],cmap='Greys')
+            # plt.figure()
+            # plt.title('smap')
+            # plt.imshow(batch[3].numpy()[0],cmap='Greys')
+            # plt.figure()
+            # plt.title('smask')
+            # plt.imshow(batch[4].numpy()[0],cmap='Greys')
+            # plt.show()
+            # break
+
+
+if __name__ == '__main__':
+    test_loader()
--- a/ppocr/data/dataset.py
+++ b/ppocr/data/dataset.py
@ -0,0 +1,300 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import numpy as np
+import os
+import lmdb
+import random
+import signal
+import paddle
+from paddle.io import Dataset, DataLoader, DistributedBatchSampler, BatchSampler
+
+from .imaug import transform, create_operators
+from ppocr.utils.logging import get_logger
+
+
+def term_mp(sig_num, frame):
+    """ kill the current process group with SIGKILL
+    """
+    pid = os.getpid()
+    pgid = os.getpgid(os.getpid())
+    print("main proc {} exit, kill process group " "{}".format(pid, pgid))
+    os.killpg(pgid, signal.SIGKILL)
+
+
+signal.signal(signal.SIGINT, term_mp)
+signal.signal(signal.SIGTERM, term_mp)
+
+
+class ModeException(Exception):
+    """
+    ModeException
+    """
+
+    def __init__(self, message='', mode=''):
+        message += "\nOnly the following 3 modes are supported: " \
+                   "train, valid, test. Given mode is {}".format(mode)
+        super(ModeException, self).__init__(message)
+
+
+class SampleNumException(Exception):
+    """
+    SampleNumException
+    """
+
+    def __init__(self, message='', sample_num=0, batch_size=1):
+        message += "\nError: The number of the whole data ({}) " \
+                   "is smaller than the batch_size ({}), and drop_last " \
+                   "is turnning on, so nothing  will feed in program, " \
+                   "Terminated now. Please reset batch_size to a smaller " \
+                   "number or feed more data!".format(sample_num, batch_size)
+        super(SampleNumException, self).__init__(message)
+
+
+def get_file_list(file_list, data_dir, delimiter='\t'):
+    """
+    Read label files; each line is split by `delimiter` into (img_path, label).
+
+    Args:
+        file_list (str|list): label file(s); data_dir: prefix joined to img_path
+    """
+    if isinstance(file_list, str):
+        file_list = [file_list]
+    data_source_list = []
+    for file in file_list:
+        with open(file) as f:
+            full_lines = [line.strip() for line in f]
+            for line in full_lines:
+                try:
+                    img_path, label = line.split(delimiter)
+                except:
+                    logger = get_logger()
+                    logger.warning('label error in {}'.format(line))
+                img_path = os.path.join(data_dir, img_path)
+                data = {'img_path': img_path, 'label': label}
+                data_source_list.append(data)
+    return data_source_list
+
+
+class LMDBDateSet(Dataset):
+    def __init__(self, config, global_config):
+        super(LMDBDateSet, self).__init__()
+        self.data_list = self.load_lmdb_dataset(
+            config['file_list'], global_config['max_text_length'])
+        random.shuffle(self.data_list)
+
+        self.ops = create_operators(config['transforms'], global_config)
+
+        # for rec
+        character = ''
+        for op in self.ops:
+            if hasattr(op, 'character'):
+                character = getattr(op, 'character')
+
+        self.info_dict = {'character': character}
+
+    def load_lmdb_dataset(self, data_dir, max_text_length):
+        self.env = lmdb.open(
+            data_dir,
+            max_readers=32,
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False)
+        if not self.env:
+            print('cannot create lmdb from %s' % (data_dir))
+            exit(0)
+
+        filtered_index_list = []
+        with self.env.begin(write=False) as txn:
+            nSamples = int(txn.get('num-samples'.encode()))
+            self.nSamples = nSamples
+            for index in range(self.nSamples):
+                index += 1  # lmdb starts with 1
+                label_key = 'label-%09d'.encode() % index
+                label = txn.get(label_key).decode('utf-8')
+                if len(label) > max_text_length:
+                    # print(f'The length of the label is longer than max_length: length
+                    # {len(label)}, {label} in dataset {self.root}')
+                    continue
+
+                # By default, images containing characters which are not in opt.character are filtered.
+                # You can add [UNK] token to `opt.character` in utils.py instead of this filtering.
+                filtered_index_list.append(index)
+        return filtered_index_list
+
+    def print_lmdb_sets_info(self, lmdb_sets):
+        lmdb_info_strs = []
+        for dataset_idx in range(len(lmdb_sets)):
+            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
+                                   lmdb_sets[dataset_idx]['num_samples'])
+            lmdb_info_strs.append(tmp_str)
+        lmdb_info_strs = ''.join(lmdb_info_strs)
+        logger = get_logger()
+        logger.info("DataSummary:" + lmdb_info_strs)
+        return
+
+    def __getitem__(self, idx):
+        idx = self.data_list[idx]
+        with self.env.begin(write=False) as txn:
+            label_key = 'label-%09d'.encode() % idx
+            label = txn.get(label_key)
+            if label is not None:
+                label = label.decode('utf-8')
+                img_key = 'image-%09d'.encode() % idx
+                imgbuf = txn.get(img_key)
+                data = {'image': imgbuf, 'label': label}
+                outs = transform(data, self.ops)
+            else:
+                outs = None
+            if outs is None:
+                return self.__getitem__(np.random.randint(self.__len__()))
+            return outs
+
+    def __len__(self):
+        return len(self.data_list)
+
+
+class SimpleDataSet(Dataset):
+    def __init__(self, config, global_config):
+        super(SimpleDataSet, self).__init__()
+        delimiter = config.get('delimiter', '\t')
+        self.data_list = get_file_list(config['file_list'], config['data_dir'],
+                                       delimiter)
+        random.shuffle(self.data_list)
+
+        self.ops = create_operators(config['transforms'], global_config)
+
+        # for rec
+        character = ''
+        for op in self.ops:
+            if hasattr(op, 'character'):
+                character = getattr(op, 'character')
+
+        self.info_dict = {'character': character}
+
+    def __getitem__(self, idx):
+        data = copy.deepcopy(self.data_list[idx])
+        with open(data['img_path'], 'rb') as f:
+            img = f.read()
+            data['image'] = img
+        outs = transform(data, self.ops)
+        if outs is None:
+            return self.__getitem__(np.random.randint(self.__len__()))
+        return outs
+
+    def __len__(self):
+        return len(self.data_list)
+
+
+class BatchBalancedDataLoader(object):
+    def __init__(self,
+                 dataset_list: list,
+                 ratio_list: list,
+                 distributed,
+                 device,
+                 loader_args: dict):
+        """
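+        Combine the datasets in dataset_list according to the matching ratios in ratio_list, so that each batch is sampled in those proportions
+        :param dataset_list: list of datasets
+        :param ratio_list: list of ratios
+        :param loader_args: dataloader configuration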
+        """
+        assert sum(ratio_list) == 1 and len(dataset_list) == len(ratio_list)
+
+        self.dataset_len = 0
+        self.data_loader_list = []
+        self.dataloader_iter_list = []
+        all_batch_size = loader_args.pop('batch_size')
+        batch_size_list = list(
+            map(int, [max(1.0, all_batch_size * x) for x in ratio_list]))
+        remain_num = all_batch_size - sum(batch_size_list)
+        batch_size_list[np.argmax(ratio_list)] += remain_num
+
+        for _dataset, _batch_size in zip(dataset_list, batch_size_list):
+            if distributed:
+                batch_sampler_class = DistributedBatchSampler
+            else:
+                batch_sampler_class = BatchSampler
+            batch_sampler = batch_sampler_class(
+                dataset=_dataset,
+                batch_size=_batch_size,
+                shuffle=loader_args['shuffle'],
+                drop_last=loader_args['drop_last'], )
+            _data_loader = DataLoader(
+                dataset=_dataset,
+                batch_sampler=batch_sampler,
+                places=device,
+                num_workers=loader_args['num_workers'],
+                return_list=True, )
+            self.data_loader_list.append(_data_loader)
+            self.dataloader_iter_list.append(iter(_data_loader))
+            self.dataset_len += len(_dataset)
+
+    def __iter__(self):
+        return self
+
+    def __len__(self):
+        return min([len(x) for x in self.data_loader_list])
+
+    def __next__(self):
+        batch = []
+        for i, data_loader_iter in enumerate(self.dataloader_iter_list):
+            try:
+                _batch_i = next(data_loader_iter)
+                batch.append(_batch_i)
+            except StopIteration:
+                self.dataloader_iter_list[i] = iter(self.data_loader_list[i])
+                _batch_i = next(self.dataloader_iter_list[i])
+                batch.append(_batch_i)
+            except ValueError:
+                pass
+        if len(batch) > 0:
+            batch_list = []
+            batch_item_size = len(batch[0])
+            for i in range(batch_item_size):
+                cur_item_list = [batch_i[i] for batch_i in batch]
+                batch_list.append(paddle.concat(cur_item_list, axis=0))
+        else:
+            batch_list = batch[0]
+        return batch_list
+
+
+def fill_batch(batch):
+    """
+    2020.09.08: The current paddle version only supports returning data with the same length.
+                Therefore, pad the items whose lengths are inconsistent.
+                This method is currently only useful for text detection.
+    """
+    keys = list(range(len(batch[0])))
+    v_max_len_dict = {}
+    for k in keys:
+        v_max_len_dict[k] = max([len(item[k]) for item in batch])
+    for item in batch:
+        length = []
+        for k in keys:
+            v = item[k]
+            length.append(len(v))
+            assert isinstance(v, np.ndarray)
+            if len(v) == v_max_len_dict[k]:
+                continue
+            try:
+                tmp_shape = [v_max_len_dict[k] - len(v)] + list(v[0].shape)
+            except:
+                a = 1
+            tmp_array = np.zeros(tmp_shape, dtype=v[0].dtype)
+            new_array = np.concatenate([v, tmp_array])
+            item[k] = new_array
+        item.append(length)
+    return batch
--- a/ppocr/data/det/data_augment.py
+++ b/ppocr/data/det/data_augment.py
@ -1,47 +0,0 @@
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import random
-import cv2
-import math
-
-import imgaug
-import imgaug.augmenters as iaa
-
-
-def AugmentData(data):
-    img = data['image']
-    shape = img.shape
-
-    aug = iaa.Sequential(
-        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize(
-            (0.5, 3))]).to_deterministic()
-
-    def may_augment_annotation(aug, data, shape):
-        if aug is None:
-            return data
-
-        line_polys = []
-        for poly in data['polys']:
-            new_poly = may_augment_poly(aug, shape, poly)
-            line_polys.append(new_poly)
-        data['polys'] = np.array(line_polys)
-        return data
-
-    def may_augment_poly(aug, img_shape, poly):
-        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
-        keypoints = aug.augment_keypoints(
-            [imgaug.KeypointsOnImage(
-                keypoints, shape=img_shape)])[0].keypoints
-        poly = [(p.x, p.y) for p in keypoints]
-        return poly
-
-    img_aug = aug.augment_image(img)
-    data['image'] = img_aug
-    data = may_augment_annotation(aug, data, shape)
-    return data
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@ -1,167 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import math
-import random
-import functools
-import numpy as np
-import cv2
-import string
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import get_image_file_list
-import time
-
-
-class TrainReader(object):
-    def __init__(self, params):
-        self.num_workers = params['num_workers']
-        self.label_file_path = params['label_file_path']
-        print(self.label_file_path)
-        self.use_mul_data = False
-        if isinstance(self.label_file_path, list):
-            self.use_mul_data = True
-            self.data_ratio_list = params['data_ratio_list']
-        self.batch_size = params['train_batch_size_per_card']
-        assert 'process_function' in params,\
-            "absence process_function in Reader"
-        self.process = create_module(params['process_function'])(params)
-
-    def __call__(self, process_id):     
-        def sample_iter_reader():
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
-            random.shuffle(img_id_list)
-            if sys.platform == "win32" and self.num_workers != 1:
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-            for img_id in range(process_id, img_num, self.num_workers):
-                label_infor = label_infor_list[img_id_list[img_id]]
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-
-        def sample_iter_reader_mul():
-            batch_size = 1000
-            data_source_list = self.label_file_path
-            batch_size_list = list(map(int, [max(1.0, batch_size * x) for x in self.data_ratio_list]))
-            print(self.data_ratio_list, batch_size_list)
-
-            data_filename_list, data_size_list, fetch_record_list = [], [], []
-            for data_source in data_source_list:
-                image_files = open(data_source, "rb").readlines()
-                random.shuffle(image_files)
-                data_filename_list.append(image_files)
-                data_size_list.append(len(image_files))
-                fetch_record_list.append(0)
-
-            image_batch = []
-            # get a batch of img_fns and poly_fns
-            for i in range(0, len(batch_size_list)):
-                bs = batch_size_list[i]
-                ds = data_size_list[i]
-                image_names = data_filename_list[i]
-                fetch_record = fetch_record_list[i]
-                data_path = data_source_list[i]
-                for j in range(fetch_record, fetch_record + bs):
-                    index = j % ds
-                    image_batch.append(image_names[index])
-
-                if (fetch_record + bs) > ds:
-                    fetch_record_list[i] = 0
-                    random.shuffle(data_filename_list[i])
-                else:
-                    fetch_record_list[i] = fetch_record + bs
-
-            if sys.platform == "win32":
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-
-            for label_infor in image_batch:
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-
-        def batch_iter_reader():
-            batch_outs = []
-            if self.use_mul_data:
-                print("Sample date from multiple datasets!")
-                for outs in sample_iter_reader_mul():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []                
-            else:
-                for outs in sample_iter_reader():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []
-
-        return batch_iter_reader
-
-
-class EvalTestReader(object):
-    def __init__(self, params):
-        self.params = params
-        assert 'process_function' in params,\
-            "absence process_function in EvalTestReader"
-
-    def __call__(self, mode):
-        process_function = create_module(self.params['process_function'])(
-            self.params)
-        batch_size = self.params['test_batch_size_per_card']
-
-        img_list = []
-        if mode != "test":
-            img_set_dir = self.params['img_set_dir']
-            img_name_list_path = self.params['label_file_path']
-            with open(img_name_list_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    img_name = line.decode().strip("\n").split("\t")[0]
-                    img_path = os.path.join(img_set_dir, img_name)
-                    img_list.append(img_path)
-        else:
-            img_path = self.params['infer_img']
-            img_list = get_image_file_list(img_path)
-
-        def batch_iter_reader():
-            batch_outs = []
-            for img_path in img_list:
-                img = cv2.imread(img_path)
-                if img is None:
-                    logger.info("{} does not exist!".format(img_path))
-                    continue
-                elif len(list(img.shape)) == 2 or img.shape[2] == 1:
-                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                outs = process_function(img)
-                outs.append(img_path)
-                batch_outs.append(outs)
-                if len(batch_outs) == batch_size:
-                    yield batch_outs
-                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
-
-        return batch_iter_reader
--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@ -1,216 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-import sys
-from ppocr.utils.utility import initial_logger, check_and_read_gif
-logger = initial_logger()
-
-from .data_augment import AugmentData
-from .random_crop_data import RandomCropData
-from .make_shrink_map import MakeShrinkMap
-from .make_border_map import MakeBorderMap
-
-
-class DBProcessTrain(object):
-    """
-    DB pre-process for Train mode
-    """
-
-    def __init__(self, params):
-        self.img_set_dir = params['img_set_dir']
-        self.image_shape = params['image_shape']
-
-    def order_points_clockwise(self, pts):
-        rect = np.zeros((4, 2), dtype="float32")
-        s = pts.sum(axis=1)
-        rect[0] = pts[np.argmin(s)]
-        rect[2] = pts[np.argmax(s)]
-        diff = np.diff(pts, axis=1)
-        rect[1] = pts[np.argmin(diff)]
-        rect[3] = pts[np.argmax(diff)]
-        return rect
-
-    def make_data_dict(self, imgvalue, entry):
-        boxes = []
-        texts = []
-        ignores = []
-        for rect in entry:
-            points = rect['points']
-            transcription = rect['transcription']
-            try:
-                box = self.order_points_clockwise(
-                    np.array(points).reshape(-1, 2))
-                if cv2.contourArea(box) > 0:
-                    boxes.append(box)
-                    texts.append(transcription)
-                    ignores.append(transcription in ['*', '###'])
-            except:
-                print('load label failed!')
-        data = {
-            'image': imgvalue,
-            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
-            'polys': np.array(boxes),
-            'texts': texts,
-            'ignore_tags': ignores,
-        }
-        return data
-
-    def NormalizeImage(self, data):
-        im = data['image']
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im -= img_mean
-        im /= img_std
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        data['image'] = im
-        return data
-
-    def FilterKeys(self, data):
-        filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
-        for key in filter_keys:
-            if key in data:
-                del data[key]
-        return data
-
-    def convert_label_infor(self, label_infor):
-        label_infor = label_infor.decode()
-        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
-        substr = label_infor.strip("\n").split("\t")
-        img_path = self.img_set_dir + substr[0]
-        label = json.loads(substr[1])
-        return img_path, label
-
-    def __call__(self, label_infor):
-        img_path, gt_label = self.convert_label_infor(label_infor)
-        imgvalue, flag = check_and_read_gif(img_path)
-        if not flag:
-            imgvalue = cv2.imread(img_path)
-        if imgvalue is None:
-            logger.info("{} does not exist!".format(img_path))
-            return None
-        if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
-            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
-        data = self.make_data_dict(imgvalue, gt_label)
-        data = AugmentData(data)
-        data = RandomCropData(data, self.image_shape[1:])
-        data = MakeShrinkMap(data)
-        data = MakeBorderMap(data)
-        data = self.NormalizeImage(data)
-        data = self.FilterKeys(data)
-        return data['image'], data['shrink_map'], data['shrink_mask'], data[
-            'threshold_map'], data['threshold_mask']
-
-
-class DBProcessTest(object):
-    """
-    DB pre-process for Test mode
-    """
-
-    def __init__(self, params):
-        super(DBProcessTest, self).__init__()
-        self.resize_type = 0
-        if 'test_image_shape' in params:
-            self.image_shape = params['test_image_shape']
-            # print(self.image_shape)
-            self.resize_type = 1
-        if 'max_side_len' in params:
-            self.max_side_len = params['max_side_len']
-        else:
-            self.max_side_len = 2400
-
-    def resize_image_type0(self, im):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        max_side_len = self.max_side_len
-        h, w, _ = im.shape
-
-        resize_w = w
-        resize_h = h
-
-        # limit the max side
-        if max(resize_h, resize_w) > max_side_len:
-            if resize_h > resize_w:
-                ratio = float(max_side_len) / resize_h
-            else:
-                ratio = float(max_side_len) / resize_w
-        else:
-            ratio = 1.
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-        if resize_h % 32 == 0:
-            resize_h = resize_h
-        elif resize_h // 32 <= 1:
-            resize_h = 32
-        else:
-            resize_h = (resize_h // 32 - 1) * 32
-        if resize_w % 32 == 0:
-            resize_w = resize_w
-        elif resize_w // 32 <= 1:
-            resize_w = 32
-        else:
-            resize_w = (resize_w // 32 - 1) * 32
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        except:
-            print(im.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return im, (ratio_h, ratio_w)
-
-    def resize_image_type1(self, im):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = im.shape[:2]  # (h, w, c)
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        return im, (ratio_h, ratio_w)
-
-    def normalize(self, im):
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im[:, :, 0] -= img_mean[0]
-        im[:, :, 1] -= img_mean[1]
-        im[:, :, 2] -= img_mean[2]
-        im[:, :, 0] /= img_std[0]
-        im[:, :, 1] /= img_std[1]
-        im[:, :, 2] /= img_std[2]
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        return im
-
-    def __call__(self, im):
-        if self.resize_type == 0:
-            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
-        else:
-            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
-        im = self.normalize(im)
-        im = im[np.newaxis, :]
-        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
@ -1,537 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-import sys
-import os
-
-class EASTProcessTrain(object):
-    def __init__(self, params):
-        self.img_set_dir = params['img_set_dir']
-        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
-        self.background_ratio = params['background_ratio']
-        self.min_crop_side_ratio = params['min_crop_side_ratio']
-        image_shape = params['image_shape']
-        self.input_size = image_shape[1]
-        self.min_text_size = params['min_text_size']
-
-    def preprocess(self, im):
-        input_size = self.input_size
-        im_shape = im.shape
-        im_size_min = np.min(im_shape[0:2])
-        im_size_max = np.max(im_shape[0:2])
-        im_scale = float(input_size) / float(im_size_max)
-        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im[:, :, ::-1].astype(np.float32)
-        im = im / 255
-        im -= img_mean
-        im /= img_std
-        new_h, new_w, _ = im.shape
-        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
-        im_padded[:new_h, :new_w, :] = im
-        im_padded = im_padded.transpose((2, 0, 1))
-        im_padded = im_padded[np.newaxis, :]
-        return im_padded, im_scale
-
-    def convert_label_infor(self, label_infor):
-        label_infor = label_infor.decode()
-        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
-        substr = label_infor.strip("\n").split("\t")
-        img_path = os.path.join(self.img_set_dir, substr[0])
-        label = json.loads(substr[1])
-        nBox = len(label)
-        wordBBs, txts, txt_tags = [], [], []
-        for bno in range(0, nBox):
-            wordBB = label[bno]['points']
-            txt = label[bno]['transcription']
-            wordBBs.append(wordBB)
-            txts.append(txt)
-            if txt == '###':
-                txt_tags.append(True)
-            else:
-                txt_tags.append(False)
-        wordBBs = np.array(wordBBs, dtype=np.float32)
-        txt_tags = np.array(txt_tags, dtype=np.bool)
-        return img_path, wordBBs, txt_tags, txts
-
-    def rotate_im_poly(self, im, text_polys):
-        """
-        rotate image with 90 / 180 / 270 degre
-        """
-        im_w, im_h = im.shape[1], im.shape[0]
-        dst_im = im.copy()
-        dst_polys = []
-        rand_degree_ratio = np.random.rand()
-        rand_degree_cnt = 1
-        if 0.333 < rand_degree_ratio < 0.666:
-            rand_degree_cnt = 2
-        elif rand_degree_ratio > 0.666:
-            rand_degree_cnt = 3
-        for i in range(rand_degree_cnt):
-            dst_im = np.rot90(dst_im)
-        rot_degree = -90 * rand_degree_cnt
-        rot_angle = rot_degree * math.pi / 180.0
-        n_poly = text_polys.shape[0]
-        cx, cy = 0.5 * im_w, 0.5 * im_h
-        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
-        for i in range(n_poly):
-            wordBB = text_polys[i]
-            poly = []
-            for j in range(4):
-                sx, sy = wordBB[j][0], wordBB[j][1]
-                dx = math.cos(rot_angle) * (sx - cx)\
-                    - math.sin(rot_angle) * (sy - cy) + ncx
-                dy = math.sin(rot_angle) * (sx - cx)\
-                    + math.cos(rot_angle) * (sy - cy) + ncy
-                poly.append([dx, dy])
-            dst_polys.append(poly)
-        dst_polys = np.array(dst_polys, dtype=np.float32)
-        return dst_im, dst_polys
-
-    def polygon_area(self, poly):
-        """
-        compute area of a polygon
-        :param poly:
-        :return:
-        """
-        edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
-                (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
-                (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
-                (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
-        return np.sum(edge) / 2.
-
-    def check_and_validate_polys(self, polys, tags, img_height, img_width):
-        """
-        check so that the text poly is in the same direction,
-        and also filter some invalid polygons
-        :param polys:
-        :param tags:
-        :return:
-        """
-        h, w = img_height, img_width
-        if polys.shape[0] == 0:
-            return polys
-        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
-        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
-
-        validated_polys = []
-        validated_tags = []
-        for poly, tag in zip(polys, tags):
-            p_area = self.polygon_area(poly)
-            #invalid poly
-            if abs(p_area) < 1:
-                continue
-            if p_area > 0:
-                #'poly in wrong direction'
-                if not tag:
-                    tag = True  #reversed cases should be ignore
-                poly = poly[(0, 3, 2, 1), :]
-            validated_polys.append(poly)
-            validated_tags.append(tag)
-        return np.array(validated_polys), np.array(validated_tags)
-
-    def draw_img_polys(self, img, polys):
-        if len(img.shape) == 4:
-            img = np.squeeze(img, axis=0)
-        if img.shape[0] == 3:
-            img = img.transpose((1, 2, 0))
-            img[:, :, 2] += 123.68
-            img[:, :, 1] += 116.78
-            img[:, :, 0] += 103.94
-        cv2.imwrite("tmp.jpg", img)
-        img = cv2.imread("tmp.jpg")
-        for box in polys:
-            box = box.astype(np.int32).reshape((-1, 1, 2))
-            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
-        import random
-        ino = random.randint(0, 100)
-        cv2.imwrite("tmp_%d.jpg" % ino, img)
-        return
-
-    def shrink_poly(self, poly, r):
-        """
-        fit a poly inside the origin poly, maybe bugs here...
-        used for generate the score map
-        :param poly: the text poly
-        :param r: r in the paper
-        :return: the shrinked poly
-        """
-        # shrink ratio
-        R = 0.3
-        # find the longer pair
-        dist0 = np.linalg.norm(poly[0] - poly[1])
-        dist1 = np.linalg.norm(poly[2] - poly[3])
-        dist2 = np.linalg.norm(poly[0] - poly[3])
-        dist3 = np.linalg.norm(poly[1] - poly[2])
-        if dist0 + dist1 > dist2 + dist3:
-            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
-            ## p0, p1
-            theta = np.arctan2((poly[1][1] - poly[0][1]),
-                               (poly[1][0] - poly[0][0]))
-            poly[0][0] += R * r[0] * np.cos(theta)
-            poly[0][1] += R * r[0] * np.sin(theta)
-            poly[1][0] -= R * r[1] * np.cos(theta)
-            poly[1][1] -= R * r[1] * np.sin(theta)
-            ## p2, p3
-            theta = np.arctan2((poly[2][1] - poly[3][1]),
-                               (poly[2][0] - poly[3][0]))
-            poly[3][0] += R * r[3] * np.cos(theta)
-            poly[3][1] += R * r[3] * np.sin(theta)
-            poly[2][0] -= R * r[2] * np.cos(theta)
-            poly[2][1] -= R * r[2] * np.sin(theta)
-            ## p0, p3
-            theta = np.arctan2((poly[3][0] - poly[0][0]),
-                               (poly[3][1] - poly[0][1]))
-            poly[0][0] += R * r[0] * np.sin(theta)
-            poly[0][1] += R * r[0] * np.cos(theta)
-            poly[3][0] -= R * r[3] * np.sin(theta)
-            poly[3][1] -= R * r[3] * np.cos(theta)
-            ## p1, p2
-            theta = np.arctan2((poly[2][0] - poly[1][0]),
-                               (poly[2][1] - poly[1][1]))
-            poly[1][0] += R * r[1] * np.sin(theta)
-            poly[1][1] += R * r[1] * np.cos(theta)
-            poly[2][0] -= R * r[2] * np.sin(theta)
-            poly[2][1] -= R * r[2] * np.cos(theta)
-        else:
-            ## p0, p3
-            # print poly
-            theta = np.arctan2((poly[3][0] - poly[0][0]),
-                               (poly[3][1] - poly[0][1]))
-            poly[0][0] += R * r[0] * np.sin(theta)
-            poly[0][1] += R * r[0] * np.cos(theta)
-            poly[3][0] -= R * r[3] * np.sin(theta)
-            poly[3][1] -= R * r[3] * np.cos(theta)
-            ## p1, p2
-            theta = np.arctan2((poly[2][0] - poly[1][0]),
-                               (poly[2][1] - poly[1][1]))
-            poly[1][0] += R * r[1] * np.sin(theta)
-            poly[1][1] += R * r[1] * np.cos(theta)
-            poly[2][0] -= R * r[2] * np.sin(theta)
-            poly[2][1] -= R * r[2] * np.cos(theta)
-            ## p0, p1
-            theta = np.arctan2((poly[1][1] - poly[0][1]),
-                               (poly[1][0] - poly[0][0]))
-            poly[0][0] += R * r[0] * np.cos(theta)
-            poly[0][1] += R * r[0] * np.sin(theta)
-            poly[1][0] -= R * r[1] * np.cos(theta)
-            poly[1][1] -= R * r[1] * np.sin(theta)
-            ## p2, p3
-            theta = np.arctan2((poly[2][1] - poly[3][1]),
-                               (poly[2][0] - poly[3][0]))
-            poly[3][0] += R * r[3] * np.cos(theta)
-            poly[3][1] += R * r[3] * np.sin(theta)
-            poly[2][0] -= R * r[2] * np.cos(theta)
-            poly[2][1] -= R * r[2] * np.sin(theta)
-        return poly
-
-    def generate_quad(self, im_size, polys, tags):
-        """
-        Generate quadrangle.
-        """
-        h, w = im_size
-        poly_mask = np.zeros((h, w), dtype=np.uint8)
-        score_map = np.zeros((h, w), dtype=np.uint8)
-        # (x1, y1, ..., x4, y4, short_edge_norm)
-        geo_map = np.zeros((h, w, 9), dtype=np.float32)
-        # mask used during traning, to ignore some hard areas
-        training_mask = np.ones((h, w), dtype=np.uint8)
-        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
-            poly = poly_tag[0]
-            tag = poly_tag[1]
-
-            r = [None, None, None, None]
-            for i in range(4):
-                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
-                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
-                r[i] = min(dist1, dist2)
-            # score map
-            shrinked_poly = self.shrink_poly(
-                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
-            cv2.fillPoly(score_map, shrinked_poly, 1)
-            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
-            # if the poly is too small, then ignore it during training
-            poly_h = min(
-                np.linalg.norm(poly[0] - poly[3]),
-                np.linalg.norm(poly[1] - poly[2]))
-            poly_w = min(
-                np.linalg.norm(poly[0] - poly[1]),
-                np.linalg.norm(poly[2] - poly[3]))
-            if min(poly_h, poly_w) < self.min_text_size:
-                cv2.fillPoly(training_mask,
-                             poly.astype(np.int32)[np.newaxis, :, :], 0)
-
-            if tag:
-                cv2.fillPoly(training_mask,
-                             poly.astype(np.int32)[np.newaxis, :, :], 0)
-
-            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
-            # geo map.
-            y_in_poly = xy_in_poly[:, 0]
-            x_in_poly = xy_in_poly[:, 1]
-            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
-            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
-            for pno in range(4):
-                geo_channel_beg = pno * 2
-                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
-                    x_in_poly - poly[pno, 0]
-                geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
-                    y_in_poly - poly[pno, 1]
-            geo_map[y_in_poly, x_in_poly, 8] = \
-                1.0 / max(min(poly_h, poly_w), 1.0)
-        return score_map, geo_map, training_mask
-
-    def crop_area(self,
-                  im,
-                  polys,
-                  tags,
-                  txts,
-                  crop_background=False,
-                  max_tries=50):
-        """
-        make random crop from the input image
-        :param im:
-        :param polys:
-        :param tags:
-        :param crop_background:
-        :param max_tries:
-        :return:
-        """
-        h, w, _ = im.shape
-        pad_h = h // 10
-        pad_w = w // 10
-        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
-        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
-        for poly in polys:
-            poly = np.round(poly, decimals=0).astype(np.int32)
-            minx = np.min(poly[:, 0])
-            maxx = np.max(poly[:, 0])
-            w_array[minx + pad_w:maxx + pad_w] = 1
-            miny = np.min(poly[:, 1])
-            maxy = np.max(poly[:, 1])
-            h_array[miny + pad_h:maxy + pad_h] = 1
-        # ensure the cropped area not across a text
-        h_axis = np.where(h_array == 0)[0]
-        w_axis = np.where(w_array == 0)[0]
-        if len(h_axis) == 0 or len(w_axis) == 0:
-            return im, polys, tags, txts
-
-        for i in range(max_tries):
-            xx = np.random.choice(w_axis, size=2)
-            xmin = np.min(xx) - pad_w
-            xmax = np.max(xx) - pad_w
-            xmin = np.clip(xmin, 0, w - 1)
-            xmax = np.clip(xmax, 0, w - 1)
-            yy = np.random.choice(h_axis, size=2)
-            ymin = np.min(yy) - pad_h
-            ymax = np.max(yy) - pad_h
-            ymin = np.clip(ymin, 0, h - 1)
-            ymax = np.clip(ymax, 0, h - 1)
-            if xmax - xmin < self.min_crop_side_ratio * w or \
-               ymax - ymin < self.min_crop_side_ratio * h:
-                # area too small
-                continue
-            if polys.shape[0] != 0:
-                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
-                    & (polys[:, :, 0] <= xmax)\
-                    & (polys[:, :, 1] >= ymin)\
-                    & (polys[:, :, 1] <= ymax)
-                selected_polys = np.where(
-                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
-            else:
-                selected_polys = []
-
-            if len(selected_polys) == 0:
-                # no text in this area
-                if crop_background:
-                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
-                    polys = []
-                    tags = []
-                    txts = []
-                    return im, polys, tags, txts
-                else:
-                    continue
-
-            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
-            polys = polys[selected_polys]
-            tags = tags[selected_polys]
-            txts_tmp = []
-            for selected_poly in selected_polys:
-                txts_tmp.append(txts[selected_poly])
-            txts = txts_tmp
-            polys[:, :, 0] -= xmin
-            polys[:, :, 1] -= ymin
-            return im, polys, tags, txts
-        return im, polys, tags, txts
-
-    def crop_background_infor(self, im, text_polys, text_tags, text_strs):
-        im, text_polys, text_tags, text_strs = self.crop_area(
-            im, text_polys, text_tags, text_strs, crop_background=True)
-        if len(text_polys) > 0:
-            return None
-        # pad and resize image
-        input_size = self.input_size
-        im, ratio = self.preprocess(im)
-        score_map = np.zeros((input_size, input_size), dtype=np.float32)
-        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
-        training_mask = np.ones((input_size, input_size), dtype=np.float32)
-        return im, score_map, geo_map, training_mask
-
-    def crop_foreground_infor(self, im, text_polys, text_tags, text_strs):
-        im, text_polys, text_tags, text_strs = self.crop_area(
-            im, text_polys, text_tags, text_strs, crop_background=False)
-        if text_polys.shape[0] == 0:
-            return None
-        #continue for all ignore case
-        if np.sum((text_tags * 1.0)) >= text_tags.size:
-            return None
-        # pad and resize image
-        input_size = self.input_size
-        im, ratio = self.preprocess(im)
-        text_polys[:, :, 0] *= ratio
-        text_polys[:, :, 1] *= ratio
-        _, _, new_h, new_w = im.shape
-        #         print(im.shape)
-        #         self.draw_img_polys(im, text_polys)
-        score_map, geo_map, training_mask = self.generate_quad(
-            (new_h, new_w), text_polys, text_tags)
-        return im, score_map, geo_map, training_mask
-
-    def __call__(self, label_infor):
-        infor = self.convert_label_infor(label_infor)
-        im_path, text_polys, text_tags, text_strs = infor
-        im = cv2.imread(im_path)
-        if im is None:
-            return None
-        if text_polys.shape[0] == 0:
-            return None
-        #add rotate cases
-        if np.random.rand() < 0.5:
-            im, text_polys = self.rotate_im_poly(im, text_polys)
-        h, w, _ = im.shape
-        text_polys, text_tags = self.check_and_validate_polys(text_polys,
-                                                              text_tags, h, w)
-        if text_polys.shape[0] == 0:
-            return None
-
-        # random scale this image
-        rd_scale = np.random.choice(self.random_scale)
-        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
-        text_polys *= rd_scale
-        if np.random.rand() < self.background_ratio:
-            outs = self.crop_background_infor(im, text_polys, text_tags,
-                                              text_strs)
-        else:
-            outs = self.crop_foreground_infor(im, text_polys, text_tags,
-                                              text_strs)
-
-        if outs is None:
-            return None
-        im, score_map, geo_map, training_mask = outs
-        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
-        geo_map = np.swapaxes(geo_map, 1, 2)
-        geo_map = np.swapaxes(geo_map, 1, 0)
-        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
-        training_mask = training_mask[np.newaxis, ::4, ::4]
-        training_mask = training_mask.astype(np.float32)
-        return im, score_map, geo_map, training_mask
-
-
-class EASTProcessTest(object):
-    def __init__(self, params):
-        super(EASTProcessTest, self).__init__()
-        self.resize_type = 0
-        if 'test_image_shape' in params:
-            self.image_shape = params['test_image_shape']
-            # print(self.image_shape)
-            self.resize_type = 1
-        if 'max_side_len' in params:
-            self.max_side_len = params['max_side_len']
-        else:
-            self.max_side_len = 2400
-
-    def resize_image_type0(self, im):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        max_side_len = self.max_side_len
-        h, w, _ = im.shape
-
-        resize_w = w
-        resize_h = h
-
-        # limit the max side
-        if max(resize_h, resize_w) > max_side_len:
-            if resize_h > resize_w:
-                ratio = float(max_side_len) / resize_h
-            else:
-                ratio = float(max_side_len) / resize_w
-        else:
-            ratio = 1.
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-        if resize_h % 32 == 0:
-            resize_h = resize_h
-        elif resize_h // 32 <= 1:
-            resize_h = 32
-        else:
-            resize_h = (resize_h // 32 - 1) * 32
-        if resize_w % 32 == 0:
-            resize_w = resize_w
-        elif resize_w // 32 <= 1:
-            resize_w = 32
-        else:
-            resize_w = (resize_w // 32 - 1) * 32
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        except:
-            print(im.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return im, (ratio_h, ratio_w)
-
-    def resize_image_type1(self, im):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = im.shape[:2]  # (h, w, c)
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        return im, (ratio_h, ratio_w)
-
-    def __call__(self, im):
-        if self.resize_type == 0:
-            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
-        else:
-            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im[:, :, ::-1].astype(np.float32)
-        im = im / 255
-        im -= img_mean
-        im /= img_std
-        im = im.transpose((2, 0, 1))
-        im = im[np.newaxis, :]
-        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/make_border_map.py
+++ b/ppocr/data/det/make_border_map.py
@ -1,147 +0,0 @@
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-np.seterr(divide='ignore', invalid='ignore')
-import pyclipper
-from shapely.geometry import Polygon
-import sys
-import warnings
-warnings.simplefilter("ignore")
-
-
-def draw_border_map(polygon, canvas, mask, shrink_ratio):
-    polygon = np.array(polygon)
-    assert polygon.ndim == 2
-    assert polygon.shape[1] == 2
-
-    polygon_shape = Polygon(polygon)
-    if polygon_shape.area <= 0:
-        return
-    distance = polygon_shape.area * (
-        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
-    subject = [tuple(l) for l in polygon]
-    padding = pyclipper.PyclipperOffset()
-    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-
-    padded_polygon = np.array(padding.Execute(distance)[0])
-    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
-
-    xmin = padded_polygon[:, 0].min()
-    xmax = padded_polygon[:, 0].max()
-    ymin = padded_polygon[:, 1].min()
-    ymax = padded_polygon[:, 1].max()
-    width = xmax - xmin + 1
-    height = ymax - ymin + 1
-
-    polygon[:, 0] = polygon[:, 0] - xmin
-    polygon[:, 1] = polygon[:, 1] - ymin
-
-    xs = np.broadcast_to(
-        np.linspace(
-            0, width - 1, num=width).reshape(1, width), (height, width))
-    ys = np.broadcast_to(
-        np.linspace(
-            0, height - 1, num=height).reshape(height, 1), (height, width))
-
-    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
-    for i in range(polygon.shape[0]):
-        j = (i + 1) % polygon.shape[0]
-        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
-        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
-    distance_map = distance_map.min(axis=0)
-
-    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
-    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
-    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
-    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
-    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
-        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
-                         xmin_valid - xmin:xmax_valid - xmax + width],
-        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
-
-
-def _distance(xs, ys, point_1, point_2):
-    '''
-    compute the distance from point to a line
-    ys: coordinates in the first axis
-    xs: coordinates in the second axis
-    point_1, point_2: (x, y), the end of the line
-    '''
-    height, width = xs.shape[:2]
-    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
-    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
-    square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
-        1] - point_2[1])
-
-    cosin = (square_distance - square_distance_1 - square_distance_2) / (
-        2 * np.sqrt(square_distance_1 * square_distance_2))
-    square_sin = 1 - np.square(cosin)
-    square_sin = np.nan_to_num(square_sin)
-    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
-                     square_distance)
-
-    result[cosin <
-           0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
-                                                                       0]
-    # self.extend_line(point_1, point_2, result)
-    return result
-
-
-def extend_line(point_1, point_2, result, shrink_ratio):
-    ex_point_1 = (
-        int(
-            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_1),
-        tuple(point_1),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    ex_point_2 = (
-        int(
-            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_2),
-        tuple(point_2),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    return ex_point_1, ex_point_2
-
-
-def MakeBorderMap(data):
-    shrink_ratio = 0.4
-    thresh_min = 0.3
-    thresh_max = 0.7
-
-    im = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-
-    canvas = np.zeros(im.shape[:2], dtype=np.float32)
-    mask = np.zeros(im.shape[:2], dtype=np.float32)
-
-    for i in range(len(text_polys)):
-        if ignore_tags[i]:
-            continue
-        draw_border_map(
-            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
-    canvas = canvas * (thresh_max - thresh_min) + thresh_min
-
-    data['threshold_map'] = canvas
-    data['threshold_mask'] = mask
-    return data
--- a/ppocr/data/det/make_shrink_map.py
+++ b/ppocr/data/det/make_shrink_map.py
@ -1,88 +0,0 @@
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-from shapely.geometry import Polygon
-import pyclipper
-
-
-def validate_polygons(polygons, ignore_tags, h, w):
-    '''
-    Clamp polygons to the image and normalise their orientation, in place.
-
-    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
-    ignore_tags: one flag per polygon; set to True where |area| < 1.
-    h, w: image height and width used as clipping bounds.
-
-    Returns the (mutated) ``polygons`` and ``ignore_tags``.
-    '''
-    if len(polygons) == 0:
-        return polygons, ignore_tags
-    assert len(polygons) == len(ignore_tags)
-
-    # Clamp x into [0, w - 1] and y into [0, h - 1] for every vertex.
-    for pts in polygons:
-        pts[:, 0] = np.clip(pts[:, 0], 0, w - 1)
-        pts[:, 1] = np.clip(pts[:, 1], 0, h - 1)
-
-    for idx in range(len(polygons)):
-        signed_area = polygon_area(polygons[idx])
-        if abs(signed_area) < 1:
-            ignore_tags[idx] = True
-        if signed_area > 0:
-            # Positive area: flip the vertex order.
-            polygons[idx] = polygons[idx][::-1, :]
-    return polygons, ignore_tags
-
-
-def polygon_area(polygon):
-    """Return the signed area of a polygon given as an (N, 2) array.
-
-    Uses the trapezoid form ``sum((x_next - x) * (y_next + y)) / 2``, the same
-    formula and sign convention as ``SASTProcessTrain.quad_area``.
-
-    The previous implementation multiplied ``(x_next - x)`` by
-    ``(y_next - y)``, which is not an area: every axis-aligned rectangle
-    evaluated to 0 and was therefore flagged as ignored by
-    ``validate_polygons``.
-
-    Args:
-        polygon: numpy array of shape (num_points, 2) holding (x, y) rows.
-
-    Returns:
-        float: signed area; the sign flips when the vertex order is reversed.
-    """
-    if polygon.shape[0] == 0:
-        return 0.0
-    # Pair every vertex with its successor (wrapping around at the end).
-    following = np.roll(polygon, -1, axis=0)
-    edge = np.sum((following[:, 0] - polygon[:, 0]) *
-                  (following[:, 1] + polygon[:, 1]))
-    return float(edge) / 2.
-
-
-def MakeShrinkMap(data):
-    """Add ``shrink_map`` and ``shrink_mask`` entries to a sample dict.
-
-    Reads ``data['image']``, ``data['polys']`` and ``data['ignore_tags']``.
-    Polygons are first passed through ``validate_polygons``. A polygon that is
-    ignored, smaller than ``min_text_size`` on its shorter bounding side, or
-    that vanishes when shrunk is zeroed out of the mask and flagged ignored.
-    Every other polygon is shrunk with pyclipper and filled with 1 in the map.
-
-    Returns:
-        The same ``data`` dict, with the two new keys set.
-    """
-    min_text_size = 8
-    shrink_ratio = 0.4
-
-    image = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-
-    h, w = image.shape[:2]
-    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
-    gt = np.zeros((h, w), dtype=np.float32)
-    mask = np.ones((h, w), dtype=np.float32)
-
-    def _mark_ignored(idx, polygon):
-        # Exclude the polygon's pixels from the mask and remember the flag.
-        cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
-        ignore_tags[idx] = True
-
-    for idx in range(len(text_polys)):
-        polygon = text_polys[idx]
-        box_h = max(polygon[:, 1]) - min(polygon[:, 1])
-        box_w = max(polygon[:, 0]) - min(polygon[:, 0])
-        if ignore_tags[idx] or min(box_h, box_w) < min_text_size:
-            _mark_ignored(idx, polygon)
-            continue
-
-        # Offset distance derived from the polygon's area and perimeter.
-        outline = Polygon(polygon)
-        distance = outline.area * (
-            1 - np.power(shrink_ratio, 2)) / outline.length
-        offsetter = pyclipper.PyclipperOffset()
-        offsetter.AddPath([tuple(pt) for pt in text_polys[idx]],
-                          pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-        shrunk = offsetter.Execute(-distance)
-        if shrunk == []:
-            _mark_ignored(idx, polygon)
-            continue
-
-        # Only the first resulting path is rasterised.
-        shrunk = np.array(shrunk[0]).reshape(-1, 2)
-        cv2.fillPoly(gt, [shrunk.astype(np.int32)], 1)
-
-    data['shrink_map'] = gt
-    data['shrink_mask'] = mask
-    return data
--- a/ppocr/data/det/random_crop_data.py
+++ b/ppocr/data/det/random_crop_data.py
@ -1,155 +0,0 @@
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-import random
-
-
-def is_poly_in_rect(poly, x, y, w, h):
-    """Return True when no vertex of ``poly`` lies outside the rectangle.
-
-    The rectangle spans ``[x, x + w]`` horizontally and ``[y, y + h]``
-    vertically; points exactly on the border count as inside.
-    """
-    pts = np.array(poly)
-    return not (pts[:, 0].min() < x or pts[:, 0].max() > x + w or
-                pts[:, 1].min() < y or pts[:, 1].max() > y + h)
-
-
-def is_poly_outside_rect(poly, x, y, w, h):
-    """Return True when the bounding box of ``poly`` misses the rectangle.
-
-    The rectangle spans ``[x, x + w]`` by ``[y, y + h]``. The test uses only
-    the polygon's min/max coordinates; touching the border is not "outside".
-    """
-    pts = np.array(poly)
-    return bool(pts[:, 0].max() < x or pts[:, 0].min() > x + w or
-                pts[:, 1].max() < y or pts[:, 1].min() > y + h)
-
-
-def split_regions(axis):
-    """Split a sorted 1-D index array into runs of consecutive integers.
-
-    The previous loop only emitted a run when it met the *next* gap, so the
-    final run was never returned (and a fully contiguous ``axis`` produced an
-    empty list). All runs are now returned, including the trailing one.
-
-    Args:
-        axis: 1-D numpy array of indices, e.g. from ``np.where(...)[0]``.
-
-    Returns:
-        list of numpy arrays, one per consecutive run; ``[]`` for empty input.
-    """
-    if axis.shape[0] == 0:
-        return []
-    # A new run starts right after every position where the step is not +1.
-    run_starts = np.where(np.diff(axis) != 1)[0] + 1
-    return np.split(axis, run_starts)
-
-
-def random_select(axis, max_size):
-    """Draw two values from ``axis`` and return them as ``(low, high)``.
-
-    Both values are sampled with ``np.random.choice`` and then clipped into
-    ``[0, max_size - 1]``.
-    """
-    picks = np.random.choice(axis, size=2)
-    low = np.clip(np.min(picks), 0, max_size - 1)
-    high = np.clip(np.max(picks), 0, max_size - 1)
-    return low, high
-
-
-def region_wise_random_select(regions, max_size):
-    """Pick one value from each of two randomly chosen regions.
-
-    Two region indices are drawn (with replacement) from ``regions``; one
-    value is then drawn from each selected region.
-
-    The drawn value is now taken with ``[0]`` before converting to ``int``:
-    calling ``int()`` directly on a size-1 array is deprecated in recent
-    NumPy releases.
-
-    Args:
-        regions: sequence of 1-D arrays of candidate coordinates.
-        max_size: unused; kept so the signature matches ``random_select``.
-
-    Returns:
-        ``(low, high)`` as Python ints, with ``low <= high``.
-    """
-    chosen_regions = list(np.random.choice(len(regions), 2))
-    selected_values = [
-        int(np.random.choice(regions[index], size=1)[0])
-        for index in chosen_regions
-    ]
-    return min(selected_values), max(selected_values)
-
-
-def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
-    """Pick a random crop window whose borders avoid the text bounding spans.
-
-    Rows and columns covered by any polygon's bounding span are marked as
-    occupied; crop borders are sampled only from free rows and columns. A
-    candidate is accepted when it is at least ``min_crop_side_ratio`` of the
-    image on both sides and at least one polygon is not entirely outside it.
-
-    Args:
-        im: image array of shape (h, w, c).
-        text_polys: iterable of polygons, each convertible to an (N, 2) array.
-        min_crop_side_ratio: minimum crop side as a fraction of the image side.
-        max_tries: number of random candidates to try.
-
-    Returns:
-        ``(x, y, w, h)`` of the crop; the full image ``(0, 0, w, h)`` when no
-        free row/column exists or no candidate is accepted.
-    """
-    h, w, _ = im.shape
-    row_taken = np.zeros(h, dtype=np.int32)
-    col_taken = np.zeros(w, dtype=np.int32)
-    for points in text_polys:
-        pts = np.round(points, decimals=0).astype(np.int32)
-        xs, ys = pts[:, 0], pts[:, 1]
-        col_taken[np.min(xs):np.max(xs)] = 1
-        row_taken[np.min(ys):np.max(ys)] = 1
-
-    # Crop borders may only fall on rows/columns not spanned by any polygon.
-    h_axis = np.where(row_taken == 0)[0]
-    w_axis = np.where(col_taken == 0)[0]
-    if len(h_axis) == 0 or len(w_axis) == 0:
-        return 0, 0, w, h
-
-    h_regions = split_regions(h_axis)
-    w_regions = split_regions(w_axis)
-
-    for _ in range(max_tries):
-        if len(w_regions) > 1:
-            xmin, xmax = region_wise_random_select(w_regions, w)
-        else:
-            xmin, xmax = random_select(w_axis, w)
-        if len(h_regions) > 1:
-            ymin, ymax = region_wise_random_select(h_regions, h)
-        else:
-            ymin, ymax = random_select(h_axis, h)
-
-        crop_w = xmax - xmin
-        crop_h = ymax - ymin
-        if crop_w < min_crop_side_ratio * w or crop_h < min_crop_side_ratio * h:
-            # Candidate is too small; try again.
-            continue
-
-        # Accept as soon as one polygon is not entirely outside the window.
-        if any(not is_poly_outside_rect(poly, xmin, ymin, crop_w, crop_h)
-               for poly in text_polys):
-            return xmin, ymin, crop_w, crop_h
-
-    return 0, 0, w, h
-
-
-def RandomCropData(data, size):
-    """Randomly crop a sample and resize the crop into a ``size`` canvas.
-
-    Reads ``data['image']``, ``data['polys']``, ``data['ignore_tags']`` and
-    ``data['texts']``. The crop window comes from ``crop_area`` (computed from
-    non-ignored polygons only). The crop is scaled by a single factor so it
-    fits inside ``size`` and is pasted at the top-left of a zero canvas.
-    Polygons are shifted and scaled accordingly; those entirely outside the
-    resized crop are dropped together with their text and tag.
-
-    Args:
-        data: sample dict (see above); updated in place.
-        size: target size, indexed as ``size[0]`` (width) and ``size[1]``
-            (height), matching how the canvas and scale factors are built.
-
-    Returns:
-        The same ``data`` dict with image, polys, ignore_tags and texts replaced.
-    """
-    max_tries = 10
-    min_crop_side_ratio = 0.1
-    keep_ratio = True
-
-    im = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-    texts = data['texts']
-
-    # Only polygons that are not ignored constrain the crop window.
-    all_care_polys = []
-    for idx, tag in enumerate(ignore_tags):
-        if not tag:
-            all_care_polys.append(text_polys[idx])
-
-    # Compute the crop region.
-    crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys,
-                                               min_crop_side_ratio, max_tries)
-
-    # Crop the image; scale uniformly and pad to keep the aspect ratio.
-    scale = min(size[0] / crop_w, size[1] / crop_h)
-    h = int(crop_h * scale)
-    w = int(crop_w * scale)
-    window = im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
-    if keep_ratio:
-        img = np.zeros((size[1], size[0], im.shape[2]), im.dtype)
-        img[:h, :w] = cv2.resize(window, (w, h))
-    else:
-        img = cv2.resize(window, tuple(size))
-
-    # Crop the text boxes: move into crop coordinates and drop outsiders.
-    kept_polys, kept_tags, kept_texts = [], [], []
-    for poly, text, tag in zip(text_polys, texts, ignore_tags):
-        moved = ((poly - (crop_x, crop_y)) * scale).tolist()
-        if is_poly_outside_rect(moved, 0, 0, w, h):
-            continue
-        kept_polys.append(moved)
-        kept_tags.append(tag)
-        kept_texts.append(text)
-
-    data['image'] = img
-    data['polys'] = np.array(kept_polys)
-    data['ignore_tags'] = kept_tags
-    data['texts'] = kept_texts
-    return data
--- a/ppocr/data/det/sast_process.py
+++ b/ppocr/data/det/sast_process.py
@ -1,781 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-
-
-class SASTProcessTrain(object):
-    """
-    SAST process function for training
-    """
-    def __init__(self, params):
-        """Copy the processing settings out of the ``params`` mapping.
-
-        Required keys: ``img_set_dir``, ``min_crop_side_ratio``,
-        ``min_crop_size``, ``image_shape`` (its element at index 1 becomes
-        ``input_size``), ``min_text_size`` and ``max_text_size``.
-        """
-        self.img_set_dir = params['img_set_dir']
-        self.min_crop_side_ratio = params['min_crop_side_ratio']
-        self.min_crop_size = params['min_crop_size']
-        self.input_size = params['image_shape'][1]
-        self.min_text_size = params['min_text_size']
-        self.max_text_size = params['max_text_size']
-
-    def convert_label_infor(self, label_infor):
-        """Parse one raw label line into image path, boxes, tags and texts.
-
-        The line is bytes of the form ``<relative path>\\t<json list>`` where
-        every JSON entry has ``points`` and ``transcription`` keys. The text
-        is re-decoded as ``utf-8-sig`` so a leading BOM is dropped.
-
-        The tag array uses the builtin ``bool`` dtype: the ``np.bool`` alias
-        used previously no longer exists in current NumPy releases.
-
-        Args:
-            label_infor: bytes holding one label line.
-
-        Returns:
-            tuple ``(img_path, wordBBs, txt_tags, txts)``: the path prefixed
-            with ``self.img_set_dir``, a float32 array of box points, a bool
-            array that is True where the transcription is ``'###'``, and the
-            list of transcriptions.
-        """
-        label_infor = label_infor.decode()
-        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
-        substr = label_infor.strip("\n").split("\t")
-        img_path = self.img_set_dir + substr[0]
-        label = json.loads(substr[1])
-
-        wordBBs = [box['points'] for box in label]
-        txts = [box['transcription'] for box in label]
-        # '###' marks a box that should be ignored.
-        txt_tags = [txt == '###' for txt in txts]
-
-        wordBBs = np.array(wordBBs, dtype=np.float32)
-        txt_tags = np.array(txt_tags, dtype=bool)
-        return img_path, wordBBs, txt_tags, txts
-
-    def quad_area(self, poly):
-        """
-        Compute the signed area of a quadrilateral.
-        :param poly: four (x, y) points, indexable as poly[i][0], poly[i][1]
-        :return: sum over edges of (x_next - x) * (y_next + y), divided by 2;
-                 the sign flips when the point order is reversed
-        """
-        edge = [
-            (poly[nxt][0] - poly[cur][0]) * (poly[nxt][1] + poly[cur][1])
-            for cur, nxt in ((0, 1), (1, 2), (2, 3), (3, 0))
-        ]
-        return np.sum(edge) / 2.
-
-    def gen_quad_from_poly(self, poly):
-        """
-        Generate the minimum-area bounding quad of ``poly``.
-
-        The rectangle from ``cv2.minAreaRect`` is rotated so that its four
-        corners line up as closely as possible (summed distance) with the
-        polygon's first point, the two middle points and the last point.
-        Returns a (4, 2) float32 array.
-        """
-        point_num = poly.shape[0]
-        min_area_quad = np.zeros((4, 2), dtype=np.float32)
-
-        # rect is (center (x, y), (width, height), angle of rotation)
-        rect = cv2.minAreaRect(poly.astype(np.int32))
-        box = np.array(cv2.boxPoints(rect))
-
-        # Reference polygon points the box corners are matched against.
-        anchors = (poly[0], poly[point_num // 2 - 1], poly[point_num // 2],
-                   poly[-1])
-
-        best_start = 0
-        best_dist = 1e4
-        for start in range(4):
-            dist = np.linalg.norm(box[(start + 0) % 4] - anchors[0]) + \
-                np.linalg.norm(box[(start + 1) % 4] - anchors[1]) + \
-                np.linalg.norm(box[(start + 2) % 4] - anchors[2]) + \
-                np.linalg.norm(box[(start + 3) % 4] - anchors[3])
-            if dist < best_dist:
-                best_dist = dist
-                best_start = start
-
-        for offset in range(4):
-            min_area_quad[offset] = box[(best_start + offset) % 4]
-
-        return min_area_quad
-
-    def check_and_validate_polys(self, polys, tags, xxx_todo_changeme):
-        """
-        Clip polygons to the image, drop degenerate ones and normalise the
-        point order of the rest.
-
-        A polygon whose bounding quad has |area| < 1 is dropped. When the quad
-        area is positive the polygon is treated as reversed: it is flagged as
-        ignored and its points (and the quad's) are re-ordered keeping the
-        first point and reversing the others. The reversal now works for any
-        number of points; it used to index with a fixed 16-point order and
-        raised IndexError for other polygon sizes.
-
-        :param polys: array of shape (num_polys, num_points, 2); clipped in place
-        :param tags: per-polygon ignore flags
-        :param xxx_todo_changeme: (h, w) of the image
-        :return: (polys, tags, hv_tags) arrays for the kept polygons; hv_tag is
-                 0 when the quad's summed width * 2 is less than its summed
-                 height, else 1
-        """
-        (h, w) = xxx_todo_changeme
-        if polys.shape[0] == 0:
-            return polys, np.array([]), np.array([])
-        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
-        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
-
-        validated_polys = []
-        validated_tags = []
-        hv_tags = []
-        for poly, tag in zip(polys, tags):
-            quad = self.gen_quad_from_poly(poly)
-            p_area = self.quad_area(quad)
-            if abs(p_area) < 1:
-                print('invalid poly')
-                continue
-            if p_area > 0:
-                if not tag:
-                    print('poly in wrong direction')
-                    tag = True  # reversed cases should be ignored
-                # Keep the first point, reverse the order of all the others.
-                flipped_order = [0] + list(range(poly.shape[0] - 1, 0, -1))
-                poly = poly[flipped_order, :]
-                quad = quad[(0, 3, 2, 1), :]
-
-            len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - quad[2])
-            len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2])
-            hv_tag = 0 if len_w * 2.0 < len_h else 1
-
-            validated_polys.append(poly)
-            validated_tags.append(tag)
-            hv_tags.append(hv_tag)
-        return np.array(validated_polys), np.array(validated_tags), np.array(hv_tags)
-
-    def crop_area(self, im, polys, tags, hv_tags, txts, crop_background=False, max_tries=25):
-        """
-        Make a random crop of the input image and keep the polygons inside it.
-
-        Crop borders are sampled from rows/columns (of the image padded by a
-        tenth of its size on every side) that no polygon's bounding span
-        covers, then shifted back and clipped to the image.
-
-        :param im: image array of shape (h, w, c)
-        :param polys: array indexed as polys[:, :, 0] (x) and polys[:, :, 1] (y)
-        :param tags: per-polygon array, indexed with the selected polygon ids
-        :param hv_tags: per-polygon array, indexed with the selected polygon ids
-        :param txts: per-polygon list of texts
-        :param crop_background: if True, a crop containing no polygon is returned
-                                as soon as one is found; otherwise such crops
-                                are rejected
-        :param max_tries: number of random candidates to try (50 -> 25)
-        :return: (im, polys, tags, hv_tags, txts) for the crop, or the inputs
-                 unchanged when no candidate is accepted
-        """
-        h, w, _ = im.shape
-        pad_h = h // 10
-        pad_w = w // 10
-        # Occupancy of each padded row/column: 1 where a polygon spans it.
-        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
-        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
-        for poly in polys:
-            poly = np.round(poly, decimals=0).astype(np.int32)
-            minx = np.min(poly[:, 0])
-            maxx = np.max(poly[:, 0])
-            w_array[minx + pad_w: maxx + pad_w] = 1
-            miny = np.min(poly[:, 1])
-            maxy = np.max(poly[:, 1])
-            h_array[miny + pad_h: maxy + pad_h] = 1
-        # ensure the cropped area not across a text
-        h_axis = np.where(h_array == 0)[0]
-        w_axis = np.where(w_array == 0)[0]
-        if len(h_axis) == 0 or len(w_axis) == 0:
-            return im, polys, tags, hv_tags, txts
-        for i in range(max_tries):
-            # Sample in padded coordinates, then shift back and clip to the image.
-            xx = np.random.choice(w_axis, size=2)
-            xmin = np.min(xx) - pad_w
-            xmax = np.max(xx) - pad_w
-            xmin = np.clip(xmin, 0, w - 1)
-            xmax = np.clip(xmax, 0, w - 1)
-            yy = np.random.choice(h_axis, size=2)
-            ymin = np.min(yy) - pad_h
-            ymax = np.max(yy) - pad_h
-            ymin = np.clip(ymin, 0, h - 1)
-            ymax = np.clip(ymax, 0, h - 1)
-            # if xmax - xmin < ARGS.min_crop_side_ratio * w or \
-            #   ymax - ymin < ARGS.min_crop_side_ratio * h:
-            if xmax - xmin < self.min_crop_size or \
-            ymax - ymin < self.min_crop_size:
-                # area too small
-                continue
-            if polys.shape[0] != 0:
-                poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
-                                    & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
-                # NOTE(review): a polygon is selected only when exactly 4 of its
-                # points are inside; this presumably assumes 4-point polygons --
-                # confirm against the caller.
-                selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0]
-            else:
-                selected_polys = []
-            if len(selected_polys) == 0:
-                # no text in this area
-                if crop_background:
-                    # selected_polys is empty here, so txts becomes an empty list.
-                    txts_tmp = []
-                    for selected_poly in selected_polys:
-                        txts_tmp.append(txts[selected_poly])
-                    txts = txts_tmp 
-                    return im[ymin : ymax + 1, xmin : xmax + 1, :], \
-                        polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts
-                else:
-                    continue
-            # Accept the crop: keep the selected polygons and their metadata.
-            im = im[ymin: ymax + 1, xmin: xmax + 1, :]
-            polys = polys[selected_polys]
-            tags = tags[selected_polys]
-            hv_tags = hv_tags[selected_polys]
-            txts_tmp = []
-            for selected_poly in selected_polys:
-                txts_tmp.append(txts[selected_poly])
-            txts = txts_tmp 
-            # Move polygon coordinates into the crop's frame.
-            polys[:, :, 0] -= xmin
-            polys[:, :, 1] -= ymin
-            return im, polys, tags, hv_tags, txts
-
-        return im, polys, tags, hv_tags, txts
-
-    def generate_direction_map(self, poly_quads, direction_map):
-        """
-        Fill every quad in ``direction_map`` with a 3-value direction label.
-
-        The label is the quad's direction vector (midpoint of edge 0-3 to
-        midpoint of edge 1-2), normalised and scaled by the mean quad width,
-        followed by the reciprocal of the mean quad height. Both means are
-        floored at 1.0. Returns ``direction_map`` (drawn into in place).
-        """
-        widths = [
-            (np.linalg.norm(q[0] - q[1]) + np.linalg.norm(q[2] - q[3])) / 2.0
-            for q in poly_quads
-        ]
-        heights = [
-            (np.linalg.norm(q[0] - q[3]) + np.linalg.norm(q[2] - q[1])) / 2.0
-            for q in poly_quads
-        ]
-        # The 1e-6 keeps the division defined when there are no quads.
-        norm_width = max(sum(widths) / (len(widths) + 1e-6), 1.0)
-        average_height = max(sum(heights) / (len(heights) + 1e-6), 1.0)
-
-        for q in poly_quads:
-            full_vector = ((q[1] + q[2]) - (q[0] + q[3])) / 2.0
-            unit_scaled = full_vector / (np.linalg.norm(full_vector) + 1e-6) * norm_width
-            direction_label = tuple(
-                map(float, [unit_scaled[0], unit_scaled[1], 1.0 / (average_height + 1e-6)]))
-            cv2.fillPoly(direction_map, q.round().astype(np.int32)[np.newaxis, :, :], direction_label)
-        return direction_map
-
-    def calculate_average_height(self, poly_quads):
-        """
-        Return the mean quad height, floored at 1.0.
-
-        A quad's height is the mean length of its 0-3 and 2-1 edges.
-        """
-        heights = [
-            (np.linalg.norm(q[0] - q[3]) + np.linalg.norm(q[2] - q[1])) / 2.0
-            for q in poly_quads
-        ]
-        return max(sum(heights) / len(heights), 1.0)
-
-    def generate_tcl_label(self, hw, polys, tags, ds_ratio,
-                            tcl_ratio=0.3, shrink_ratio_of_width=0.15):
-        """
-        Generate the score map, tbo map and training mask for a set of polygons.
-
-        All maps are built at the down-sampled resolution
-        (int(h * ds_ratio), int(w * ds_ratio)); polygons are scaled by
-        ds_ratio to match.
-
-        :param hw: (h, w) of the full-resolution image
-        :param polys: polygon array; multiplied by ds_ratio before use
-        :param tags: per-polygon flags; a truthy tag marks the polygon ignored
-        :param ds_ratio: down-sampling factor applied to sizes and polygons
-        :param tcl_ratio: ratio passed to poly2tcl; its reciprocal is used as
-                          expand_height_ratio when shrinking
-        :param shrink_ratio_of_width: passed to shrink_poly_along_width
-        :return: (score_map, tbo_map, training_mask) with shapes
-                 (h, w), (h, w, 5) and (h, w), all float32
-        """
-        h, w = hw
-        h, w = int(h * ds_ratio), int(w * ds_ratio)
-        polys = polys * ds_ratio
-
-        score_map = np.zeros((h, w,), dtype=np.float32)
-        tbo_map = np.zeros((h, w, 5), dtype=np.float32)
-        training_mask = np.ones((h, w,), dtype=np.float32)
-        # NOTE(review): direction_map is built here but never used or returned.
-        direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape([1, 1, 3]).astype(np.float32)
-
-        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
-            poly = poly_tag[0] 
-            tag = poly_tag[1]
-
-            # generate min_area_quad
-            min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
-            min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
-                                    np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
-            min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
-                                    np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
-
-            # Skip polygons whose shorter quad side is outside the configured
-            # [min_text_size, max_text_size] range (scaled by ds_ratio).
-            if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
-                or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
-                continue
-
-            if tag:
-                # continue
-                # Ignored polygon: set its training-mask pixels to 0.15.
-                cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0.15)
-            else:
-                tcl_poly = self.poly2tcl(poly, tcl_ratio)
-                tcl_quads = self.poly2quads(tcl_poly)
-                poly_quads = self.poly2quads(poly)
-                # stcl map
-                stcl_quads, quad_index = self.shrink_poly_along_width(tcl_quads, shrink_ratio_of_width=shrink_ratio_of_width,
-                                                                expand_height_ratio=1.0 / tcl_ratio)
-                # generate tcl map
-                cv2.fillPoly(score_map, np.round(stcl_quads).astype(np.int32), 1.0)
-
-                # generate tbo map
-                for idx, quad in enumerate(stcl_quads):
-                    # Rasterise this shrunk quad alone, then fill the tbo values
-                    # of its pixels from the matching full-size quad.
-                    quad_mask = np.zeros((h, w), dtype=np.float32)
-                    quad_mask = cv2.fillPoly(quad_mask, np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
-                    tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], quad_mask, tbo_map)
-        return score_map, tbo_map, training_mask
-
-    def generate_tvo_and_tco(self, hw, polys, tags, tcl_ratio=0.3, ds_ratio=0.25):
-        """
-        Generate the tvo map and the tco map at down-sampled resolution.
-
-        Inside each non-ignored polygon's centre-line region (from poly2tcl)
-        the tvo map holds the pixel's offset to the four corners of the
-        polygon's min-area quad (8 channels) and the tco map the offset to the
-        quad centre (2 channels). The last channel of both maps is
-        1 / max(shorter quad side, 1) inside text regions. Everything is 0
-        outside text regions.
-
-        The y-coordinate grid is now built from ``arange(h)``; it used to be
-        the transposed x grid, which has shape (w, h) and therefore only
-        worked for square maps.
-
-        :param hw: (h, w) of the full-resolution image
-        :param polys: polygon array; multiplied by ds_ratio before use
-        :param tags: per-polygon flags; polygons with a truthy tag are skipped
-        :param tcl_ratio: ratio passed to poly2tcl
-        :param ds_ratio: down-sampling factor applied to sizes and polygons
-        :return: (tvo_map, tco_map) with shapes (h, w, 9) and (h, w, 3)
-        """
-        h, w = hw
-        h, w = int(h * ds_ratio), int(w * ds_ratio)
-        polys = polys * ds_ratio
-        poly_mask = np.zeros((h, w), dtype=np.float32)
-
-        # Per-pixel coordinate grids, both of shape (h, w).
-        x_grid = np.tile(np.arange(0, w), (h, 1))
-        y_grid = np.tile(np.arange(0, h), (w, 1)).T
-
-        # tvo map: channels 0,2,4,6 start as x, 1,3,5,7 as y, channel 8 as 1.
-        tvo_map = np.ones((9, h, w), dtype=np.float32)
-        tvo_map[0:-1:2] = x_grid
-        tvo_map[1:-1:2] = y_grid
-        poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32)
-
-        # tco map: channel 0 starts as x, 1 as y, channel 2 as 1.
-        tco_map = np.ones((3, h, w), dtype=np.float32)
-        tco_map[0] = x_grid
-        tco_map[1] = y_grid
-        poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32)
-
-        poly_short_edge_map = np.ones((h, w), dtype=np.float32)
-
-        for poly, poly_tag in zip(polys, tags):
-
-            if poly_tag:
-                continue
-
-            # adjust point order for vertical poly
-            poly = self.adjust_point(poly)
-
-            # generate min_area_quad
-            min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
-            min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
-                                    np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
-            min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
-                                    np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
-
-            # Centre-line region of the polygon; all fills below use it.
-            tcl_poly = self.poly2tcl(poly, tcl_ratio)
-            tcl_region = np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32)
-
-            # generate poly_tv_xy_map: quad corner coordinates, clamped to the map
-            for idx in range(4):
-                cv2.fillPoly(poly_tv_xy_map[2 * idx], tcl_region,
-                            float(min(max(min_area_quad[idx, 0], 0), w)))
-                cv2.fillPoly(poly_tv_xy_map[2 * idx + 1], tcl_region,
-                            float(min(max(min_area_quad[idx, 1], 0), h)))
-
-            # generate poly_tc_xy_map: quad centre coordinates
-            for idx in range(2):
-                cv2.fillPoly(poly_tc_xy_map[idx], tcl_region, float(center_point[idx]))
-
-            # generate poly_short_edge_map
-            cv2.fillPoly(poly_short_edge_map, tcl_region,
-                        float(max(min(min_area_quad_h, min_area_quad_w), 1.0)))
-
-            # generate poly_mask and training_mask
-            cv2.fillPoly(poly_mask, tcl_region, 1)
-
-        # Zero everything outside text, then turn coordinates into offsets.
-        tvo_map *= poly_mask
-        tvo_map[:8] -= poly_tv_xy_map
-        tvo_map[-1] /= poly_short_edge_map
-        tvo_map = tvo_map.transpose((1, 2, 0))
-
-        tco_map *= poly_mask
-        tco_map[:2] -= poly_tc_xy_map
-        tco_map[-1] /= poly_short_edge_map
-        tco_map = tco_map.transpose((1, 2, 0))
-
-        return tvo_map, tco_map
-
-    def adjust_point(self, poly):
-        """
-        Rotate the point order by one position when the polygon looks vertical.
-
-        For 4 points the rotation happens when edges 1-2 and 3-0 together are
-        more than 1.5 times as long as edges 0-1 and 2-3. For more than 4
-        points it happens when the angle between the first two edges exceeds
-        70 degrees. Otherwise ``poly`` itself is returned.
-        """
-        point_num = poly.shape[0]
-        rotate = False
-
-        if point_num == 4:
-            sides = [
-                np.linalg.norm(poly[k] - poly[(k + 1) % 4]) for k in range(4)
-            ]
-            rotate = (sides[0] + sides[2]) * 1.5 < (sides[1] + sides[3])
-        elif point_num > 4:
-            first_edge = poly[0] - poly[1]
-            second_edge = poly[1] - poly[2]
-            cos_theta = np.dot(first_edge, second_edge) / (
-                np.linalg.norm(first_edge) * np.linalg.norm(second_edge) + 1e-6)
-            # Rounding keeps the cosine inside arccos' domain.
-            theta = np.arccos(np.round(cos_theta, decimals=4))
-            rotate = abs(theta) > (70 / 180 * math.pi)
-
-        if rotate:
-            # New order is 1, 2, ..., n - 1, 0.
-            return np.roll(poly, -1, axis=0)
-        return poly
-
-    def gen_min_area_quad_from_poly(self, poly):
-        """
-        Generate the min-area quad of ``poly`` together with its centre point.
-
-        A 4-point polygon is returned as is, with the mean of its points as
-        centre. Otherwise the rectangle from ``cv2.minAreaRect`` is used; its
-        corners are rotated so they line up as closely as possible (summed
-        distance) with the polygon's first, two middle and last points.
-        """
-        point_num = poly.shape[0]
-        if point_num == 4:
-            return poly, np.sum(poly, axis=0) / 4
-
-        # rect is (center (x, y), (width, height), angle of rotation)
-        rect = cv2.minAreaRect(poly.astype(np.int32))
-        center_point = rect[0]
-        box = np.array(cv2.boxPoints(rect))
-
-        # Reference polygon points the box corners are matched against.
-        anchors = (poly[0], poly[point_num // 2 - 1], poly[point_num // 2],
-                   poly[-1])
-
-        best_start = 0
-        best_dist = 1e4
-        for start in range(4):
-            dist = np.linalg.norm(box[(start + 0) % 4] - anchors[0]) + \
-                np.linalg.norm(box[(start + 1) % 4] - anchors[1]) + \
-                np.linalg.norm(box[(start + 2) % 4] - anchors[2]) + \
-                np.linalg.norm(box[(start + 3) % 4] - anchors[3])
-            if dist < best_dist:
-                best_dist = dist
-                best_start = start
-
-        min_area_quad = np.zeros((4, 2), dtype=np.float32)
-        for offset in range(4):
-            min_area_quad[offset] = box[(best_start + offset) % 4]
-
-        return min_area_quad, center_point
-
-    def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
-        """
-        Cut a quad between two relative positions along its width.
-
-        The 0-1 and 3-2 edges are both sampled at ``begin_width_ratio`` and
-        ``end_width_ratio``; the four sampled points form the returned quad
-        in the same point order as the input.
-        """
-        ratios = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
-        upper = quad[0] + (quad[1] - quad[0]) * ratios
-        lower = quad[3] + (quad[2] - quad[3]) * ratios
-        upper_begin, upper_end = upper[0], upper[1]
-        lower_begin, lower_end = lower[0], lower[1]
-        return np.array([upper_begin, upper_end, lower_end, lower_begin])
-
-    def shrink_poly_along_width(self, quads, shrink_ratio_of_width, expand_height_ratio=1.0):
-        """
-        Shrink a chain of quads by the same length at both ends.
-
-        The shrink length is ``shrink_ratio_of_width`` times the smallest of
-        the (expanded) left edge, the (expanded) right edge and the total
-        upper-edge length. Returns the shrunk quads as an array and the list
-        of indices of the input quads they were cut from.
-        """
-
-        def _locate_cut(edge_lengths, remaining):
-            # Walk the edges until ``remaining`` is used up; report the edge
-            # index and the relative position of the cut on that edge.
-            for pos, length in enumerate(edge_lengths):
-                remaining -= length
-                if remaining <= 0.000001:
-                    return pos, (remaining + length) / length
-
-        upper_edges = [np.linalg.norm(q[0] - q[1]) for q in quads]
-        total_upper = sum(upper_edges)
-
-        # Lengths of the outer left and right edges.
-        left_length = np.linalg.norm(quads[0][0] - quads[0][3]) * expand_height_ratio
-        right_length = np.linalg.norm(quads[-1][1] - quads[-1][2]) * expand_height_ratio
-
-        shrink_length = min(left_length, right_length, total_upper) * shrink_ratio_of_width
-
-        left_idx, left_ratio = _locate_cut(upper_edges, shrink_length)
-        left_quad = self.shrink_quad_along_width(
-            quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1)
-        right_idx, right_ratio = _locate_cut(upper_edges, total_upper - shrink_length)
-        right_quad = self.shrink_quad_along_width(
-            quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio)
-
-        if left_idx == right_idx:
-            # Both cuts fall on the same quad: combine them into one piece.
-            pieces = [[left_quad[0], right_quad[1], right_quad[2], left_quad[3]]]
-        else:
-            pieces = [left_quad]
-            pieces.extend(quads[k] for k in range(left_idx + 1, right_idx))
-            pieces.append(right_quad)
-
-        return np.array(pieces), list(range(left_idx, right_idx + 1))
-
-    def vector_angle(self, A, B):
-        """
-        Return the angle of vector AB against the positive x-axis,
-        computed as arctan2(dy, dx).
-        """
-        delta_y = B[1] - A[1]
-        delta_x = B[0] - A[0]
-        dy, dx = np.array([delta_y, delta_x])
-        return np.arctan2(dy, dx)
-
-    def theta_line_cross_point(self, theta, point):
-        """
-        Return [a, b, c] of the line ax + by + c = 0 that passes through
-        ``point`` at angle ``theta``.
-        """
-        px, py = point
-        cos_t = np.cos(theta)
-        sin_t = np.sin(theta)
-        a, b = sin_t, -cos_t
-        c = cos_t * py - sin_t * px
-        return [a, b, c]
-
-    def line_cross_two_point(self, A, B):
-        """
-        Return [a, b, c] of the line ax + by + c = 0 through points A and B.
-        """
-        return self.theta_line_cross_point(self.vector_angle(A, B), A)
-
-    def average_angle(self, poly):
-        """
-        Return the mean of the angles of the 3->0 and 2->1 edges of a
-        four-point ``poly``.
-        """
-        top_left, top_right, bottom_right, bottom_left = poly
-        left_angle = self.vector_angle(bottom_left, top_left)
-        right_angle = self.vector_angle(bottom_right, top_right)
-        return (left_angle + right_angle) / 2
-
-    def line_cross_point(self, line1, line2):
-        """
-        Compute the intersection of two lines given as [a, b, c] with
-        0 = ax + by + c. When the determinant is exactly 0 a message is
-        printed and [0, 0] is returned. The result is a float32 array [x, y].
-        """
-        a1, b1, c1 = line1
-        a2, b2, c2 = line2
-        det = a1 * b2 - a2 * b1
-
-        if det == 0:
-            print('Cross point does not exist')
-            return np.array([0, 0], dtype=np.float32)
-
-        cross_x = (b1 * c2 - b2 * c1) / det
-        cross_y = (a2 * c1 - a1 * c2) / det
-        return np.array([cross_x, cross_y], dtype=np.float32)
-
-    def quad2tcl(self, poly, ratio):
-        """
-        Generate the centre band of a quad given as a (4, 2) array.
-
-        The 0-3 and 1-2 edges are sampled at ``0.5 - ratio / 2`` and
-        ``0.5 + ratio / 2``; the four samples are returned in the input's
-        point order.
-        """
-        ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
-        left_edge = poly[0] + (poly[3] - poly[0]) * ratio_pair
-        right_edge = poly[1] + (poly[2] - poly[1]) * ratio_pair
-        corners = [left_edge[0], right_edge[0], right_edge[1], left_edge[1]]
-        return np.array(corners)
-
-    def poly2tcl(self, poly, ratio):
-        """
-        Generate the centre band of a polygon.
-
-        Point ``i`` is paired with point ``n - 1 - i``; each pair is replaced
-        by the two points at ``0.5 - ratio / 2`` and ``0.5 + ratio / 2`` along
-        the segment between them. The result has the shape and dtype of
-        ``poly``; with an odd point count the middle point stays zero.
-        """
-        ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
-        tcl_poly = np.zeros_like(poly)
-        point_num = poly.shape[0]
-        half = point_num // 2
-
-        # First half of the points and their partners from the other end.
-        near = poly[:half]
-        far = poly[::-1][:half]
-        span = far - near
-
-        tcl_poly[:half] = near + span * ratio_pair[0]
-        tcl_poly[point_num - half:] = (near + span * ratio_pair[1])[::-1]
-        return tcl_poly
-
-    def gen_quad_tbo(self, quad, tcl_mask, tbo_map):
-        """
-        Fill ``tbo_map`` for every pixel where ``tcl_mask == 1``.
-
-        For each such pixel a line at the quad's average side angle is drawn
-        through it and intersected with the quad's upper (0-1) and lower (3-2)
-        edge lines. Channels 0-3 receive the (y, x) offsets to the upper and
-        lower intersections; channel 4 receives 2 / max(shorter quad side, 1).
-        Returns ``tbo_map`` (modified in place).
-        """
-        # Upper and lower edge lines in ax + by + c = 0 form.
-        upper_edge = self.line_cross_two_point(quad[0], quad[1])
-        lower_edge = self.line_cross_two_point(quad[3], quad[2])
-
-        quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2]))
-        quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3]))
-
-        # Average angle of the left and right edges.
-        side_angle = self.average_angle(quad)
-
-        for row, col in np.argwhere(tcl_mask == 1):
-            pixel = (col, row)
-            probe = self.theta_line_cross_point(side_angle, pixel)
-            hit_upper = self.line_cross_point(upper_edge, probe)
-            hit_lower = self.line_cross_point(lower_edge, probe)
-            # Offsets point from the pixel to the intersection.
-            up_dx, up_dy = hit_upper - pixel
-            low_dx, low_dy = hit_lower - pixel
-            tbo_map[row, col, 0] = up_dy
-            tbo_map[row, col, 1] = up_dx
-            tbo_map[row, col, 2] = low_dy
-            tbo_map[row, col, 3] = low_dx
-            tbo_map[row, col, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2
-        return tbo_map
-
-    def poly2quads(self, poly):
-        """
-        Split poly into consecutive quads.
-
-        Point idx is paired with point (point_num - 1 - idx); every two
-        neighbouring pairs form one quad, ordered as
-        [pair_k[0], pair_k+1[0], pair_k+1[1], pair_k[1]].
-        """
-        point_num = poly.shape[0]
-        half = point_num // 2
-
-        # pairs[k] = (poly[k], poly[point_num - 1 - k])
-        pairs = np.array([[poly[k], poly[point_num - 1 - k]] for k in range(half)])
-
-        # adjust each quad to clock-wise order
-        quads = [
-            np.array([pairs[k, 0], pairs[k + 1, 0], pairs[k + 1, 1], pairs[k, 1]])
-            for k in range(half - 1)
-        ]
-        return np.array(quads)
-
-    def extract_polys(self, poly_txt_path):
-        """
-        Read text_polys, txt_tags, txts from the given txt file.
-
-        Each non-blank line is ``x1,y1,x2,y2,...<TAB>transcription``.
-
-        Returns:
-            tuple: (text_polys, txt_tags, txts) where text_polys is an array
-            of (-1, 2) float32 polygons, txt_tags is a bool array that is True
-            where the transcription is '###', and txts is the list of
-            transcriptions.
-        """
-        text_polys, txt_tags, txts = [], [], []
-
-        with open(poly_txt_path) as f:
-            for line in f:
-                line = line.strip()
-                if not line:
-                    # tolerate blank lines (e.g. a trailing newline)
-                    continue
-                poly_str, txt = line.split('\t')
-                # Build a real list: on Python 3 ``map`` returns a lazy
-                # iterator, which np.array would wrap as a 0-d object array.
-                poly = [float(v) for v in poly_str.split(',')]
-                text_polys.append(np.array(poly, dtype=np.float32).reshape(-1, 2))
-                txts.append(txt)
-                txt_tags.append(txt == '###')
-
-        # ``bool`` replaces the ``np.bool`` alias, which newer NumPy removed.
-        return np.array(text_polys), \
-            np.array(txt_tags, dtype=bool), txts
-
-    def __call__(self, label_infor):
-        """
-        Turn one label record into a training sample.
-
-        Steps: read the image, validate the polygons, apply a random
-        area-preserving aspect change, cap the longer side at 2048, crop,
-        rescale relative to self.input_size, apply optional blur / brightness
-        changes, pad to (input_size, input_size), generate the label maps and
-        normalize the image.
-
-        Returns None when the sample is unusable (unreadable image, no
-        polygons left, all polygons ignored, image too small). Otherwise
-        returns a tuple (image, score_map, border_map, training_mask, tvo_map,
-        tco_map); the image is channel-first with its channel axis reversed.
-        """
-        infor = self.convert_label_infor(label_infor)
-        im_path, text_polys, text_tags, text_strs = infor
-        im = cv2.imread(im_path)
-        if im is None:
-            return None
-        if text_polys.shape[0] == 0:
-            return None
-
-        h, w, _ = im.shape
-        text_polys, text_tags, hv_tags = self.check_and_validate_polys(text_polys, text_tags, (h, w))
-
-        if text_polys.shape[0] == 0:
-            return None
-
-        # set aspect ratio and keep the area fixed: width is scaled by
-        # sqrt(s) and height by 1 / sqrt(s), with s drawn from [1.0, 1.5]
-        # in steps of 0.1 and inverted half of the time
-        asp_scales = np.arange(1.0, 1.55, 0.1)
-        asp_scale = np.random.choice(asp_scales)
-
-        if np.random.rand() < 0.5:
-            asp_scale = 1.0 / asp_scale
-        asp_scale = math.sqrt(asp_scale)
-        
-        asp_wx = asp_scale
-        asp_hy = 1.0 / asp_scale
-        im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
-        text_polys[:, :, 0] *= asp_wx
-        text_polys[:, :, 1] *= asp_hy
-
-        # cap the longer side at 2048 pixels
-        h, w, _ = im.shape
-        if max(h, w) > 2048:
-            rd_scale = 2048.0 / max(h, w)
-            im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
-            text_polys *= rd_scale
-        h, w, _ = im.shape
-        if min(h, w) < 16:
-            return None
-
-        #no background
-        im, text_polys, text_tags, hv_tags, text_strs = self.crop_area(im, \
-            text_polys, text_tags, hv_tags, text_strs, crop_background=False)
-        if text_polys.shape[0] == 0:
-            return None
-        # skip the sample when every remaining polygon is tagged as ignored
-        if np.sum((text_tags * 1.0)) >= text_tags.size:
-            return None
-        new_h, new_w, _ = im.shape
-        if (new_h is None) or (new_w is None):
-            return None
-        # resize image: the longer side becomes input_size times a random
-        # factor from rand_scales (1.0 is listed five times, so it is the
-        # most likely choice)
-        std_ratio = float(self.input_size) / max(new_w, new_h)
-        rand_scales = np.array([0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
-        rz_scale = std_ratio * np.random.choice(rand_scales)
-        im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
-        text_polys[:, :, 0] *= rz_scale
-        text_polys[:, :, 1] *= rz_scale
-        
-        # add gaussian blur (probability 0.05) with an odd kernel size of 1, 3 or 5
-        if np.random.rand() < 0.1 * 0.5:
-            ks = np.random.permutation(5)[0] + 1
-            ks = int(ks/2)*2 + 1
-            im =  cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
-        # make brighter (probability 0.05): multiply by a factor in [1.0, 1.5)
-        if np.random.rand() < 0.1 * 0.5:
-            im = im * (1.0 + np.random.rand() * 0.5)
-            im = np.clip(im, 0.0, 255.0)
-        # make darker (probability 0.05): multiply by a factor in (0.5, 1.0]
-        if np.random.rand() < 0.1 * 0.5:
-            im = im * (1.0 - np.random.rand() * 0.5)
-            im = np.clip(im, 0.0, 255.0)
-        
-        # Padding the im to [input_size, input_size]
-        new_h, new_w, _ = im.shape
-        # drop samples whose shorter side is below half of input_size
-        if min(new_w, new_h) < self.input_size * 0.5:
-            return None
-
-        # the canvas is pre-filled with the per-channel mean, so the padded
-        # area becomes 0 after the mean subtraction below
-        im_padded = np.ones((self.input_size, self.input_size, 3), dtype=np.float32)
-        im_padded[:, :, 2] = 0.485 * 255
-        im_padded[:, :, 1] = 0.456 * 255
-        im_padded[:, :, 0] = 0.406 * 255
-
-        # Random the start position
-        del_h = self.input_size - new_h
-        del_w = self.input_size - new_w
-        sh, sw = 0, 0
-        if del_h > 1:
-            sh = int(np.random.rand() * del_h)
-        if del_w > 1:
-            sw = int(np.random.rand() * del_w)
-
-        # Padding: paste the image at (sh, sw) and shift the polygons with it
-        im_padded[sh: sh + new_h, sw: sw + new_w, :] = im.copy()
-        text_polys[:, :, 0] += sw
-        text_polys[:, :, 1] += sh
-
-        score_map, border_map, training_mask = self.generate_tcl_label((self.input_size, self.input_size), 
-                            text_polys, text_tags, 0.25)
-        
-        # SAST head
-        tvo_map, tco_map = self.generate_tvo_and_tco((self.input_size, self.input_size), text_polys, text_tags,  tcl_ratio=0.3, ds_ratio=0.25)
-        # print("test--------tvo_map shape:", tvo_map.shape)
-
-        # normalize: subtract the per-channel mean, divide by the per-channel std
-        im_padded[:, :, 2] -= 0.485 * 255
-        im_padded[:, :, 1] -= 0.456 * 255
-        im_padded[:, :, 0] -= 0.406 * 255
-        im_padded[:, :, 2] /= (255.0 * 0.229) 
-        im_padded[:, :, 1] /= (255.0 * 0.224) 
-        im_padded[:, :, 0] /= (255.0 * 0.225) 
-        # HWC -> CHW
-        im_padded = im_padded.transpose((2, 0, 1))
-
-        # im_padded[::-1] reverses the channel axis of the CHW image
-        return im_padded[::-1, :, :], score_map[np.newaxis, :, :], border_map.transpose((2, 0, 1)), training_mask[np.newaxis, :, :], tvo_map.transpose((2, 0, 1)), tco_map.transpose((2, 0, 1))
-
-    
-class SASTProcessTest(object):
-    """
-    SAST process function for test
-    """
-    def __init__(self, params):
-        super(SASTProcessTest, self).__init__()
-        # target length of the longer image side; 2400 unless configured
-        self.max_side_len = params['max_side_len'] if 'max_side_len' in params else 2400
-
-    def resize_image(self, im):
-        """
-        Resize the image so that its longer side is scaled to
-        self.max_side_len, then round both sides up to a multiple of
-        max_stride (128).
-        :param im: the image to resize, indexed as (height, width, channel)
-        :return: the resized image and the (ratio_h, ratio_w) resize ratios
-        """
-        h, w, _ = im.shape
-
-        # Fix the longer side
-        ratio = float(self.max_side_len) / (h if h > w else w)
-
-        max_stride = 128
-        # scale, truncate to int, then round up to the next multiple of max_stride
-        resize_h = (int(h * ratio) + max_stride - 1) // max_stride * max_stride
-        resize_w = (int(w * ratio) + max_stride - 1) // max_stride * max_stride
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
-
-        return im, (resize_h / float(h), resize_w / float(w))
-
-    def __call__(self, im):
-        """
-        Resize and normalize one image.
-
-        Returns [im, (ratio_h, ratio_w, src_h, src_w)] where im has a leading
-        axis of length 1 followed by channel, height and width.
-        """
-        src_h, src_w, _ = im.shape
-        im, (ratio_h, ratio_w) = self.resize_image(im)
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        # reverse the channel axis and scale to [0, 1]
-        im = im[:, :, ::-1].astype(np.float32) / 255
-        # in-place ops keep the array float32
-        im -= img_mean
-        im /= img_std
-        im = im.transpose((2, 0, 1))[np.newaxis, :]
-        return [im, (ratio_h, ratio_w, src_h, src_w)]
--- a/ppocr/data/imaug/init.py
+++ b/ppocr/data/imaug/init.py
@ -0,0 +1,59 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from .iaa_augment import IaaAugment
+from .make_border_map import MakeBorderMap
+from .make_shrink_map import MakeShrinkMap
+from .random_crop_data import EastRandomCropData, PSERandomCrop
+
+from .rec_img_aug import RecAug, RecResizeImg
+
+from .operators import *
+from .label_ops import *
+
+
+def transform(data, ops=None):
+    """
+    Run data through each operator in order.
+
+    Stops early and returns None as soon as an operator returns None;
+    otherwise returns the output of the last operator (or data itself when
+    there are no operators).
+    """
+    pipeline = [] if ops is None else ops
+    result = data
+    for apply_op in pipeline:
+        result = apply_op(result)
+        if result is None:
+            break
+    return result
+
+
+def create_operators(op_param_list, global_config=None):
+    """
+    create operators based on the config
+
+    Args:
+        op_param_list(list): a list of single-key dicts; the key is the
+            operator class name and the value is a dict of keyword arguments
+            (or None for no arguments)
+        global_config(dict, optional): keyword arguments merged into every
+            operator's arguments; on a key clash the global value wins
+
+    Returns:
+        list: the instantiated operators, in config order
+    """
+    assert isinstance(op_param_list, list), ('operator config should be a list')
+    ops = []
+    for operator in op_param_list:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        # Work on a copy so that merging global_config never writes into the
+        # caller's config (the original dict would otherwise accumulate the
+        # global keys and leak them into later uses of the same config).
+        param = {} if operator[op_name] is None else dict(operator[op_name])
+        if global_config is not None:
+            param.update(global_config)
+        # NOTE(review): eval() resolves op_name in this module's namespace and
+        # will execute arbitrary expressions; only pass trusted configs.
+        op = eval(op_name)(**param)
+        ops.append(op)
+    return ops
--- a/ppocr/data/imaug/iaa_augment.py
+++ b/ppocr/data/imaug/iaa_augment.py
@ -0,0 +1,101 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import imgaug
+import imgaug.augmenters as iaa
+
+
+class AugmenterBuilder(object):
+    """Build imgaug augmenters from a list / dict style configuration."""
+
+    def __init__(self):
+        pass
+
+    def build(self, args, root=True):
+        """
+        Turn a configuration into an augmenter.
+
+        - None or an empty container: returns None.
+        - dict: ``{'type': name, 'args': {...}}`` builds ``iaa.<name>(**args)``.
+        - list at the root: every element is built (with root=False) and the
+          results are wrapped in ``iaa.Sequential``.
+        - list below the root: ``[name, arg1, ...]`` builds
+          ``iaa.<name>(arg1, ...)``.
+        List-valued arguments are converted to tuples first. Anything else
+        raises RuntimeError.
+        """
+        if args is None or len(args) == 0:
+            return None
+
+        if isinstance(args, dict):
+            cls = getattr(iaa, args['type'])
+            kwargs = {}
+            for key, value in args['args'].items():
+                kwargs[key] = self.to_tuple_if_list(value)
+            return cls(**kwargs)
+
+        if isinstance(args, list):
+            if not root:
+                positional = [self.to_tuple_if_list(a) for a in args[1:]]
+                return getattr(iaa, args[0])(*positional)
+            children = [self.build(value, root=False) for value in args]
+            return iaa.Sequential(children)
+
+        raise RuntimeError('unknown augmenter arg: ' + str(args))
+
+    def to_tuple_if_list(self, obj):
+        """Return tuple(obj) for a list; any other object is returned as is."""
+        return tuple(obj) if isinstance(obj, list) else obj
+
+
+class IaaAugment():
+    """Apply an imgaug pipeline to data['image'] and data['polys'] together."""
+
+    def __init__(self, augmenter_args=None, **kwargs):
+        # Default pipeline: horizontal flip (p=0.5), rotation in [-10, 10]
+        # and a resize with size in [0.5, 3].
+        if augmenter_args is None:
+            flip = {'type': 'Fliplr', 'args': {'p': 0.5}}
+            rotate = {'type': 'Affine', 'args': {'rotate': [-10, 10]}}
+            resize = {'type': 'Resize', 'args': {'size': [0.5, 3]}}
+            augmenter_args = [flip, rotate, resize]
+        self.augmenter = AugmenterBuilder().build(augmenter_args)
+
+    def __call__(self, data):
+        """Augment the image and its polygons with one shared random draw."""
+        image = data['image']
+        shape = image.shape
+
+        if not self.augmenter:
+            return data
+
+        # freeze the random parameters so image and polygons move together
+        aug = self.augmenter.to_deterministic()
+        data['image'] = aug.augment_image(image)
+        return self.may_augment_annotation(aug, data, shape)
+
+    def may_augment_annotation(self, aug, data, shape):
+        """Replace data['polys'] by the augmented polygons (no-op if aug is None)."""
+        if aug is None:
+            return data
+
+        data['polys'] = np.array(
+            [self.may_augment_poly(aug, shape, poly) for poly in data['polys']])
+        return data
+
+    def may_augment_poly(self, aug, img_shape, poly):
+        """Map every point of poly through aug; returns a list of (x, y) tuples."""
+        points = [imgaug.Keypoint(pt[0], pt[1]) for pt in poly]
+        on_image = imgaug.KeypointsOnImage(points, shape=img_shape)
+        moved = aug.augment_keypoints([on_image])[0].keypoints
+        return [(kp.x, kp.y) for kp in moved]
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@ -0,0 +1,197 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+from ppocr.utils.logging import get_logger
+
+
+class DetLabelEncode(object):
+    """
+    Decode a JSON detection label into polygons, texts and ignore tags.
+
+    data['label'] must be a JSON list of objects with the keys 'points' and
+    'transcription'. The call adds data['polys'] (float32 array),
+    data['texts'] (list of transcriptions) and data['ignore_tags'] (bool
+    array, True where the transcription is '*' or '###').
+    """
+
+    def __init__(self, **kwargs):
+        pass
+
+    def __call__(self, data):
+        import json
+        label = json.loads(data['label'])
+        boxes, txts, txt_tags = [], [], []
+        for entry in label:
+            txt = entry['transcription']
+            boxes.append(entry['points'])
+            txts.append(txt)
+            txt_tags.append(txt in ['*', '###'])
+        boxes = np.array(boxes, dtype=np.float32)
+        # The ``np.bool`` alias was removed from NumPy; the builtin ``bool``
+        # yields the same dtype on every NumPy version.
+        txt_tags = np.array(txt_tags, dtype=bool)
+
+        data['polys'] = boxes
+        data['texts'] = txts
+        data['ignore_tags'] = txt_tags
+        return data
+
+    def order_points_clockwise(self, pts):
+        """
+        Order four (x, y) points as: smallest x + y, smallest y - x,
+        largest x + y, largest y - x. Returns a (4, 2) float32 array.
+        """
+        rect = np.zeros((4, 2), dtype="float32")
+        s = pts.sum(axis=1)
+        rect[0] = pts[np.argmin(s)]
+        rect[2] = pts[np.argmax(s)]
+        diff = np.diff(pts, axis=1)
+        rect[1] = pts[np.argmin(diff)]
+        rect[3] = pts[np.argmax(diff)]
+        return rect
+
+
+class BaseRecLabelEncode(object):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False):
+        """
+        Args:
+            max_text_length(int): texts longer than this are rejected by encode
+            character_dict_path(str): dictionary file with one entry per
+                line; required when character_type is 'ch'
+            character_type(str): one of 'ch', 'en', 'en_sensitive'
+            use_space_char(bool): append ' ' to the dictionary; only applied
+                for character_type 'ch'
+        """
+        support_character_type = ['ch', 'en', 'en_sensitive']
+        # Report the rejected value itself: self.character_str is not assigned
+        # yet at this point, so formatting it raised AttributeError instead of
+        # showing this message.
+        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
+            support_character_type, character_type)
+
+        self.max_text_len = max_text_length
+        if character_type == "en":
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        elif character_type == "ch":
+            self.character_str = ""
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str += line
+            if use_space_char:
+                self.character_str += " "
+            dict_character = list(self.character_str)
+        elif character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            import string
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        self.character_type = character_type
+        # subclasses prepend their special tokens here
+        dict_character = self.add_special_char(dict_character)
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+
+    def add_special_char(self, dict_character):
+        """Hook for subclasses; the base class adds no special tokens."""
+        return dict_character
+
+    def encode(self, text):
+        """convert one text-label into a list of text-indexes.
+        input:
+            text: the text label of one image (str)
+
+        output:
+            list of dictionary indexes, one per character found in the
+            dictionary (unknown characters are skipped), or None when the
+            text is longer than max_text_len or no character is known.
+            For character_type 'en' the text is lower-cased first.
+        """
+        if len(text) > self.max_text_len:
+            return None
+        if self.character_type == "en":
+            text = text.lower()
+        text_list = []
+        for char in text:
+            if char not in self.dict:
+                # logger = get_logger()
+                # logger.warning('{} is not in dict'.format(char))
+                continue
+            text_list.append(self.dict[char])
+        if len(text_list) == 0:
+            return None
+        return text_list
+
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+
+
+class CTCLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        # extra keyword arguments are accepted and ignored
+        super(CTCLabelEncode, self).__init__(
+            max_text_length, character_dict_path, character_type,
+            use_space_char)
+
+    def __call__(self, data):
+        """
+        Encode data['label'] in place.
+
+        Returns None when the text cannot be encoded; otherwise stores the
+        number of encoded characters in data['length'] and the indexes,
+        right-padded with 0 up to max_text_len, in data['label'].
+        """
+        encoded = self.encode(data['label'])
+        if encoded is None:
+            return None
+        num_chars = len(encoded)
+        data['length'] = np.array(num_chars)
+        padding = [0] * (self.max_text_len - num_chars)
+        data['label'] = np.array(encoded + padding)
+        return data
+
+    def add_special_char(self, dict_character):
+        # index 0 is taken by the 'blank' token
+        return ['blank'] + dict_character
+
+
+class AttnLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        # The base constructor calls add_special_char(), which reads both
+        # markers, so they have to exist before super().__init__ runs;
+        # assigning them afterwards made construction fail with
+        # AttributeError.
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        super(AttnLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+
+    def add_special_char(self, dict_character):
+        # the begin / end markers take indexes 0 and 1
+        dict_character = [self.beg_str, self.end_str] + dict_character
+        return dict_character
+
+    def __call__(self, text):
+        """Encode a raw text string; returns a list of indexes or None."""
+        text = self.encode(text)
+        return text
+
+    def get_ignored_tokens(self):
+        """Return the indexes of the begin and end markers."""
+        beg_idx = self.get_beg_end_flag_idx("beg")
+        end_idx = self.get_beg_end_flag_idx("end")
+        return [beg_idx, end_idx]
+
+    def get_beg_end_flag_idx(self, beg_or_end):
+        """Return the dictionary index of the "beg" or "end" marker as a 0-d array."""
+        if beg_or_end == "beg":
+            idx = np.array(self.dict[self.beg_str])
+        elif beg_or_end == "end":
+            idx = np.array(self.dict[self.end_str])
+        else:
+            assert False, "Unsupport type %s in get_beg_end_flag_idx" \
+                          % beg_or_end
+        return idx
--- a/ppocr/data/imaug/make_border_map.py
+++ b/ppocr/data/imaug/make_border_map.py
@ -0,0 +1,157 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+import sys
+import warnings
+
+warnings.simplefilter("ignore")
+
+__all__ = ['MakeBorderMap']
+
+
+class MakeBorderMap(object):
+    """
+    Build the threshold map and its mask for the text polygons of a sample.
+
+    Reads data['image'], data['polys'] and data['ignore_tags']; writes
+    data['threshold_map'] and data['threshold_mask'].
+    """
+
+    def __init__(self,
+                 shrink_ratio=0.4,
+                 thresh_min=0.3,
+                 thresh_max=0.7,
+                 **kwargs):
+        # shrink_ratio controls how far each polygon is expanded (see
+        # draw_border_map); thresh_min / thresh_max are the output range of
+        # the threshold map. Extra keyword arguments are accepted and ignored.
+        self.shrink_ratio = shrink_ratio
+        self.thresh_min = thresh_min
+        self.thresh_max = thresh_max
+
+    def __call__(self, data: dict) -> dict:
+        """Add 'threshold_map' and 'threshold_mask' to data and return it."""
+
+        img = data['image']
+        text_polys = data['polys']
+        ignore_tags = data['ignore_tags']
+
+        # both maps cover the image's first two dimensions
+        canvas = np.zeros(img.shape[:2], dtype=np.float32)
+        mask = np.zeros(img.shape[:2], dtype=np.float32)
+
+        for i in range(len(text_polys)):
+            if ignore_tags[i]:
+                continue
+            self.draw_border_map(text_polys[i], canvas, mask=mask)
+        # rescale the canvas linearly from [0, 1] to [thresh_min, thresh_max]
+        canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min
+
+        data['threshold_map'] = canvas
+        data['threshold_mask'] = mask
+        return data
+
+    def draw_border_map(self, polygon, canvas, mask):
+        """
+        Draw one polygon's border response into canvas and mark the expanded
+        polygon in mask. Both arrays are modified in place; polygons with a
+        non-positive area are skipped.
+        """
+        # np.array copies, so the in-place shift below does not touch the input
+        polygon = np.array(polygon)
+        assert polygon.ndim == 2
+        assert polygon.shape[1] == 2
+
+        polygon_shape = Polygon(polygon)
+        if polygon_shape.area <= 0:
+            return
+        # offset distance: area * (1 - shrink_ratio^2) / perimeter
+        distance = polygon_shape.area * (
+            1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+        subject = [tuple(l) for l in polygon]
+        padding = pyclipper.PyclipperOffset()
+        padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+
+        # expand the polygon outwards by `distance`; only the first result
+        # path is used
+        padded_polygon = np.array(padding.Execute(distance)[0])
+        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+
+        # bounding box of the expanded polygon
+        xmin = padded_polygon[:, 0].min()
+        xmax = padded_polygon[:, 0].max()
+        ymin = padded_polygon[:, 1].min()
+        ymax = padded_polygon[:, 1].max()
+        width = xmax - xmin + 1
+        height = ymax - ymin + 1
+
+        # move the polygon into the bounding box's local coordinates
+        polygon[:, 0] = polygon[:, 0] - xmin
+        polygon[:, 1] = polygon[:, 1] - ymin
+
+        # per-pixel x and y coordinate grids of shape (height, width)
+        xs = np.broadcast_to(
+            np.linspace(
+                0, width - 1, num=width).reshape(1, width), (height, width))
+        ys = np.broadcast_to(
+            np.linspace(
+                0, height - 1, num=height).reshape(height, 1), (height, width))
+
+        # one normalized distance map per polygon edge, clipped to [0, 1]
+        distance_map = np.zeros(
+            (polygon.shape[0], height, width), dtype=np.float32)
+        for i in range(polygon.shape[0]):
+            j = (i + 1) % polygon.shape[0]
+            absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
+            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+        # keep the distance to the nearest edge
+        distance_map = distance_map.min(axis=0)
+
+        # clamp the bounding box to the canvas
+        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+        # write 1 - distance, keeping the larger value where something was
+        # already drawn
+        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+                             xmin_valid - xmin:xmax_valid - xmax + width],
+            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+    def _distance(self, xs, ys, point_1, point_2):
+        '''
+        compute the distance from point to a line
+        ys: coordinates in the first axis
+        xs: coordinates in the second axis
+        point_1, point_2: (x, y), the end of the line
+        '''
+        height, width = xs.shape[:2]
+        # squared distances from every pixel to each end point
+        square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[
+            1])
+        square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[
+            1])
+        # squared length of the segment itself
+        square_distance = np.square(point_1[0] - point_2[0]) + np.square(
+            point_1[1] - point_2[1])
+
+        # NOTE(review): by the law of cosines this looks like the negated
+        # cosine of the angle at the pixel between the two end points - confirm
+        cosin = (square_distance - square_distance_1 - square_distance_2) / (
+            2 * np.sqrt(square_distance_1 * square_distance_2))
+        square_sin = 1 - np.square(cosin)
+        # divisions by zero above produce NaN; treat those as 0
+        square_sin = np.nan_to_num(square_sin)
+        result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
+                         square_distance)
+
+        # where cosin < 0, use the distance to the nearer end point instead
+        result[cosin <
+               0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin
+                                                                           < 0]
+        # self.extend_line(point_1, point_2, result)
+        return result
+
+    def extend_line(self, point_1, point_2, result, shrink_ratio):
+        """
+        Extend the segment beyond both end points by (1 + shrink_ratio) times
+        its length, draw the two extensions into result with the value 4096.0
+        and return the two extended end points.
+        """
+        ex_point_1 = (int(
+            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
+                      int(
+                          round(point_1[1] + (point_1[1] - point_2[1]) * (
+                              1 + shrink_ratio))))
+        cv2.line(
+            result,
+            tuple(ex_point_1),
+            tuple(point_1),
+            4096.0,
+            1,
+            lineType=cv2.LINE_AA,
+            shift=0)
+        ex_point_2 = (int(
+            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
+                      int(
+                          round(point_2[1] + (point_2[1] - point_1[1]) * (
+                              1 + shrink_ratio))))
+        cv2.line(
+            result,
+            tuple(ex_point_2),
+            tuple(point_2),
+            4096.0,
+            1,
+            lineType=cv2.LINE_AA,
+            shift=0)
+        return ex_point_1, ex_point_2
--- a/ppocr/data/imaug/make_shrink_map.py
+++ b/ppocr/data/imaug/make_shrink_map.py
@ -0,0 +1,94 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+
+__all__ = ['MakeShrinkMap']
+
+
+class MakeShrinkMap(object):
+    r'''
+    Making binary mask from detection data with ICDAR format.
+    Typically following the process of class `MakeICDARData`.
+
+    Reads data['image'], data['polys'] and data['ignore_tags']; writes
+    data['shrink_map'] (1 inside every shrunk text polygon) and
+    data['shrink_mask'] (0 over ignored, too small or unshrinkable polygons).
+    '''
+
+    def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
+        # polygons whose bounding box is smaller than min_text_size on either
+        # side are treated as ignored; extra keyword arguments are ignored
+        self.min_text_size = min_text_size
+        self.shrink_ratio = shrink_ratio
+
+    def __call__(self, data):
+        image = data['image']
+        text_polys = data['polys']
+        ignore_tags = data['ignore_tags']
+
+        h, w = image.shape[:2]
+        text_polys, ignore_tags = self.validate_polygons(text_polys,
+                                                         ignore_tags, h, w)
+        gt = np.zeros((h, w), dtype=np.float32)
+        # gt = np.zeros((1, h, w), dtype=np.float32)
+        mask = np.ones((h, w), dtype=np.float32)
+        for i in range(len(text_polys)):
+            polygon = text_polys[i]
+            height = max(polygon[:, 1]) - min(polygon[:, 1])
+            width = max(polygon[:, 0]) - min(polygon[:, 0])
+            if ignore_tags[i] or min(height, width) < self.min_text_size:
+                # ignored / tiny text: blank it out of the mask
+                cv2.fillPoly(mask,
+                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
+                ignore_tags[i] = True
+            else:
+                polygon_shape = Polygon(polygon)
+                # offset distance: area * (1 - shrink_ratio^2) / perimeter
+                distance = polygon_shape.area * (
+                    1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+                subject = [tuple(l) for l in text_polys[i]]
+                padding = pyclipper.PyclipperOffset()
+                padding.AddPath(subject, pyclipper.JT_ROUND,
+                                pyclipper.ET_CLOSEDPOLYGON)
+                # a negative offset shrinks the polygon
+                shrinked = padding.Execute(-distance)
+                if shrinked == []:
+                    # nothing is left after shrinking: treat as ignored
+                    cv2.fillPoly(mask,
+                                 polygon.astype(np.int32)[np.newaxis, :, :], 0)
+                    ignore_tags[i] = True
+                    continue
+                shrinked = np.array(shrinked[0]).reshape(-1, 2)
+                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+                # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
+
+        data['shrink_map'] = gt
+        data['shrink_mask'] = mask
+        return data
+
+    def validate_polygons(self, polygons, ignore_tags, h, w):
+        '''
+        Clip the polygons to the image, mark degenerate ones (|area| < 1) as
+        ignored and reverse the point order of polygons with a positive
+        signed area. Both inputs are modified in place and returned.
+
+        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+        '''
+        if len(polygons) == 0:
+            return polygons, ignore_tags
+        assert len(polygons) == len(ignore_tags)
+        for polygon in polygons:
+            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
+            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
+
+        for i in range(len(polygons)):
+            area = self.polygon_area(polygons[i])
+            if abs(area) < 1:
+                ignore_tags[i] = True
+            if area > 0:
+                polygons[i] = polygons[i][::-1, :]
+        return polygons, ignore_tags
+
+    def polygon_area(self, polygon):
+        '''
+        Signed polygon area by the trapezoid (shoelace) formula:
+        sum over edges of (x_next - x_i) * (y_next + y_i) / 2.
+
+        The result is negative for the point order
+        (0, 0), (1, 0), (1, 1), (0, 1) and positive for the reverse order.
+        '''
+        # return cv2.contourArea(polygon.astype(np.float32))
+        edge = 0
+        for i in range(polygon.shape[0]):
+            next_index = (i + 1) % polygon.shape[0]
+            # The y terms must be summed. With a difference every edge of an
+            # axis-aligned rectangle contributes 0, so such boxes got area 0
+            # and were all marked as ignored by validate_polygons.
+            edge += (polygon[next_index, 0] - polygon[i, 0]) * (
+                polygon[next_index, 1] + polygon[i, 1])
+
+        return edge / 2.
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@ -0,0 +1,185 @@
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import sys
+import six
+import cv2
+import numpy as np
+
+
+class DecodeImage(object):
+    """ decode image """
+
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+
+    def __call__(self, data):
+        img = data['image']
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+        img = cv2.imdecode(img, 1)
+        if self.img_mode == 'GRAY':
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        elif self.img_mode == 'RGB':
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+            img = img[:, :, ::-1]
+
+        if self.channel_first:
+            img = img.transpose((2, 0, 1))
+
+        data['image'] = img
+        return data
+
+
+class NormalizeImage(object):
+    """ normalize image such as substract mean, divide std
+    """
+
+    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+        if isinstance(scale, str):
+            scale = eval(scale)
+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
+        std = std if std is not None else [0.229, 0.224, 0.225]
+
+        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+        self.mean = np.array(mean).reshape(shape).astype('float32')
+        self.std = np.array(std).reshape(shape).astype('float32')
+
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+
+        assert isinstance(img,
+                          np.ndarray), "invalid input 'img' in NormalizeImage"
+        data['image'] = (
+            img.astype('float32') * self.scale - self.mean) / self.std
+        return data
+
+
+class ToCHWImage(object):
+    """ convert hwc image to chw image
+    """
+
+    def __init__(self, **kwargs):
+        pass
+
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+        data['image'] = img.transpose((2, 0, 1))
+        return data
+
+
+class keepKeys(object):
+    def __init__(self, keep_keys, **kwargs):
+        self.keep_keys = keep_keys
+
+    def __call__(self, data):
+        data_list = []
+        for key in self.keep_keys:
+            data_list.append(data[key])
+        return data_list
+
+
+class DetResizeForTest(object):
+    def __init__(self, **kwargs):
+        super(DetResizeForTest, self).__init__()
+        self.resize_type = 0
+        if 'image_shape' in kwargs:
+            self.image_shape = kwargs['image_shape']
+            self.resize_type = 1
+        if 'limit_side_len' in kwargs:
+            self.limit_side_len = kwargs['limit_side_len']
+            self.limit_type = kwargs.get('limit_type', 'min')
+        else:
+            self.limit_side_len = 736
+            self.limit_type = 'min'
+
+    def __call__(self, data):
+        img = data['image']
+
+        if self.resize_type == 0:
+            img, shape = self.resize_image_type0(img)
+        else:
+            img, shape = self.resize_image_type1(img)
+        data['image'] = img
+        data['shape'] = shape
+        return data
+
+    def resize_image_type1(self, img):
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        return img, np.array([ori_h, ori_w])
+
+    def resize_image_type0(self, img):
+        """
+        resize image to a size multiple of 32 which is required by the network
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
+        """
+        limit_side_len = self.limit_side_len
+        h, w, _ = img.shape
+
+        # limit the max side
+        if self.limit_type == 'max':
+            if max(h, w) > limit_side_len:
+                if h > w:
+                    ratio = float(limit_side_len) / h
+                else:
+                    ratio = float(limit_side_len) / w
+            else:
+                ratio = 1.
+        else:
+            if min(h, w) < limit_side_len:
+                if h < w:
+                    ratio = float(limit_side_len) / h
+                else:
+                    ratio = float(limit_side_len) / w
+            else:
+                ratio = 1.
+        resize_h = int(h * ratio)
+        resize_w = int(w * ratio)
+
+        resize_h = int(round(resize_h / 32) * 32)
+        resize_w = int(round(resize_w / 32) * 32)
+
+        try:
+            if int(resize_w) <= 0 or int(resize_h) <= 0:
+                return None, (None, None)
+            img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        except:
+            print(img.shape, resize_w, resize_h)
+            sys.exit(0)
+        return img, np.array([h, w])
--- a/ppocr/data/imaug/random_crop_data.py
+++ b/ppocr/data/imaug/random_crop_data.py
@ -0,0 +1,210 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+import random
+
+
+def is_poly_in_rect(poly, x, y, w, h):
+    poly = np.array(poly)
+    if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
+        return False
+    if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
+        return False
+    return True
+
+
+def is_poly_outside_rect(poly, x, y, w, h):
+    poly = np.array(poly)
+    if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
+        return True
+    if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
+        return True
+    return False
+
+
+def split_regions(axis):
+    regions = []
+    min_axis = 0
+    for i in range(1, axis.shape[0]):
+        if axis[i] != axis[i - 1] + 1:
+            region = axis[min_axis:i]
+            min_axis = i
+            regions.append(region)
+    return regions
+
+
+def random_select(axis, max_size):
+    xx = np.random.choice(axis, size=2)
+    xmin = np.min(xx)
+    xmax = np.max(xx)
+    xmin = np.clip(xmin, 0, max_size - 1)
+    xmax = np.clip(xmax, 0, max_size - 1)
+    return xmin, xmax
+
+
+def region_wise_random_select(regions, max_size):
+    selected_index = list(np.random.choice(len(regions), 2))
+    selected_values = []
+    for index in selected_index:
+        axis = regions[index]
+        xx = int(np.random.choice(axis, size=1))
+        selected_values.append(xx)
+    xmin = min(selected_values)
+    xmax = max(selected_values)
+    return xmin, xmax
+
+
+def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
+    """Randomly choose a crop rectangle that touches at least one polygon.
+
+    Args:
+        im: image array whose shape unpacks as (h, w, channels).
+        text_polys: iterable of polygons, each indexable as points[:, 0]
+            (x) and points[:, 1] (y) after rounding.
+        min_crop_side_ratio: minimum crop width / height, as a fraction of
+            the image width / height.
+        max_tries: number of random attempts before giving up.
+
+    Returns:
+        tuple: (x, y, crop_w, crop_h); the full image (0, 0, w, h) when no
+        suitable crop was found.
+    """
+    h, w, _ = im.shape
+    # 1 marks rows / columns covered by the extent of some polygon
+    h_array = np.zeros(h, dtype=np.int32)
+    w_array = np.zeros(w, dtype=np.int32)
+    for points in text_polys:
+        points = np.round(points, decimals=0).astype(np.int32)
+        minx = np.min(points[:, 0])
+        maxx = np.max(points[:, 0])
+        # NOTE(review): the slice end is exclusive, so column maxx (and row
+        # maxy below) stays unmarked - confirm this is intended
+        w_array[minx:maxx] = 1
+        miny = np.min(points[:, 1])
+        maxy = np.max(points[:, 1])
+        h_array[miny:maxy] = 1
+    # crop borders are only drawn from rows / columns not covered by any
+    # polygon, so that a border does not cut across a text
+    h_axis = np.where(h_array == 0)[0]
+    w_axis = np.where(w_array == 0)[0]
+
+    # no free row or column left: fall back to the whole image
+    if len(h_axis) == 0 or len(w_axis) == 0:
+        return 0, 0, w, h
+
+    h_regions = split_regions(h_axis)
+    w_regions = split_regions(w_axis)
+
+    for i in range(max_tries):
+        # with several free regions the borders are drawn region-wise,
+        # otherwise from the whole set of free indices
+        if len(w_regions) > 1:
+            xmin, xmax = region_wise_random_select(w_regions, w)
+        else:
+            xmin, xmax = random_select(w_axis, w)
+        if len(h_regions) > 1:
+            ymin, ymax = region_wise_random_select(h_regions, h)
+        else:
+            ymin, ymax = random_select(h_axis, h)
+
+        if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
+            # area too small
+            continue
+        # accept the crop as soon as one polygon is not entirely outside it
+        num_poly_in_rect = 0
+        for poly in text_polys:
+            if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
+                                        ymax - ymin):
+                num_poly_in_rect += 1
+                break
+
+        if num_poly_in_rect > 0:
+            return xmin, ymin, xmax - xmin, ymax - ymin
+
+    # every attempt failed: use the whole image
+    return 0, 0, w, h
+
+
+class EastRandomCropData(object):
+    """Randomly crop an image around its text polygons and resize the crop.
+
+    Reads data['image'], data['polys'], data['ignore_tags'] and
+    data['texts'], and writes the cropped / filtered versions back under
+    the same keys.
+    """
+
+    def __init__(self,
+                 size=(640, 640),
+                 max_tries=10,
+                 min_crop_side_ratio=0.1,
+                 keep_ratio=True,
+                 **kwargs):
+        # size is indexed as (width, height) below
+        self.size = size
+        self.max_tries = max_tries
+        self.min_crop_side_ratio = min_crop_side_ratio
+        self.keep_ratio = keep_ratio
+
+    def __call__(self, data):
+        img = data['image']
+        text_polys = data['polys']
+        ignore_tags = data['ignore_tags']
+        texts = data['texts']
+        # only polygons that are not ignored steer the crop selection
+        all_care_polys = [
+            text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
+        ]
+        # compute the crop region
+        crop_x, crop_y, crop_w, crop_h = crop_area(
+            img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
+        # crop the image; keep the aspect ratio and pad when keep_ratio is set
+        scale_w = self.size[0] / crop_w
+        scale_h = self.size[1] / crop_h
+        scale = min(scale_w, scale_h)
+        h = int(crop_h * scale)
+        w = int(crop_w * scale)
+        if self.keep_ratio:
+            # the scaled crop is placed in the top-left corner of a
+            # zero-filled canvas of the target size
+            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
+                              img.dtype)
+            padimg[:h, :w] = cv2.resize(
+                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
+            img = padimg
+        else:
+            img = cv2.resize(
+                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
+                tuple(self.size))
+        # crop the text boxes: shift into crop coordinates, scale, and keep
+        # only those not entirely outside the scaled crop
+        # NOTE(review): the same uniform `scale` is applied to the polygons
+        # in the keep_ratio=False branch, where the image is stretched to
+        # self.size - confirm this is intended
+        text_polys_crop = []
+        ignore_tags_crop = []
+        texts_crop = []
+        for poly, text, tag in zip(text_polys, texts, ignore_tags):
+            poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+            if not is_poly_outside_rect(poly, 0, 0, w, h):
+                text_polys_crop.append(poly)
+                ignore_tags_crop.append(tag)
+                texts_crop.append(text)
+        data['image'] = img
+        data['polys'] = np.array(text_polys_crop)
+        data['ignore_tags'] = ignore_tags_crop
+        data['texts'] = texts_crop
+        return data
+
+
+class PSERandomCrop(object):
+    def __init__(self, size, **kwargs):
+        self.size = size
+
+    def __call__(self, data):
+        imgs = data['imgs']
+
+        h, w = imgs[0].shape[0:2]
+        th, tw = self.size
+        if w == tw and h == th:
+            return imgs
+
+        # label中存在文本实例，并且按照概率进行裁剪，使用threshold_label_map控制
+        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
+            # 文本实例的左上角点
+            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
+            tl[tl < 0] = 0
+            # 文本实例的右下角点
+            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
+            br[br < 0] = 0
+            # 保证选到右下角点时，有足够的距离进行crop
+            br[0] = min(br[0], h - th)
+            br[1] = min(br[1], w - tw)
+
+            for _ in range(50000):
+                i = random.randint(tl[0], br[0])
+                j = random.randint(tl[1], br[1])
+                # 保证shrink_label_map有文本
+                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
+                    continue
+                else:
+                    break
+        else:
+            i = random.randint(0, h - th)
+            j = random.randint(0, w - tw)
+
+        # return i, j, th, tw
+        for idx in range(len(imgs)):
+            if len(imgs[idx].shape) == 3:
+                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
+            else:
+                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
+        data['imgs'] = imgs
+        return data
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@ -1,31 +1,70 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import math
 import cv2
 import numpy as np
 import random
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
+
+from .text_image_aug import tia_perspective, tia_stretch, tia_distort


-def get_bounding_box_rect(pos):
-    left = min(pos[0])
-    right = max(pos[0])
-    top = min(pos[1])
-    bottom = max(pos[1])
-    return [left, top, right, bottom]
+class RecAug(object):
+    def __init__(self, **kwargsz):
+        pass
+
+    def __call__(self, data):
+        img = data['image']
+        img = warp(img, 10)
+        data['image'] = img
+        return data
+
+
+class RecResizeImg(object):
+    def __init__(self,
+                 image_shape,
+                 infer_mode=False,
+                 character_type='ch',
+                 use_tps=False,
+                 **kwargs):
+        self.image_shape = image_shape
+        self.infer_mode = infer_mode
+        self.character_type = character_type
+        self.use_tps = use_tps
+
+    def __call__(self, data):
+        img = data['image']
+        if self.infer_mode and self.character_type == "ch" and not self.use_tps:
+            norm_img = resize_norm_img_chinese(img, self.image_shape)
+        else:
+            norm_img = resize_norm_img(img, self.image_shape)
+        data['image'] = norm_img
+        return data


 def resize_norm_img(img, image_shape):
@ -77,19 +116,6 @@ def resize_norm_img_chinese(img, image_shape):
    return padding_im


-def get_img_data(value):
-    """get_img_data"""
-    if not value:
-        return None
-    imgdata = np.frombuffer(value, dtype='uint8')
-    if imgdata is None:
-        return None
-    imgori = cv2.imdecode(imgdata, 1)
-    if imgori is None:
-        return None
-    return imgori
-
-
 def flag():
    """
    flag
@ -196,6 +222,9 @@ class Config:
        self.h = h

        self.perspective = True
+        self.stretch = True
+        self.distort = True
+
        self.crop = True
        self.affine = False
        self.reverse = True
@ -299,168 +328,39 @@ def warp(img, ang):
    config.make(w, h, ang)
    new_img = img

+    prob = 0.4
+
+    if config.distort:
+        img_height, img_width = img.shape[0:2]
+        if random.random() <= prob and img_height >= 20 and img_width >= 20:
+            new_img = tia_distort(new_img, random.randint(3, 6))
+
+    if config.stretch:
+        img_height, img_width = img.shape[0:2]
+        if random.random() <= prob and img_height >= 20 and img_width >= 20:
+            new_img = tia_stretch(new_img, random.randint(3, 6))
+
    if config.perspective:
-        tp = random.randint(1, 100)
-        if tp >= 50:
-            warpR, (r1, c1), ratio, dst = get_warpR(config)
-            new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
-            new_img = cv2.warpPerspective(
-                new_img,
-                warpR, (int(new_w * ratio), h),
-                borderMode=config.borderMode)
+        if random.random() <= prob:
+            new_img = tia_perspective(new_img)
+
    if config.crop:
        img_height, img_width = img.shape[0:2]
-        tp = random.randint(1, 100)
-        if tp >= 50 and img_height >= 20 and img_width >= 20:
+        if random.random() <= prob and img_height >= 20 and img_width >= 20:
            new_img = get_crop(new_img)
-    if config.affine:
-        warpT = get_warpAffine(config)
-        new_img = cv2.warpAffine(
-            new_img, warpT, (w, h), borderMode=config.borderMode)
+
    if config.blur:
-        tp = random.randint(1, 100)
-        if tp >= 50:
+        if random.random() <= prob:
            new_img = blur(new_img)
    if config.color:
-        tp = random.randint(1, 100)
-        if tp >= 50:
+        if random.random() <= prob:
            new_img = cvtColor(new_img)
    if config.jitter:
        new_img = jitter(new_img)
    if config.noise:
-        tp = random.randint(1, 100)
-        if tp >= 50:
+        if random.random() <= prob:
            new_img = add_gasuss_noise(new_img)
    if config.reverse:
-        tp = random.randint(1, 100)
-        if tp >= 50:
+        if random.random() <= prob:
            new_img = 255 - new_img
    return new_img
-
-
-def process_image(img,
-                  image_shape,
-                  label=None,
-                  char_ops=None,
-                  loss_type=None,
-                  max_text_length=None,
-                  tps=None,
-                  infer_mode=False,
-                  distort=False):
-    if distort:
-        img = warp(img, 10)
-    if infer_mode and char_ops.character_type == "ch" and not tps:
-        norm_img = resize_norm_img_chinese(img, image_shape)
-    else:
-        norm_img = resize_norm_img(img, image_shape)
-
-    norm_img = norm_img[np.newaxis, :]
-    if label is not None:
-        # char_num = char_ops.get_char_num()
-        text = char_ops.encode(label)
-        if len(text) == 0 or len(text) > max_text_length:
-            logger.info(
-                "Warning in ppocr/data/rec/img_tools.py: Wrong data type."
-                "Excepted string with length between 1 and {}, but "
-                "got '{}'. Label is '{}'".format(max_text_length,
-                                                 len(text), label))
-            return None
-        else:
-            if loss_type == "ctc":
-                text = text.reshape(-1, 1)
-                return (norm_img, text)
-            elif loss_type == "attention":
-                beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
-                end_flag_idx = char_ops.get_beg_end_flag_idx("end")
-                beg_text = np.append(beg_flag_idx, text)
-                end_text = np.append(text, end_flag_idx)
-                beg_text = beg_text.reshape(-1, 1)
-                end_text = end_text.reshape(-1, 1)
-                return (norm_img, beg_text, end_text)
-            else:
-                assert False, "Unsupport loss_type %s in process_image"\
-                    % loss_type
-    return (norm_img)
-
-def resize_norm_img_srn(img, image_shape):
-    imgC, imgH, imgW = image_shape
-
-    img_black = np.zeros((imgH, imgW))
-    im_hei = img.shape[0]
-    im_wid = img.shape[1]
-
-    if im_wid <= im_hei * 1:
-        img_new = cv2.resize(img, (imgH * 1, imgH))
-    elif im_wid <= im_hei * 2:
-        img_new = cv2.resize(img, (imgH * 2, imgH))
-    elif im_wid <= im_hei * 3:
-        img_new = cv2.resize(img, (imgH * 3, imgH))
-    else:
-        img_new = cv2.resize(img, (imgW, imgH))
-
-    img_np = np.asarray(img_new)
-    img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
-    img_black[:, 0:img_np.shape[1]] = img_np
-    img_black = img_black[:, :, np.newaxis]
-
-    row, col, c = img_black.shape
-    c = 1
-
-    return np.reshape(img_black, (c, row, col)).astype(np.float32)
-
-def srn_other_inputs(image_shape,
-                     num_heads,
-                     max_text_length,
-                     char_num):
-
-    imgC, imgH, imgW = image_shape
-    feature_dim = int((imgH / 8) * (imgW / 8))
-
-    encoder_word_pos = np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype('int64')
-    gsrm_word_pos = np.array(range(0, max_text_length)).reshape((max_text_length, 1)).astype('int64')
-
-    lbl_weight = np.array([int(char_num-1)] * max_text_length).reshape((-1,1)).astype('int64')
-
-    gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) 
-    gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape([-1, 1, max_text_length, max_text_length])
-    gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1, [1, num_heads, 1, 1]) * [-1e9] 
-
-    gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape([-1, 1, max_text_length, max_text_length])
-    gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2, [1, num_heads, 1, 1]) * [-1e9] 
-
-    encoder_word_pos = encoder_word_pos[np.newaxis, :]
-    gsrm_word_pos = gsrm_word_pos[np.newaxis, :]
-
-    return [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2]
-
-def process_image_srn(img,
-                      image_shape,
-                      num_heads,
-                      max_text_length,
-                      label=None,
-                      char_ops=None,
-                      loss_type=None):
-    norm_img = resize_norm_img_srn(img, image_shape)
-    norm_img = norm_img[np.newaxis, :]
-    char_num = char_ops.get_char_num()
-
-    [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \
-        srn_other_inputs(image_shape, num_heads, max_text_length,char_num)
-
-    if label is not None:
-        text = char_ops.encode(label)
-        if len(text) == 0 or len(text) > max_text_length:
-            return None
-        else:
-            if loss_type == "srn":
-                text_padded = [int(char_num-1)] * max_text_length
-                for i in range(len(text)):
-                    text_padded[i] = text[i]
-                    lbl_weight[i] = [1.0]
-                text_padded = np.array(text_padded)
-                text = text_padded.reshape(-1, 1)
-                return (norm_img, text,encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2,lbl_weight)
-            else:
-                assert False, "Unsupport loss_type %s in process_image"\
-                    % loss_type
-    return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2)
--- a/ppocr/data/imaug/text_image_aug/init.py
+++ b/ppocr/data/imaug/text_image_aug/init.py
@ -1,13 +1,17 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .augment import tia_perspective, tia_distort, tia_stretch
+
+__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective']
--- a/ppocr/data/imaug/text_image_aug/augment.py
+++ b/ppocr/data/imaug/text_image_aug/augment.py
@ -0,0 +1,116 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from .warp_mls import WarpMLS
+
+
+def tia_distort(src, segment=4):
+    """Warp ``src`` with randomly jittered control points.
+
+    Control points are placed on the four corners and on the top and bottom
+    edges at every ``img_w // segment`` pixels; each destination point is
+    moved by a random amount in both x and y, then the image is warped with
+    WarpMLS to an output of the same width and height.
+
+    Args:
+        src: image array; only ``src.shape[:2]`` (height, width) is read here.
+        segment (int): number of horizontal segments.
+
+    Returns:
+        The array produced by ``WarpMLS.generate()``.
+    """
+    img_h, img_w = src.shape[:2]
+
+    # width of one segment, and the exclusive upper bound of the jitter
+    cut = img_w // segment
+    thresh = cut // 3
+    # NOTE(review): np.random.randint(thresh) raises ValueError when thresh
+    # is 0, i.e. when img_w < 3 * segment - confirm callers guarantee a wide
+    # enough image
+
+    src_pts = list()
+    dst_pts = list()
+
+    # the four corners, in the order top-left, top-right, bottom-right,
+    # bottom-left
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+
+    # destination corners are pulled inwards by up to thresh - 1 pixels
+    dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)])
+    dst_pts.append(
+        [img_w - np.random.randint(thresh), np.random.randint(thresh)])
+    dst_pts.append(
+        [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)])
+    dst_pts.append(
+        [np.random.randint(thresh), img_h - np.random.randint(thresh)])
+
+    half_thresh = thresh * 0.5
+
+    # interior cut points on the top and bottom edges; subtracting
+    # half_thresh centres the jitter around the original position
+    for cut_idx in np.arange(1, segment, 1):
+        src_pts.append([cut * cut_idx, 0])
+        src_pts.append([cut * cut_idx, img_h])
+        dst_pts.append([
+            cut * cut_idx + np.random.randint(thresh) - half_thresh,
+            np.random.randint(thresh) - half_thresh
+        ])
+        dst_pts.append([
+            cut * cut_idx + np.random.randint(thresh) - half_thresh,
+            img_h + np.random.randint(thresh) - half_thresh
+        ])
+
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+
+    return dst
+
+
+def tia_stretch(src, segment=4):
+    """Warp ``src`` by shifting interior cut lines horizontally.
+
+    The four corners stay fixed; each interior cut (every
+    ``img_w // segment`` pixels) is moved left or right by one random
+    offset shared by its top and bottom control point, then the image is
+    warped with WarpMLS to an output of the same width and height.
+
+    Args:
+        src: image array; only ``src.shape[:2]`` (height, width) is read here.
+        segment (int): number of horizontal segments.
+
+    Returns:
+        The array produced by ``WarpMLS.generate()``.
+    """
+    img_h, img_w = src.shape[:2]
+
+    # width of one segment, and the exclusive upper bound of the shift
+    cut = img_w // segment
+    thresh = cut * 4 // 5
+    # NOTE(review): np.random.randint(thresh) raises ValueError when thresh
+    # is 0, i.e. when img_w < 2 * segment - confirm callers guarantee a wide
+    # enough image
+
+    src_pts = list()
+    dst_pts = list()
+
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+
+    # corners map onto themselves
+    dst_pts.append([0, 0])
+    dst_pts.append([img_w, 0])
+    dst_pts.append([img_w, img_h])
+    dst_pts.append([0, img_h])
+
+    half_thresh = thresh * 0.5
+
+    for cut_idx in np.arange(1, segment, 1):
+        # one horizontal shift per cut, centred around zero
+        move = np.random.randint(thresh) - half_thresh
+        src_pts.append([cut * cut_idx, 0])
+        src_pts.append([cut * cut_idx, img_h])
+        dst_pts.append([cut * cut_idx + move, 0])
+        dst_pts.append([cut * cut_idx + move, img_h])
+
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+
+    return dst
+
+
+def tia_perspective(src):
+    """Warp ``src`` by moving its four corners vertically.
+
+    Each corner keeps its x coordinate; the top corners are moved down and
+    the bottom corners up by independent random amounts below
+    ``img_h // 2``, then the image is warped with WarpMLS to an output of
+    the same width and height.
+
+    Args:
+        src: image array; only ``src.shape[:2]`` (height, width) is read here.
+
+    Returns:
+        The array produced by ``WarpMLS.generate()``.
+    """
+    img_h, img_w = src.shape[:2]
+
+    # exclusive upper bound of the vertical corner offset
+    thresh = img_h // 2
+    # NOTE(review): np.random.randint(thresh) raises ValueError when thresh
+    # is 0, i.e. when img_h < 2 - confirm callers guarantee a tall enough
+    # image
+
+    src_pts = list()
+    dst_pts = list()
+
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+
+    dst_pts.append([0, np.random.randint(thresh)])
+    dst_pts.append([img_w, np.random.randint(thresh)])
+    dst_pts.append([img_w, img_h - np.random.randint(thresh)])
+    dst_pts.append([0, img_h - np.random.randint(thresh)])
+
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+
+    return dst
--- a/ppocr/data/imaug/text_image_aug/warp_mls.py
+++ b/ppocr/data/imaug/text_image_aug/warp_mls.py
@ -0,0 +1,164 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+class WarpMLS:
+    """Warp an image so that control points ``src_pts`` land on ``dst_pts``.
+
+    Displacements are computed on a coarse grid (``calc_delta``) and then
+    interpolated bilinearly for every output pixel (``gen_img``).
+    NOTE(review): the class name suggests a moving-least-squares
+    deformation - confirm against the reference the code was taken from.
+    """
+
+    def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.):
+        """
+        Args:
+            src: source image array.
+            src_pts: list of [x, y] control points in the source image.
+            dst_pts: list of [x, y] target positions, one per source point.
+            dst_w, dst_h: width and height of the displacement maps.
+            trans_ratio: factor applied to the displacement in gen_img.
+        """
+        self.src = src
+        self.src_pts = src_pts
+        self.dst_pts = dst_pts
+        self.pt_count = len(self.dst_pts)
+        self.dst_w = dst_w
+        self.dst_h = dst_h
+        self.trans_ratio = trans_ratio
+        # spacing, in pixels, of the grid on which displacements are computed
+        self.grid_size = 100
+        # per-pixel x / y displacement maps; only grid nodes are filled in
+        self.rdx = np.zeros((self.dst_h, self.dst_w))
+        self.rdy = np.zeros((self.dst_h, self.dst_w))
+
+    @staticmethod
+    def __bilinear_interp(x, y, v11, v12, v21, v22):
+        """Bilinear blend of four values; x weights v1* against v2*, y
+        weights v*1 against v*2."""
+        return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 *
+                                                      (1 - y) + v22 * y) * x
+
+    def generate(self):
+        """Compute the displacement grid and return the warped image."""
+        self.calc_delta()
+        return self.gen_img()
+
+    def calc_delta(self):
+        """Fill self.rdx / self.rdy at every grid node.
+
+        Nodes are visited every grid_size pixels along x (i) and y (j); the
+        last column dst_w - 1 and last row dst_h - 1 are always visited too.
+        """
+        w = np.zeros(self.pt_count, dtype=np.float32)
+
+        if self.pt_count < 2:
+            return
+
+        i = 0
+        while 1:
+            # snap the first step past the right edge back onto the last
+            # column, then stop after that column has been processed
+            if self.dst_w <= i < self.dst_w + self.grid_size - 1:
+                i = self.dst_w - 1
+            elif i >= self.dst_w:
+                break
+
+            j = 0
+            while 1:
+                # same snapping for the last row
+                if self.dst_h <= j < self.dst_h + self.grid_size - 1:
+                    j = self.dst_h - 1
+                elif j >= self.dst_h:
+                    break
+
+                sw = 0
+                swp = np.zeros(2, dtype=np.float32)
+                swq = np.zeros(2, dtype=np.float32)
+                new_pt = np.zeros(2, dtype=np.float32)
+                cur_pt = np.array([i, j], dtype=np.float32)
+
+                k = 0
+                for k in range(self.pt_count):
+                    # the node coincides with a control point: stop, k keeps
+                    # the index of that point
+                    if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+                        break
+
+                    # weight = inverse squared distance to control point k
+                    w[k] = 1. / (
+                        (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) +
+                        (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1]))
+
+                    sw += w[k]
+                    swp = swp + w[k] * np.array(self.dst_pts[k])
+                    swq = swq + w[k] * np.array(self.src_pts[k])
+
+                # NOTE(review): this test is also true when the loop broke
+                # on the LAST control point, in which case the general
+                # branch runs instead of the exact-match branch below -
+                # confirm whether that case needs separate handling
+                if k == self.pt_count - 1:
+                    # weighted centroids of the destination / source points
+                    pstar = 1 / sw * swp
+                    qstar = 1 / sw * swq
+
+                    miu_s = 0
+                    for k in range(self.pt_count):
+                        if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+                            continue
+                        pt_i = self.dst_pts[k] - pstar
+                        miu_s += w[k] * np.sum(pt_i * pt_i)
+
+                    cur_pt -= pstar
+                    # cur_pt rotated by 90 degrees
+                    cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])
+
+                    for k in range(self.pt_count):
+                        if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+                            continue
+
+                        pt_i = self.dst_pts[k] - pstar
+                        pt_j = np.array([-pt_i[1], pt_i[0]])
+
+                        tmp_pt = np.zeros(2, dtype=np.float32)
+                        tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \
+                                    np.sum(pt_j * cur_pt) * self.src_pts[k][1]
+                        tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \
+                                    np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
+                        tmp_pt *= (w[k] / miu_s)
+                        new_pt += tmp_pt
+
+                    new_pt += qstar
+                else:
+                    # exact match: the node maps to the matching source point
+                    new_pt = self.src_pts[k]
+
+                # store the displacement from the node to its source position
+                self.rdx[j, i] = new_pt[0] - i
+                self.rdy[j, i] = new_pt[1] - j
+
+                j += self.grid_size
+            i += self.grid_size
+
+    def gen_img(self):
+        """Build the warped image from the displacement grid.
+
+        For each grid cell the displacements of its four corner nodes are
+        interpolated bilinearly, the resulting source coordinates are
+        clipped to the source image, and the source is sampled with
+        bilinear interpolation. The result is clipped to [0, 255] and
+        returned as uint8.
+        """
+        src_h, src_w = self.src.shape[:2]
+        # NOTE(review): the output takes the shape of src while the loops
+        # below range over dst_h / dst_w - presumably callers always pass
+        # the source size; confirm
+        dst = np.zeros_like(self.src, dtype=np.float32)
+
+        for i in np.arange(0, self.dst_h, self.grid_size):
+            for j in np.arange(0, self.dst_w, self.grid_size):
+                ni = i + self.grid_size
+                nj = j + self.grid_size
+                w = h = self.grid_size
+                # cells on the bottom / right border are cut at the last
+                # row / column
+                if ni >= self.dst_h:
+                    ni = self.dst_h - 1
+                    h = ni - i + 1
+                if nj >= self.dst_w:
+                    nj = self.dst_w - 1
+                    w = nj - j + 1
+
+                # pixel offsets inside the cell, shaped for broadcasting
+                di = np.reshape(np.arange(h), (-1, 1))
+                dj = np.reshape(np.arange(w), (1, -1))
+                delta_x = self.__bilinear_interp(
+                    di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
+                    self.rdx[ni, j], self.rdx[ni, nj])
+                delta_y = self.__bilinear_interp(
+                    di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
+                    self.rdy[ni, j], self.rdy[ni, nj])
+                # source coordinates for every pixel of the cell
+                nx = j + dj + delta_x * self.trans_ratio
+                ny = i + di + delta_y * self.trans_ratio
+                nx = np.clip(nx, 0, src_w - 1)
+                ny = np.clip(ny, 0, src_h - 1)
+                nxi = np.array(np.floor(nx), dtype=np.int32)
+                nyi = np.array(np.floor(ny), dtype=np.int32)
+                nxi1 = np.array(np.ceil(nx), dtype=np.int32)
+                nyi1 = np.array(np.ceil(ny), dtype=np.int32)
+
+                # fractional parts are the interpolation weights
+                if len(self.src.shape) == 3:
+                    # NOTE(review): the weights are tiled to exactly 3
+                    # channels - assumes a 3-channel source; confirm
+                    x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
+                    y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
+                else:
+                    x = ny - nyi
+                    y = nx - nxi
+                dst[i:i + h, j:j + w] = self.__bilinear_interp(
+                    x, y, self.src[nyi, nxi], self.src[nyi, nxi1],
+                    self.src[nyi1, nxi], self.src[nyi1, nxi1])
+
+        dst = np.clip(dst, 0, 255)
+        dst = np.array(dst, dtype=np.uint8)
+
+        return dst
--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@ -1,77 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import random
-import numpy as np
-
-import paddle
-from ppocr.utils.utility import create_module
-from copy import deepcopy
-
-from .rec.img_tools import process_image
-import cv2
-
-import sys
-import signal
-
-
-# handle terminate reader process, do not print stack frame
-def _reader_quit(signum, frame):
-    print("Reader process exit.")
-    sys.exit()
-
-
-def _term_group(sig_num, frame):
-    print('pid {} terminated, terminate group '
-          '{}...'.format(os.getpid(), os.getpgrp()))
-    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
-
-
-signal.signal(signal.SIGTERM, _reader_quit)
-signal.signal(signal.SIGINT, _term_group)
-
-
-def reader_main(config=None, mode=None):
-    """Create a reader for trainning
-
-    Args:
-        settings: arguments
-
-    Returns:
-        train reader
-    """
-    assert mode in ["train", "eval", "test"],\
-        "Nonsupport mode:{}".format(mode)
-    global_params = config['Global']
-    if mode == "train":
-        params = deepcopy(config['TrainReader'])
-    elif mode == "eval":
-        params = deepcopy(config['EvalReader'])
-    else:
-        params = deepcopy(config['TestReader'])
-    params['mode'] = mode
-    params.update(global_params)
-    reader_function = params['reader_function']
-    function = create_module(reader_function)(params)
-    if mode == "train":
-        if sys.platform == "win32":
-            return function(0)
-        readers = []
-        num_workers = params['num_workers']
-        for process_id in range(num_workers):
-            readers.append(function(process_id))
-        return paddle.reader.multiprocess_reader(readers, False)
-    else:
-        return function(mode)
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@ -1,335 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import math
-import random
-import numpy as np
-import cv2
-
-import string
-import lmdb
-
-from ppocr.utils.utility import initial_logger
-from ppocr.utils.utility import get_image_file_list
-logger = initial_logger()
-
-from .img_tools import process_image, process_image_srn, get_img_data
-
-
-class LMDBReader(object):
-    def __init__(self, params):
-        if params['mode'] != 'train':
-            self.num_workers = 1
-        else:
-            self.num_workers = params['num_workers']
-        self.lmdb_sets_dir = params['lmdb_sets_dir']
-        self.char_ops = params['char_ops']
-        self.image_shape = params['image_shape']
-        self.loss_type = params['loss_type']
-        self.max_text_length = params['max_text_length']
-        self.mode = params['mode']
-        self.drop_last = False
-        self.use_tps = False
-        self.num_heads = None
-        if "num_heads" in params:
-            self.num_heads = params['num_heads']
-        if "tps" in params:
-            self.ues_tps = True
-        self.use_distort = False
-        if "distort" in params:
-            self.use_distort = params['distort'] and params['use_gpu']
-            if not params['use_gpu']:
-                logger.info(
-                    "Distort operation can only support in GPU. Distort will be set to False."
-                )
-        if params['mode'] == 'train':
-            self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = True
-        else:
-            self.batch_size = params['test_batch_size_per_card']
-            self.drop_last = False
-            self.use_distort = False
-        self.infer_img = params['infer_img']
-
-    def load_hierarchical_lmdb_dataset(self):
-        lmdb_sets = {}
-        dataset_idx = 0
-        for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
-            if not dirnames:
-                env = lmdb.open(
-                    dirpath,
-                    max_readers=32,
-                    readonly=True,
-                    lock=False,
-                    readahead=False,
-                    meminit=False)
-                txn = env.begin(write=False)
-                num_samples = int(txn.get('num-samples'.encode()))
-                lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \
-                    "txn":txn, "num_samples":num_samples}
-                dataset_idx += 1
-        return lmdb_sets
-
-    def print_lmdb_sets_info(self, lmdb_sets):
-        lmdb_info_strs = []
-        for dataset_idx in range(len(lmdb_sets)):
-            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
-                                   lmdb_sets[dataset_idx]['num_samples'])
-            lmdb_info_strs.append(tmp_str)
-        lmdb_info_strs = ''.join(lmdb_info_strs)
-        logger.info("DataSummary:" + lmdb_info_strs)
-        return
-
-    def close_lmdb_dataset(self, lmdb_sets):
-        for dataset_idx in lmdb_sets:
-            lmdb_sets[dataset_idx]['env'].close()
-        return
-
-    def get_lmdb_sample_info(self, txn, index):
-        label_key = 'label-%09d'.encode() % index
-        label = txn.get(label_key)
-        if label is None:
-            return None
-        label = label.decode('utf-8')
-        img_key = 'image-%09d'.encode() % index
-        imgbuf = txn.get(img_key)
-        img = get_img_data(imgbuf)
-        if img is None:
-            return None
-        return img, label
-
-    def __call__(self, process_id):
-        if self.mode != 'train':
-            process_id = 0
-
-        def sample_iter_reader():
-            if self.mode != 'train' and self.infer_img is not None:
-                image_file_list = get_image_file_list(self.infer_img)
-                for single_img in image_file_list:
-                    img = cv2.imread(single_img)
-                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                    if self.loss_type == 'srn':
-                        norm_img = process_image_srn(
-                            img=img,
-                            image_shape=self.image_shape,
-                            num_heads=self.num_heads,
-                            max_text_length=self.max_text_length)
-                    else:
-                        norm_img = process_image(
-                            img=img,
-                            image_shape=self.image_shape,
-                            char_ops=self.char_ops,
-                            tps=self.use_tps,
-                            infer_mode=True)
-                    yield norm_img
-            else:
-                lmdb_sets = self.load_hierarchical_lmdb_dataset()
-                if process_id == 0:
-                    self.print_lmdb_sets_info(lmdb_sets)
-                cur_index_sets = [1 + process_id] * len(lmdb_sets)
-                while True:
-                    finish_read_num = 0
-                    for dataset_idx in range(len(lmdb_sets)):
-                        cur_index = cur_index_sets[dataset_idx]
-                        if cur_index > lmdb_sets[dataset_idx]['num_samples']:
-                            finish_read_num += 1
-                        else:
-                            sample_info = self.get_lmdb_sample_info(
-                                lmdb_sets[dataset_idx]['txn'], cur_index)
-                            cur_index_sets[dataset_idx] += self.num_workers
-                            if sample_info is None:
-                                continue
-                            img, label = sample_info
-                            outs = []
-                            if self.loss_type == "srn":
-                                outs = process_image_srn(
-                                    img=img,
-                                    image_shape=self.image_shape,
-                                    num_heads=self.num_heads,
-                                    max_text_length=self.max_text_length,
-                                    label=label,
-                                    char_ops=self.char_ops,
-                                    loss_type=self.loss_type)
-
-                            else:
-                                outs = process_image(
-                                    img=img,
-                                    image_shape=self.image_shape,
-                                    label=label,
-                                    char_ops=self.char_ops,
-                                    loss_type=self.loss_type,
-                                    max_text_length=self.max_text_length)
-                            if outs is None:
-                                continue
-                            yield outs
-
-                    if finish_read_num == len(lmdb_sets):
-                        break
-                self.close_lmdb_dataset(lmdb_sets)
-
-        def batch_iter_reader():
-            batch_outs = []
-            for outs in sample_iter_reader():
-                batch_outs.append(outs)
-                if len(batch_outs) == self.batch_size:
-                    yield batch_outs
-                    batch_outs = []
-            if not self.drop_last:
-                if len(batch_outs) != 0:
-                    yield batch_outs
-
-        if self.infer_img is None:
-            return batch_iter_reader
-        return sample_iter_reader
-
-
-class SimpleReader(object):
-    def __init__(self, params):
-        if params['mode'] != 'train':
-            self.num_workers = 1
-        else:
-            self.num_workers = params['num_workers']
-        if params['mode'] != 'test':
-            self.img_set_dir = params['img_set_dir']
-            self.label_file_path = params['label_file_path']
-        self.use_gpu = params['use_gpu']
-        self.char_ops = params['char_ops']
-        self.image_shape = params['image_shape']
-        self.loss_type = params['loss_type']
-        self.max_text_length = params['max_text_length']
-        self.mode = params['mode']
-        self.infer_img = params['infer_img']
-        self.use_tps = False
-        if "num_heads" in params:
-            self.num_heads = params['num_heads']
-        if "tps" in params:
-            self.use_tps = True
-        self.use_distort = False
-        if "distort" in params:
-            self.use_distort = params['distort'] and params['use_gpu']
-            if not params['use_gpu']:
-                logger.info(
-                    "Distort operation can only support in GPU.Distort will be set to False."
-                )
-        if params['mode'] == 'train':
-            self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = True
-        else:
-            self.batch_size = params['test_batch_size_per_card']
-            self.drop_last = False
-            self.use_distort = False
-
-    def __call__(self, process_id):
-        if self.mode != 'train':
-            process_id = 0
-
-        def get_device_num():
-            if self.use_gpu:
-                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", '1')
-                gpu_num = len(gpus.split(','))
-                return gpu_num
-            else:
-                cpu_num = os.environ.get("CPU_NUM", 1)
-                return int(cpu_num)
-
-        def sample_iter_reader():
-            if self.mode != 'train' and self.infer_img is not None:
-                image_file_list = get_image_file_list(self.infer_img)
-                for single_img in image_file_list:
-                    img = cv2.imread(single_img)
-                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                    if self.loss_type == 'srn':
-                        norm_img = process_image_srn(
-                            img=img,
-                            image_shape=self.image_shape,
-                            char_ops=self.char_ops,
-                            num_heads=self.num_heads,
-                            max_text_length=self.max_text_length)
-                    else:
-                        norm_img = process_image(
-                            img=img,
-                            image_shape=self.image_shape,
-                            char_ops=self.char_ops,
-                            tps=self.use_tps,
-                            infer_mode=True)
-                    yield norm_img
-            else:
-                with open(self.label_file_path, "rb") as fin:
-                    label_infor_list = fin.readlines()
-                img_num = len(label_infor_list)
-                img_id_list = list(range(img_num))
-                random.shuffle(img_id_list)
-                if sys.platform == "win32" and self.num_workers != 1:
-                    print("multiprocess is not fully compatible with Windows."
-                          "num_workers will be 1.")
-                    self.num_workers = 1
-                if self.batch_size * get_device_num(
-                ) * self.num_workers > img_num:
-                    raise Exception(
-                        "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
-                        format(img_num, self.batch_size * get_device_num() *
-                               self.num_workers))
-                for img_id in range(process_id, img_num, self.num_workers):
-                    label_infor = label_infor_list[img_id_list[img_id]]
-                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
-                    img_path = self.img_set_dir + "/" + substr[0]
-                    img = cv2.imread(img_path)
-                    if img is None:
-                        logger.info("{} does not exist!".format(img_path))
-                        continue
-                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-
-                    label = substr[1]
-                    if self.loss_type == "srn":
-                        outs = process_image_srn(
-                            img=img,
-                            image_shape=self.image_shape,
-                            num_heads=self.num_heads,
-                            max_text_length=self.max_text_length,
-                            label=label,
-                            char_ops=self.char_ops,
-                            loss_type=self.loss_type)
-
-                    else:
-                        outs = process_image(
-                            img=img,
-                            image_shape=self.image_shape,
-                            label=label,
-                            char_ops=self.char_ops,
-                            loss_type=self.loss_type,
-                            max_text_length=self.max_text_length,
-                            distort=self.use_distort)
-                    if outs is None:
-                        continue
-                    yield outs
-
-        def batch_iter_reader():
-            batch_outs = []
-            for outs in sample_iter_reader():
-                batch_outs.append(outs)
-                if len(batch_outs) == self.batch_size:
-                    yield batch_outs
-                    batch_outs = []
-            if not self.drop_last:
-                if len(batch_outs) != 0:
-                    yield batch_outs
-
-        if self.infer_img is None:
-            return batch_iter_reader
-        return sample_iter_reader
--- a/ppocr/metrics/DetMetric.py
+++ b/ppocr/metrics/DetMetric.py
@ -0,0 +1,72 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+__all__ = ['DetMetric']
+
+from .eval_det_iou import DetectionIoUEvaluator
+
+
+class DetMetric(object):
+    """Collect per-image detection IoU results and combine them on demand.
+
+    Each call evaluates a batch image by image with a
+    ``DetectionIoUEvaluator`` and stores the per-image results;
+    ``get_metric`` merges everything stored so far and clears the buffer.
+    """
+
+    def __init__(self, main_indicator='hmean', **kwargs):
+        # Extra keyword arguments are accepted and ignored.
+        self.evaluator = DetectionIoUEvaluator()
+        self.main_indicator = main_indicator
+        self.reset()
+
+    def __call__(self, preds, batch, **kwargs):
+        '''
+        Evaluate one batch and store one result per image.
+
+        batch: a list produced by dataloaders; only items 2 and 3 are read.
+            image: np.ndarray of shape (N, C, H, W).
+            ratio_list: np.ndarray of shape (N, 2)
+            polygons: np.ndarray of shape (N, K, 4, 2), the polygons of
+                objective regions.
+            ignore_tags: np.ndarray of shape (N, K), indicates whether a
+                region is ignorable or not.
+        preds: a list of dict produced by post process
+            points: np.ndarray of shape (N, K, 4, 2), the polygons of
+                objective regions.
+        '''
+        polygons_per_image = batch[2]
+        tags_per_image = batch[3]
+        samples = zip(preds, polygons_per_image, tags_per_image)
+        for pred, gt_polygons, ignore_tags in samples:
+            # Ground truth: one entry per annotated polygon.
+            gt_info_list = []
+            for polygon, tag in zip(gt_polygons, ignore_tags):
+                gt_info_list.append({
+                    'points': polygon,
+                    'text': '',
+                    'ignore': tag
+                })
+            # Detections: one entry per predicted polygon.
+            det_info_list = []
+            for polygon in pred['points']:
+                det_info_list.append({'points': polygon, 'text': ''})
+            self.results.append(
+                self.evaluator.evaluate_image(gt_info_list, det_info_list))
+
+    def get_metric(self):
+        """
+        Combine the stored per-image results, then clear them.
+
+        return metrics {
+                 'precision': 0,
+                 'recall': 0,
+                 'hmean': 0
+            }
+        """
+        combined = self.evaluator.combine_results(self.results)
+        self.reset()
+        return combined
+
+    def reset(self):
+        # Drop every per-image result collected so far.
+        self.results = []
--- a/ppocr/metrics/RecMetric.py
+++ b/ppocr/metrics/RecMetric.py
@ -0,0 +1,59 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import Levenshtein
+
+
+class RecMetric(object):
+    """Accuracy and normalized edit-distance metric for text recognition.
+
+    Calling the instance scores one batch and adds it to running totals;
+    ``get_metric`` reports the totals accumulated since the last reset.
+    """
+
+    def __init__(self, main_indicator='acc', **kwargs):
+        # Extra keyword arguments are accepted and ignored.
+        self.main_indicator = main_indicator
+        self.reset()
+
+    @staticmethod
+    def _summarize(correct_num, all_num, norm_edit_dis):
+        """Turn raw counters into the metric dict without dividing by zero.
+
+        With no samples both values are reported as 0.0, so an empty
+        evaluation can never look like a perfect score.
+        """
+        if all_num == 0:
+            return {'acc': 0.0, 'norm_edit_dis': 0.0}
+        return {
+            'acc': correct_num / all_num,
+            'norm_edit_dis': 1 - norm_edit_dis / all_num
+        }
+
+    def __call__(self, pred_label, *args, **kwargs):
+        """Score one batch and add it to the running totals.
+
+        Args:
+            pred_label: a pair ``(preds, labels)``; each item of ``preds``
+                unpacks as ``(text, confidence)`` and each item of
+                ``labels`` as ``(text, _)``.
+
+        Returns:
+            dict with 'acc' and 'norm_edit_dis' for this batch only.
+        """
+        preds, labels = pred_label
+        correct_num = 0
+        all_num = 0
+        norm_edit_dis = 0.0
+        for (pred, pred_conf), (target, _) in zip(preds, labels):
+            # Two empty strings have distance 0 and length 0; the extra 1
+            # keeps that case from raising ZeroDivisionError.
+            longest = max(len(pred), len(target), 1)
+            norm_edit_dis += Levenshtein.distance(pred, target) / longest
+            if pred == target:
+                correct_num += 1
+            all_num += 1
+        self.correct_num += correct_num
+        self.all_num += all_num
+        self.norm_edit_dis += norm_edit_dis
+        return self._summarize(correct_num, all_num, norm_edit_dis)
+
+    def get_metric(self):
+        """
+        Report the accumulated metrics, then reset the counters.
+
+        return metrics {
+                 'acc': 0,
+                 'norm_edit_dis': 0,
+            }
+        """
+        metrics = self._summarize(self.correct_num, self.all_num,
+                                  self.norm_edit_dis)
+        self.reset()
+        return metrics
+
+    def reset(self):
+        # Clear the running totals.
+        self.correct_num = 0
+        self.all_num = 0
+        self.norm_edit_dis = 0
--- a/ppocr/metrics/init.py
+++ b/ppocr/metrics/init.py
@ -0,0 +1,36 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_metric']
+
+
+def build_metric(config):
+    """Instantiate the metric class named by ``config['name']``.
+
+    Args:
+        config (dict): must contain 'name' ('DetMetric' or 'RecMetric');
+            every other entry is forwarded to the metric constructor as a
+            keyword argument. The caller's dict is not modified.
+
+    Returns:
+        An instance of the selected metric class.
+
+    Raises:
+        KeyError: if 'name' is missing from ``config``.
+        AssertionError: if the name is not a supported metric.
+    """
+    from .DetMetric import DetMetric
+    from .RecMetric import RecMetric
+
+    # Explicit name -> class table: avoids eval() on a config-supplied
+    # string, which would evaluate arbitrary expressions.
+    support_dict = {'DetMetric': DetMetric, 'RecMetric': RecMetric}
+
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    if module_name not in support_dict:
+        # Raised explicitly (same exception type as the former assert) so
+        # the check is not stripped when Python runs with -O.
+        raise AssertionError('metric only support {}'.format(
+            sorted(support_dict)))
+    module_class = support_dict[module_name](**config)
+    return module_class
--- a/tools/eval_utils/eval_det_iou.py
+++ b/tools/eval_utils/eval_det_iou.py
@ -88,8 +88,8 @@ class DetectionIoUEvaluator(object):
            points = gt[n]['points']
            # transcription = gt[n]['text']
            dontCare = gt[n]['ignore']
-#             points = Polygon(points)
-#             points = points.buffer(0)
+            #             points = Polygon(points)
+            #             points = points.buffer(0)
            if not Polygon(points).is_valid or not Polygon(points).is_simple:
                continue

@ -105,8 +105,8 @@ class DetectionIoUEvaluator(object):

        for n in range(len(pred)):
            points = pred[n]['points']
-#             points = Polygon(points)
-#             points = points.buffer(0)
+            #             points = Polygon(points)
+            #             points = points.buffer(0)
            if not Polygon(points).is_valid or not Polygon(points).is_simple:
                continue

--- a/ppocr/modeling/init.py
+++ b/ppocr/modeling/init.py
@ -11,3 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import copy
+from .losses import build_loss
+
+__all__ = ['build_model', 'build_loss']
+
+
+def build_model(config):
+    """Build a ``Model`` from a deep copy of ``config``.
+
+    The copy is handed to the constructor, so the caller's dict is not the
+    object the model receives.
+    """
+    from .architectures import Model
+
+    return Model(copy.deepcopy(config))
--- a/ppocr/modeling/architectures/init.py
+++ b/ppocr/modeling/architectures/init.py
@ -11,3 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .model import Model
+__all__ = ['Model']
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
@ -1,146 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from copy import deepcopy
-
-
-class DetModel(object):
-    def __init__(self, params):
-        """
-        Detection module for OCR text detection.
-        args:
-            params (dict): the super parameters for detection module.
-        """
-        global_params = params['Global']
-        self.algorithm = global_params['algorithm']
-
-        backbone_params = deepcopy(params["Backbone"])
-        backbone_params.update(global_params)
-        self.backbone = create_module(backbone_params['function'])\
-                (params=backbone_params)
-
-        head_params = deepcopy(params["Head"])
-        head_params.update(global_params)
-        self.head = create_module(head_params['function'])\
-                (params=head_params)
-
-        loss_params = deepcopy(params["Loss"])
-        loss_params.update(global_params)
-        self.loss = create_module(loss_params['function'])\
-                (params=loss_params)
-
-        self.image_shape = global_params['image_shape']
-
-    def create_feed(self, mode):
-        """
-        create Dataloader feeds
-        args:
-            mode (str): 'train' for training  or else for evaluation
-        return: (image, corresponding label, dataloader)
-        """
-        image_shape = deepcopy(self.image_shape)
-        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
-            raise Exception("The size of the image must be divisible by 4, "
-                            "received image shape is {}, please reset the "
-                            "Global.image_shape in the yml file".format(
-                                image_shape))
-
-        image = fluid.layers.data(
-            name='image', shape=image_shape, dtype='float32')
-        if mode == "train":
-            if self.algorithm == "EAST":
-                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
-                score = fluid.layers.data(
-                    name='score', shape=[1, h, w], dtype='float32')
-                geo = fluid.layers.data(
-                    name='geo', shape=[9, h, w], dtype='float32')
-                mask = fluid.layers.data(
-                    name='mask', shape=[1, h, w], dtype='float32')
-                feed_list = [image, score, geo, mask]
-                labels = {'score': score, 'geo': geo, 'mask': mask}
-            elif self.algorithm == "DB":
-                shrink_map = fluid.layers.data(
-                    name='shrink_map', shape=image_shape[1:], dtype='float32')
-                shrink_mask = fluid.layers.data(
-                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
-                threshold_map = fluid.layers.data(
-                    name='threshold_map',
-                    shape=image_shape[1:],
-                    dtype='float32')
-                threshold_mask = fluid.layers.data(
-                    name='threshold_mask',
-                    shape=image_shape[1:],
-                    dtype='float32')
-                feed_list=[image, shrink_map, shrink_mask,\
-                    threshold_map, threshold_mask]
-                labels = {'shrink_map':shrink_map,\
-                    'shrink_mask':shrink_mask,\
-                    'threshold_map':threshold_map,\
-                    'threshold_mask':threshold_mask}
-            elif self.algorithm == "SAST":
-                input_score = fluid.layers.data(
-                    name='score', shape=[1, 128, 128], dtype='float32')
-                input_border = fluid.layers.data(
-                    name='border', shape=[5, 128, 128], dtype='float32')
-                input_mask = fluid.layers.data(
-                    name='mask', shape=[1, 128, 128], dtype='float32')
-                input_tvo = fluid.layers.data(
-                    name='tvo', shape=[9, 128, 128], dtype='float32')
-                input_tco = fluid.layers.data(
-                    name='tco', shape=[3, 128, 128], dtype='float32')
-                feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
-                labels = {'input_score': input_score,\
-                    'input_border': input_border,\
-                    'input_mask': input_mask,\
-                    'input_tvo': input_tvo,\
-                    'input_tco': input_tco}
-            loader = fluid.io.DataLoader.from_generator(
-                feed_list=feed_list,
-                capacity=64,
-                use_double_buffer=True,
-                iterable=False)
-        else:
-            labels = None
-            loader = None
-        return image, labels, loader
-
-    def __call__(self, mode):
-        """
-        run forward of defined module
-        args:
-            mode (str): 'train' for training; 'export'  for inference,
-                others for evaluation]
-        """
-        image, labels, loader = self.create_feed(mode)
-        conv_feas = self.backbone(image)
-        if self.algorithm == "DB":
-            predicts = self.head(conv_feas, mode)
-        else:
-            predicts = self.head(conv_feas)
-        if mode == "train":
-            losses = self.loss(predicts, labels)
-            return loader, losses
-        elif mode == "export":
-            return [image, predicts]
-        else:
-            return loader, predicts
--- a/ppocr/modeling/architectures/model.py
+++ b/ppocr/modeling/architectures/model.py
@ -0,0 +1,129 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os, sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+# Make the repository root importable so `ppocr` and `tools` resolve when this
+# file is run as a script. This file lives in ppocr/modeling/architectures/,
+# so the root is three directories up; deriving it from __file__ replaces a
+# developer-specific absolute path that only existed on one machine.
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
+
+import paddle
+from paddle import nn
+from ppocr.modeling.transform import build_transform
+from ppocr.modeling.backbones import build_backbone
+from ppocr.modeling.necks import build_neck
+from ppocr.modeling.heads import build_head
+
+__all__ = ['Model']
+
+
+class Model(nn.Layer):
+    """
+    Configurable OCR network: optional transform -> backbone -> optional
+    neck -> head. Each stage's `in_channels` is taken from the previous
+    stage's `out_channels`.
+    """
+
+    def __init__(self, config):
+        """
+        Build the sub-modules described by `config`.
+        args:
+            config (dict): architecture config. Reads 'algorithm', 'type',
+                the optional 'in_channels' (default 3), the optional
+                'Transform' and 'Neck' sections, and the required 'Backbone'
+                and 'Head' sections. The dict passed in is left untouched.
+        """
+        super(Model, self).__init__()
+        # This constructor writes 'in_channels' into the sections and
+        # build_backbone pops 'name' from the one it receives, so operate on
+        # a private copy: the caller's config stays reusable (e.g. to build
+        # a second Model from the same dict).
+        from copy import deepcopy
+        config = deepcopy(config)
+
+        algorithm = config['algorithm']
+        self.type = config['type']
+        self.model_name = '{}_{}'.format(self.type, algorithm)
+
+        in_channels = config.get('in_channels', 3)
+        # build transform
+        # for rec, transform can be TPS or None
+        # for det and cls, transform should be None; if you design the model
+        # differently, you can also use a transform in det and cls
+        transform_config = config.get('Transform')
+        if transform_config is None:
+            self.use_transform = False
+        else:
+            self.use_transform = True
+            transform_config['in_channels'] = in_channels
+            self.transform = build_transform(transform_config)
+            in_channels = self.transform.out_channels
+
+        # build backbone, a backbone is needed for det, rec and cls
+        config["Backbone"]['in_channels'] = in_channels
+        self.backbone = build_backbone(config["Backbone"], self.type)
+        in_channels = self.backbone.out_channels
+
+        # build neck
+        # for rec, neck can be cnn, rnn or reshape (None)
+        # for det, neck can be FPN, BIFPN and so on
+        # for cls, neck should be None
+        neck_config = config.get('Neck')
+        if neck_config is None:
+            self.use_neck = False
+        else:
+            self.use_neck = True
+            neck_config['in_channels'] = in_channels
+            self.neck = build_neck(neck_config)
+            in_channels = self.neck.out_channels
+
+        # build head, a head is needed for det, rec and cls
+        config["Head"]['in_channels'] = in_channels
+        self.head = build_head(config["Head"])
+
+    # @paddle.jit.to_static
+    def forward(self, x):
+        """
+        Run x through transform (if any), backbone, neck (if any) and head,
+        and return the head output.
+        """
+        if self.use_transform:
+            x = self.transform(x)
+        x = self.backbone(x)
+        if self.use_neck:
+            x = self.neck(x)
+        x = self.head(x)
+        return x
+
+
+def check_static(
+        config_path='configs/det/det_r50_vd_db.yml',
+        weights_path='/Users/zhoujun20/Desktop/code/PaddleOCR/db/db',
+        static_out_path='/Users/zhoujun20/Desktop/code/PaddleOCR/db/db.npy'):
+    """
+    Debug helper: build the dygraph Model, load weights into it, run an
+    all-zero input and print how far the output is from a saved reference.
+    args:
+        config_path (str): config file handed to program.load_config.
+        weights_path (str): weights prefix handed to load_dygraph_pretrain
+            (loaded with load_static_weights=True).
+        static_out_path (str): .npy file holding the reference output;
+            presumably produced by the static-graph model on the same
+            input -- confirm before relying on the printed difference.
+    The defaults are the original hard-coded locations, so calling
+    check_static() without arguments behaves as before.
+    """
+    import numpy as np
+    from ppocr.utils.save_load import load_dygraph_pretrain
+    from ppocr.utils.logging import get_logger
+    from tools import program
+
+    config = program.load_config(config_path)
+
+    logger = get_logger()
+    data = np.zeros((1, 3, 640, 640), dtype=np.float32)
+    paddle.disable_static()
+
+    config['Architecture']['in_channels'] = 3
+    # NOTE(review): 6624 looks like a recognition character count rather than
+    # a detection head setting -- confirm it is intended for this config.
+    config['Architecture']["Head"]['out_channels'] = 6624
+    model = Model(config['Architecture'])
+    # Evaluation mode, so the comparison uses inference behaviour.
+    model.eval()
+    load_dygraph_pretrain(
+        model, logger, weights_path, load_static_weights=True)
+    x = paddle.to_variable(data)
+    y = model(x)
+    for y1 in y:
+        print(y1.shape)
+
+    static_out = np.load(static_out_path)
+    diff = y.numpy() - static_out
+    print(y.shape, static_out.shape, diff.mean())
+
+
+if __name__ == '__main__':
+    check_static()
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@ -1,228 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from copy import deepcopy
-
-
-class RecModel(object):
-    """
-    Text recognition network assembled from config: an optional TPS module,
-    a backbone, a head and a loss, each created through create_module from
-    the 'function' entry of its config section.
-    """
-
-    def __init__(self, params):
-        """
-        args:
-            params (dict): config holding a 'Global' section plus 'Backbone',
-                'Head' and 'Loss' sections and, optionally, a 'TPS' section.
-                'Global' is merged into a copy of every section before the
-                module is built, and itself gains a 'char_num' entry.
-        """
-        super(RecModel, self).__init__()
-        global_params = params['Global']
-        char_num = global_params['char_ops'].get_char_num()
-        global_params['char_num'] = char_num
-        self.char_type = global_params['character_type']
-        self.infer_img = global_params['infer_img']
-        if "TPS" in params:
-            tps_params = deepcopy(params["TPS"])
-            tps_params.update(global_params)
-            self.tps = create_module(tps_params['function'])\
-                (params=tps_params)
-        else:
-            self.tps = None
-
-        backbone_params = deepcopy(params["Backbone"])
-        backbone_params.update(global_params)
-        self.backbone = create_module(backbone_params['function'])\
-                (params=backbone_params)
-
-        head_params = deepcopy(params["Head"])
-        head_params.update(global_params)
-        self.head = create_module(head_params['function'])\
-                (params=head_params)
-
-        loss_params = deepcopy(params["Loss"])
-        loss_params.update(global_params)
-        self.loss = create_module(loss_params['function'])\
-                (params=loss_params)
-
-        self.loss_type = global_params['loss_type']
-        self.image_shape = global_params['image_shape']
-        self.max_text_length = global_params['max_text_length']
-        # num_heads is only read when building the SRN feed variables.
-        if "num_heads" in global_params:
-            self.num_heads = global_params["num_heads"]
-        else:
-            self.num_heads = None
-
-    def create_feed(self, mode):
-        """
-        Declare the input variables for the given mode.
-        args:
-            mode (str): 'train' creates image + label variables and a
-                DataLoader; any other value creates the image variable (and
-                the SRN auxiliary inputs when loss_type is 'srn') only.
-        returns:
-            (image, labels, loader); outside training loader is None and
-            labels is None unless loss_type is 'srn'.
-        """
-        image_shape = deepcopy(self.image_shape)
-        # Prepend the batch dimension.
-        image_shape.insert(0, -1)
-        if mode == "train":
-            image = fluid.data(name='image', shape=image_shape, dtype='float32')
-            if self.loss_type == "attention":
-                label_in = fluid.data(
-                    name='label_in',
-                    shape=[None, 1],
-                    dtype='int32',
-                    lod_level=1)
-                label_out = fluid.data(
-                    name='label_out',
-                    shape=[None, 1],
-                    dtype='int32',
-                    lod_level=1)
-                feed_list = [image, label_in, label_out]
-                labels = {'label_in': label_in, 'label_out': label_out}
-            elif self.loss_type == "srn":
-                encoder_word_pos = fluid.data(
-                    name="encoder_word_pos",
-                    shape=[
-                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
-                        1
-                    ],
-                    dtype="int64")
-                gsrm_word_pos = fluid.data(
-                    name="gsrm_word_pos",
-                    shape=[-1, self.max_text_length, 1],
-                    dtype="int64")
-                gsrm_slf_attn_bias1 = fluid.data(
-                    name="gsrm_slf_attn_bias1",
-                    shape=[
-                        -1, self.num_heads, self.max_text_length,
-                        self.max_text_length
-                    ],
-                    dtype="float32")
-                gsrm_slf_attn_bias2 = fluid.data(
-                    name="gsrm_slf_attn_bias2",
-                    shape=[
-                        -1, self.num_heads, self.max_text_length,
-                        self.max_text_length
-                    ],
-                    dtype="float32")
-                lbl_weight = fluid.layers.data(
-                    name="lbl_weight", shape=[-1, 1], dtype='int64')
-                label = fluid.data(
-                    name='label', shape=[-1, 1], dtype='int32', lod_level=1)
-                feed_list = [
-                    image, label, encoder_word_pos, gsrm_word_pos,
-                    gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight
-                ]
-                labels = {
-                    'label': label,
-                    'encoder_word_pos': encoder_word_pos,
-                    'gsrm_word_pos': gsrm_word_pos,
-                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
-                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,
-                    'lbl_weight': lbl_weight
-                }
-            else:
-                label = fluid.data(
-                    name='label', shape=[None, 1], dtype='int32', lod_level=1)
-                feed_list = [image, label]
-                labels = {'label': label}
-            loader = fluid.io.DataLoader.from_generator(
-                feed_list=feed_list,
-                capacity=64,
-                use_double_buffer=True,
-                iterable=False)
-        else:
-            labels = None
-            loader = None
-            if self.char_type == "ch" and self.infer_img:
-                # Leave the last image dimension dynamic for 'ch' inference.
-                image_shape[-1] = -1
-                if self.tps is not None:
-                    logger.info(
-                        "WARNRNG!!!\n"
-                        "TPS does not support variable shape in chinese!"
-                        "We set img_shape to be the same , it may affect the inference effect"
-                    )
-                    # Falls back to the configured shape, without the batch
-                    # dimension that was inserted above.
-                    image_shape = deepcopy(self.image_shape)
-            image = fluid.data(name='image', shape=image_shape, dtype='float32')
-            if self.loss_type == "srn":
-                encoder_word_pos = fluid.data(
-                    name="encoder_word_pos",
-                    shape=[
-                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
-                        1
-                    ],
-                    dtype="int64")
-                gsrm_word_pos = fluid.data(
-                    name="gsrm_word_pos",
-                    shape=[-1, self.max_text_length, 1],
-                    dtype="int64")
-                gsrm_slf_attn_bias1 = fluid.data(
-                    name="gsrm_slf_attn_bias1",
-                    shape=[
-                        -1, self.num_heads, self.max_text_length,
-                        self.max_text_length
-                    ],
-                    dtype="float32")
-                gsrm_slf_attn_bias2 = fluid.data(
-                    name="gsrm_slf_attn_bias2",
-                    shape=[
-                        -1, self.num_heads, self.max_text_length,
-                        self.max_text_length
-                    ],
-                    dtype="float32")
-                # No loader is created outside training, so no feed list is
-                # assembled here; the head receives these through `labels`.
-                labels = {
-                    'encoder_word_pos': encoder_word_pos,
-                    'gsrm_word_pos': gsrm_word_pos,
-                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
-                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2
-                }
-        return image, labels, loader
-
-    def __call__(self, mode):
-        """
-        Build the network for the given mode.
-        args:
-            mode (str): 'train', 'export', or anything else for evaluation.
-        returns:
-            'train':  (loader, outputs) where outputs holds 'total_loss',
-                      'decoded_out', 'label' (plus 'img_loss' and 'word_loss'
-                      for SRN).
-            'export': [image, {'decoded_out': ..., 'predicts': ...}]
-            others:   (loader, {'decoded_out': ..., 'predicts': ...})
-        raises:
-            Exception: in 'export' mode when loss_type is 'srn'.
-        """
-        image, labels, loader = self.create_feed(mode)
-        if self.tps is None:
-            inputs = image
-        else:
-            inputs = self.tps(image)
-        conv_feas = self.backbone(inputs)
-        predicts = self.head(conv_feas, labels, mode)
-        decoded_out = predicts['decoded_out']
-        if mode == "train":
-            # Evaluate the loss exactly once; calling it a second time for
-            # SRN would build the loss computation twice.
-            loss = self.loss(predicts, labels)
-            if self.loss_type == "attention":
-                label = labels['label_out']
-            else:
-                label = labels['label']
-            if self.loss_type == 'srn':
-                total_loss, img_loss, word_loss = loss
-                outputs = {
-                    'total_loss': total_loss,
-                    'img_loss': img_loss,
-                    'word_loss': word_loss,
-                    'decoded_out': decoded_out,
-                    'label': label
-                }
-            else:
-                outputs = {
-                    'total_loss': loss,
-                    'decoded_out': decoded_out,
-                    'label': label
-                }
-            return loader, outputs
-
-        elif mode == "export":
-            predict = predicts['predict']
-            if self.loss_type == "ctc":
-                predict = fluid.layers.softmax(predict)
-            if self.loss_type == "srn":
-                raise Exception(
-                    "Warning! SRN does not support export model currently")
-            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
-        else:
-            predict = predicts['predict']
-            if self.loss_type == "ctc":
-                predict = fluid.layers.softmax(predict)
-            return loader, {'decoded_out': decoded_out, 'predicts': predict}
--- a/ppocr/modeling/backbones/init.py
+++ b/ppocr/modeling/backbones/init.py
@ -11,3 +11,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+__all__ = ['build_backbone']
+
+
+def build_backbone(config, model_type):
+    """
+    Instantiate the backbone named by config['name'] for the given model type.
+    args:
+        config (dict): backbone config; 'name' selects the class and every
+            other entry is passed to its constructor as a keyword argument.
+            The dict passed in is not modified.
+        model_type (str): 'det' or 'rec'.
+    returns:
+        the constructed backbone instance.
+    raises:
+        NotImplementedError: for an unknown model_type, or for a name that
+            is listed as supported but has no implementation imported here.
+        AssertionError: when the name is not in the supported list.
+    """
+    if model_type == 'det':
+        from .det_mobilenet_v3 import MobileNetV3
+        from .det_resnet_vd import ResNet
+
+        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
+    elif model_type == 'rec':
+        from .rec_mobilenet_v3 import MobileNetV3
+        from .rec_resnet_vd import ResNet
+        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_FPN']
+    else:
+        raise NotImplementedError
+
+    # Explicit name -> class table instead of eval(): a config string can
+    # only ever select one of these classes, never run arbitrary code.
+    backbone_classes = {'MobileNetV3': MobileNetV3, 'ResNet': ResNet}
+
+    # Shallow copy so that removing 'name' does not alter the caller's dict.
+    config = dict(config)
+    module_name = config.pop('name')
+    assert module_name in support_dict, \
+        'when model typs is {}, backbone only support {}'.format(
+            model_type, support_dict)
+    if module_name not in backbone_classes:
+        # Listed as supported but no class is imported for it above.
+        raise NotImplementedError(
+            'backbone {} is not implemented for model type {}'.format(
+                module_name, model_type))
+    return backbone_classes[module_name](**config)
--- a/ppocr/modeling/backbones/det_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/det_mobilenet_v3.py
@ -1,40 +1,48 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr

 __all__ = ['MobileNetV3']


-class MobileNetV3():
-    def __init__(self, params):
+def make_divisible(v, divisor=8, min_value=None):
+    """
+    Snap a channel count to a multiple of `divisor`.
+    args:
+        v (number): requested value.
+        divisor (int): step the result is aligned to.
+        min_value (number): lower bound for the result; defaults to divisor.
+    returns:
+        int(v + divisor / 2) floored to a multiple of divisor and clamped to
+        at least min_value, bumped by one divisor when that would fall below
+        90% of v.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    snapped = int(v + divisor / 2) // divisor * divisor
+    result = max(lower_bound, snapped)
+    # Do not let the rounding shrink the value by more than 10%.
+    return result + divisor if result < 0.9 * v else result
+
+
+class MobileNetV3(nn.Layer):
+    def __init__(self, in_channels=3, model_name='large', scale=0.5, **kwargs):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            params(dict): the super parameters for build network
        """
-        self.scale = params['scale']
-        model_name = params['model_name']
-        self.inplanes = 16
+        super(MobileNetV3, self).__init__()
        if model_name == "large":
-            self.cfg = [
+            cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
@ -52,10 +60,9 @@ class MobileNetV3():
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
+            cls_ch_squeeze = 960
        elif model_name == "small":
-            self.cfg = [
+            cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
@ -69,183 +76,203 @@ class MobileNetV3():
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
+            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert self.scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)
-
-    def __call__(self, input):
-        scale = self.scale
-        inplanes = self.inplanes
-        cfg = self.cfg
-        cls_ch_squeeze = self.cls_ch_squeeze
-        cls_ch_expand = self.cls_ch_expand
-        #conv1
-        conv = self.conv_bn_layer(
-            input,
-            filter_size=3,
-            num_filters=self.make_divisible(inplanes * scale),
+        assert scale in supported_scale, \
+            "supported scale are {} but input scale is {}".format(supported_scale, scale)
+        inplanes = 16
+        # conv1
+        self.conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=make_divisible(inplanes * scale),
+            kernel_size=3,
            stride=2,
            padding=1,
-            num_groups=1,
+            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
+
+        self.stages = []
+        self.out_channels = []
+        block_list = []
        i = 0
-        inplanes = self.make_divisible(inplanes * scale)
-        outs = []
-        for layer_cfg in cfg:
-            if layer_cfg[5] == 2 and i > 2:
-                outs.append(conv)
-            conv = self.residual_unit(
-                input=conv,
-                num_in_filter=inplanes,
-                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
-                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
-                act=layer_cfg[4],
-                stride=layer_cfg[5],
-                filter_size=layer_cfg[0],
-                use_se=layer_cfg[3],
-                name='conv' + str(i + 2))
-            inplanes = self.make_divisible(scale * layer_cfg[2])
+        inplanes = make_divisible(inplanes * scale)
+        for (k, exp, c, se, nl, s) in cfg:
+            if s == 2 and i > 2:
+                self.out_channels.append(inplanes)
+                self.stages.append(nn.Sequential(*block_list))
+                block_list = []
+            block_list.append(
+                ResidualUnit(
+                    in_channels=inplanes,
+                    mid_channels=make_divisible(scale * exp),
+                    out_channels=make_divisible(scale * c),
+                    kernel_size=k,
+                    stride=s,
+                    use_se=se,
+                    act=nl,
+                    name="conv" + str(i + 2)))
+            inplanes = make_divisible(scale * c)
            i += 1
+        block_list.append(
+            ConvBNLayer(
+                in_channels=inplanes,
+                out_channels=make_divisible(scale * cls_ch_squeeze),
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                groups=1,
+                if_act=True,
+                act='hard_swish',
+                name='conv_last'))

-        conv = self.conv_bn_layer(
-            input=conv,
-            filter_size=1,
-            num_filters=self.make_divisible(scale * cls_ch_squeeze),
-            stride=1,
-            padding=0,
-            num_groups=1,
-            if_act=True,
-            act='hard_swish',
-            name='conv_last')
-        outs.append(conv)
-        return outs
+        self.stages.append(nn.Sequential(*block_list))
+        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
+        for i, stage in enumerate(self.stages):
+            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      num_groups=1,
-                      if_act=True,
-                      act=None,
-                      name=None,
-                      use_cudnn=True,
-                      res_last_bn_init=False):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
+    def forward(self, x):
+        x = self.conv(x)
+        out_list = []
+        for stage in self.stages:
+            x = stage(x)
+            out_list.append(x)
+        return out_list
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+        self.conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(name=name + '_weights'),
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
-        bn_name = name + '_bn'
-        bn = fluid.layers.batch_norm(
-            input=conv,
-            param_attr=ParamAttr(
-                name=bn_name + "_scale",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            bias_attr=ParamAttr(
-                name=bn_name + "_offset",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-        if if_act:
-            if act == 'relu':
-                bn = fluid.layers.relu(bn)
-            elif act == 'hard_swish':
-                bn = fluid.layers.hard_swish(bn)
-        return bn

-    def make_divisible(self, v, divisor=8, min_value=None):
-        if min_value is None:
-            min_value = divisor
-        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-        if new_v < 0.9 * v:
-            new_v += divisor
-        return new_v
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=None,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")

-    def se_block(self, input, num_out_filter, ratio=4, name=None):
-        num_mid_filter = num_out_filter // ratio
-        pool = fluid.layers.pool2d(
-            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
-        conv1 = fluid.layers.conv2d(
-            input=pool,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            act='relu',
-            param_attr=ParamAttr(name=name + '_1_weights'),
-            bias_attr=ParamAttr(name=name + '_1_offset'))
-        conv2 = fluid.layers.conv2d(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            act='hard_sigmoid',
-            param_attr=ParamAttr(name=name + '_2_weights'),
-            bias_attr=ParamAttr(name=name + '_2_offset'))
-        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
-        return scale
+    def forward(self, x):
+        """
+        Apply convolution, batch norm and, when if_act is set, the configured
+        activation ('relu' or 'hard_swish').
+        raises:
+            ValueError: when if_act is set and act is neither 'relu' nor
+                'hard_swish'.
+        """
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.if_act:
+            if self.act == "relu":
+                x = F.relu(x)
+            elif self.act == "hard_swish":
+                x = F.hard_swish(x)
+            else:
+                # Raise instead of print + exit(): exit() would terminate the
+                # whole process with a success status and no traceback, which
+                # hides the misconfiguration from callers and job schedulers.
+                raise ValueError(
+                    "The activation function is selected incorrectly.")
+        return x

-    def residual_unit(self,
-                      input,
-                      num_in_filter,
-                      num_mid_filter,
-                      num_out_filter,
-                      stride,
-                      filter_size,
-                      act=None,
-                      use_se=False,
-                      name=None):

-        conv0 = self.conv_bn_layer(
-            input=input,
-            filter_size=1,
-            num_filters=num_mid_filter,
+class ResidualUnit(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 mid_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 use_se,
+                 act=None,
+                 name=''):
+        super(ResidualUnit, self).__init__()
+        self.if_shortcut = stride == 1 and in_channels == out_channels
+        self.if_se = use_se
+
+        self.expand_conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=mid_channels,
+            kernel_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
-            name=name + '_expand')
-
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            filter_size=filter_size,
-            num_filters=num_mid_filter,
+            name=name + "_expand")
+        self.bottleneck_conv = ConvBNLayer(
+            in_channels=mid_channels,
+            out_channels=mid_channels,
+            kernel_size=kernel_size,
            stride=stride,
-            padding=int((filter_size - 1) // 2),
+            padding=int((kernel_size - 1) // 2),
+            groups=mid_channels,
            if_act=True,
            act=act,
-            num_groups=num_mid_filter,
-            use_cudnn=False,
-            name=name + '_depthwise')
-        if use_se:
-            conv1 = self.se_block(
-                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
-
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
+            name=name + "_depthwise")
+        if self.if_se:
+            self.mid_se = SEModule(mid_channels, name=name + "_se")
+        self.linear_conv = ConvBNLayer(
+            in_channels=mid_channels,
+            out_channels=out_channels,
+            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
-            name=name + '_linear',
-            res_last_bn_init=True)
-        if num_in_filter != num_out_filter or stride != 1:
-            return conv2
-        else:
-            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
+            act=None,
+            name=name + "_linear")
+
+    def forward(self, inputs):
+        """
+        Run the expand, depthwise, optional SE and linear stages, adding the
+        input back when the unit was built with a shortcut.
+        """
+        out = self.bottleneck_conv(self.expand_conv(inputs))
+        if self.if_se:
+            out = self.mid_se(out)
+        out = self.linear_conv(out)
+        if not self.if_shortcut:
+            return out
+        return paddle.elementwise_add(inputs, out)
+
+
+class SEModule(nn.Layer):
+    """
+    Squeeze-and-excitation gate: global average pool, 1x1 conv down to
+    in_channels // reduction with ReLU, 1x1 conv back to in_channels with
+    hard sigmoid, then multiply the result onto the input.
+    """
+
+    def __init__(self, in_channels, reduction=4, name=""):
+        """
+        args:
+            in_channels (int): channels of the input (and of the output).
+            reduction (int): divisor for the hidden channel count.
+            name (str): prefix for the parameter names of both convolutions.
+        """
+        super(SEModule, self).__init__()
+        hidden_channels = in_channels // reduction
+        self.avg_pool = nn.Pool2D(
+            pool_type="avg", global_pooling=True, use_cudnn=False)
+        # Squeeze: in_channels -> hidden_channels.
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=hidden_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(name=name + "_1_weights"),
+            bias_attr=ParamAttr(name=name + "_1_offset"))
+        # Excite: hidden_channels -> in_channels.
+        self.conv2 = nn.Conv2d(
+            in_channels=hidden_channels,
+            out_channels=in_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(name=name + "_2_weights"),
+            bias_attr=ParamAttr(name=name + "_2_offset"))
+
+    def forward(self, inputs):
+        """Return inputs scaled by the gate computed from them."""
+        gate = F.relu(self.conv1(self.avg_pool(inputs)))
+        gate = F.hard_sigmoid(self.conv2(gate))
+        return inputs * gate
--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
@ -1,252 +1,329 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+from paddle import nn
+from paddle.nn import functional as F
+from paddle import ParamAttr

 __all__ = ["ResNet"]


-class ResNet(object):
-    def __init__(self, params):
+class ResNet(nn.Layer):
+    def __init__(self, in_channels=3, layers=50, **kwargs):
         """
-        the Resnet backbone network for detection module.
-        Args:
-            params(dict): the super parameters for network build
+        ResNet backbone network for the detection module.
+
+        Args:
+            in_channels(int): number of channels of the input image.
+            layers(int): network depth; must be one of the keys of
+                ``supported_layers`` (18, 34, 50, 101, 152 or 200).
+            **kwargs: extra keyword arguments are accepted and ignored.
+
+        After construction ``self.out_channels`` holds the output channel
+        count of each of the four stages, in order.
         """
-        self.layers = params['layers']
-        supported_layers = [18, 34, 50, 101, 152]
-        assert self.layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
-        self.is_3x3 = True
+        super(ResNet, self).__init__()
+        supported_layers = {
+            18: {
+                'depth': [2, 2, 2, 2],
+                'block_class': BasicBlock
+            },
+            34: {
+                'depth': [3, 4, 6, 3],
+                'block_class': BasicBlock
+            },
+            50: {
+                'depth': [3, 4, 6, 3],
+                'block_class': BottleneckBlock
+            },
+            101: {
+                'depth': [3, 4, 23, 3],
+                'block_class': BottleneckBlock
+            },
+            152: {
+                'depth': [3, 8, 36, 3],
+                'block_class': BottleneckBlock
+            },
+            200: {
+                'depth': [3, 12, 48, 3],
+                'block_class': BottleneckBlock
+            }
+        }
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)
+        is_3x3 = True
+
+        depth = supported_layers[layers]['depth']
+        block_class = supported_layers[layers]['block_class']

-    def __call__(self, input):
-        layers = self.layers
-        is_3x3 = self.is_3x3
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
         num_filters = [64, 128, 256, 512]
-        outs = []

+        # stem: a single 7x7 conv, or three stacked 3x3 convs when is_3x3
+        conv = []
         if is_3x3 == False:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=64,
-                filter_size=7,
-                stride=2,
-                act='relu')
+            conv.append(
+                ConvBNLayer(
+                    in_channels=in_channels,
+                    out_channels=64,
+                    kernel_size=7,
+                    stride=2,
+                    act='relu',
+                    # ConvBNLayer builds its parameter names from `name`, so
+                    # it cannot be left as None; "conv1" is the name its
+                    # batch-norm naming special-cases.
+                    name='conv1'))
         else:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=32,
-                filter_size=3,
-                stride=2,
-                act='relu',
-                name='conv1_1')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=32,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_2')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=64,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_3')
-
-        conv = fluid.layers.pool2d(
-            input=conv,
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
-
-        if layers >= 50:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    if layers in [101, 152, 200] and block == 2:
+            conv.append(
+                ConvBNLayer(
+                    # honour the constructor argument instead of a fixed 3
+                    in_channels=in_channels,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=2,
+                    act='relu',
+                    name='conv1_1'))
+            conv.append(
+                ConvBNLayer(
+                    in_channels=32,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=1,
+                    act='relu',
+                    name='conv1_2'))
+            conv.append(
+                ConvBNLayer(
+                    in_channels=32,
+                    out_channels=64,
+                    kernel_size=3,
+                    stride=1,
+                    act='relu',
+                    name='conv1_3'))
+        self.conv1 = nn.Sequential(*conv)
+        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.stages = []
+        self.out_channels = []
+        in_ch = 64
+        for block_index in range(len(depth)):
+            block_list = []
+            for i in range(depth[block_index]):
+                if layers >= 50:
+                    if layers in [101, 152, 200] and block_index == 2:
                         if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
+                            conv_name = "res" + str(block_index + 2) + "a"
                         else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                            conv_name = "res" + str(block_index +
+                                                    2) + "b" + str(i)
                     else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    conv = self.bottleneck_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=2 if i == 0 and block != 0 else 1,
-                        if_first=block == i == 0,
-                        name=conv_name)
-                outs.append(conv)
-        else:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    conv = self.basic_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=2 if i == 0 and block != 0 else 1,
-                        if_first=block == i == 0,
-                        name=conv_name)
-                outs.append(conv)
-        return outs
+                        conv_name = "res" + str(block_index + 2) + chr(97 + i)
+                else:
+                    conv_name = "res" + str(block_index + 2) + chr(97 + i)
+                block_list.append(
+                    block_class(
+                        in_channels=in_ch,
+                        out_channels=num_filters[block_index],
+                        # only the first block of stages 1..3 downsamples
+                        stride=2 if i == 0 and block_index != 0 else 1,
+                        if_first=block_index == i == 0,
+                        name=conv_name))
+                # the next block consumes what this block produces
+                in_ch = block_list[-1].out_channels
+            self.out_channels.append(in_ch)
+            self.stages.append(nn.Sequential(*block_list))
+        # self.stages is a plain list, so each stage is registered explicitly
+        for i, stage in enumerate(self.stages):
+            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

-    def conv_bn_layer(self,
-                      input,
-                      num_filters,
-                      filter_size,
-                      stride=1,
-                      groups=1,
-                      act=None,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
+    def forward(self, x):
+        """
+        Run the stem and max pool, then every stage in order.
+
+        Returns:
+            list: the output of each stage, in stage order.
+        """
+        x = self.conv1(x)
+        x = self.pool(x)
+        out_list = []
+        for stage in self.stages:
+            x = stage(x)
+            out_list.append(x)
+        return out_list
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free 2D convolution followed by batch normalization.
+
+    Padding is ``(kernel_size - 1) // 2``. The activation ``act`` is handed
+    to the batch-norm layer, not to the convolution.
+
+    Args:
+        in_channels(int): input channel count.
+        out_channels(int): output channel count.
+        kernel_size(int): convolution kernel size.
+        stride(int): convolution stride.
+        groups(int): convolution groups.
+        act(str|None): activation passed to ``nn.BatchNorm``.
+        name(str): prefix for parameter names. Despite the ``None`` default
+            a string is required, because names are built by concatenation.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
             stride=stride,
-            padding=(filter_size - 1) // 2,
+            padding=(kernel_size - 1) // 2,
             groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
+            weight_attr=ParamAttr(name=name + "_weights"),
             bias_attr=False)
         if name == "conv1":
             bn_name = "bn_" + name
         else:
             bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
             act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")

-    def conv_bn_layer_new(self,
-                          input,
-                          num_filters,
-                          filter_size,
-                          stride=1,
-                          groups=1,
-                          act=None,
-                          name=None):
-        pool = fluid.layers.pool2d(
-            input=input,
-            pool_size=2,
-            pool_stride=2,
-            pool_padding=0,
-            pool_type='avg',
-            ceil_mode=True)
+    def forward(self, x):
+        """Apply the convolution, then batch norm (with its activation)."""
+        # Defined as forward (not __call__) so nn.Layer.__call__ stays in
+        # charge of dispatch, like the other layers in this file.
+        x = self.conv(x)
+        x = self.bn(x)
+        return x

-        conv = fluid.layers.conv2d(
-            input=pool,
-            num_filters=num_filters,
-            filter_size=filter_size,
+
+class ConvBNLayerNew(nn.Layer):
+    """
+    2x2 average pool (stride 2, ceil mode), then a bias-free 2D convolution
+    and batch normalization.
+
+    The convolution always runs with stride 1; the ``stride`` argument is
+    accepted but not used, since the pooling layer does the downsampling.
+
+    Args:
+        in_channels(int): input channel count.
+        out_channels(int): output channel count.
+        kernel_size(int): convolution kernel size.
+        stride(int): accepted for signature parity with ConvBNLayer; unused.
+        groups(int): convolution groups.
+        act(str|None): activation passed to ``nn.BatchNorm``.
+        name(str): prefix for parameter names. Despite the ``None`` default
+            a string is required, because names are built by concatenation.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayerNew, self).__init__()
+        self.pool = nn.AvgPool2d(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+
+        self.conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
             stride=1,
-            padding=(filter_size - 1) // 2,
+            padding=(kernel_size - 1) // 2,
             groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
+            weight_attr=ParamAttr(name=name + "_weights"),
             bias_attr=False)
         if name == "conv1":
             bn_name = "bn_" + name
         else:
             bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
             act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")

-    def shortcut(self, input, ch_out, stride, name, if_first=False):
-        ch_in = input.shape[1]
-        if ch_in != ch_out or stride != 1:
+    def forward(self, x):
+        """Apply average pooling, the convolution, then batch norm."""
+        # Defined as forward (not __call__) so nn.Layer.__call__ stays in
+        # charge of dispatch, like the other layers in this file.
+        x = self.pool(x)
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+
+class ShortCut(nn.Layer):
+    """
+    Shortcut branch of a residual block.
+
+    Uses a 1x1 ConvBNLayer when ``if_first`` is set, a 1x1 ConvBNLayerNew
+    when channels differ or ``stride != 1`` (and ``if_first`` is not set),
+    and otherwise passes the input through unchanged.
+    """
+
+    def __init__(self, in_channels, out_channels, stride, name, if_first=False):
+        super(ShortCut, self).__init__()
+        # flipped to False only in the pass-through case below
+        self.use_conv = True
+        if in_channels != out_channels or stride != 1:
             if if_first:
-                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+                self.conv = ConvBNLayer(
+                    in_channels, out_channels, 1, stride, name=name)
             else:
-                return self.conv_bn_layer_new(
-                    input, ch_out, 1, stride, name=name)
+                self.conv = ConvBNLayerNew(
+                    in_channels, out_channels, 1, stride, name=name)
         elif if_first:
-            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+            self.conv = ConvBNLayer(
+                in_channels, out_channels, 1, stride, name=name)
         else:
-            return input
+            self.use_conv = False

-    def bottleneck_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=1,
+    def forward(self, x):
+        """Return the projected input, or the input itself when no conv is used."""
+        if self.use_conv:
+            x = self.conv(x)
+        return x
+
+
+class BottleneckBlock(nn.Layer):
+    """
+    Residual block made of a 1x1 conv, a 3x3 conv (carrying ``stride``) and a
+    1x1 conv that expands to ``out_channels * 4``, summed with a ShortCut
+    branch and passed through ReLU.
+
+    ``self.out_channels`` is set to ``out_channels * 4``, the channel count
+    this block actually produces.
+    """
+
+    def __init__(self, in_channels, out_channels, stride, name, if_first):
+        super(BottleneckBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
             act='relu',
             name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             stride=stride,
             act='relu',
             name=name + "_branch2b")
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            num_filters=num_filters * 4,
-            filter_size=1,
+        self.conv2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels * 4,
+            kernel_size=1,
             act=None,
             name=name + "_branch2c")

-        short = self.shortcut(
-            input,
-            num_filters * 4,
-            stride,
+        self.short = ShortCut(
+            in_channels=in_channels,
+            out_channels=out_channels * 4,
+            stride=stride,
             if_first=if_first,
             name=name + "_branch1")
+        # read by the caller to size the next block's input
+        self.out_channels = out_channels * 4

-        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+    def forward(self, x):
+        """Return relu(conv2(conv1(conv0(x))) + short(x))."""
+        y = self.conv0(x)
+        y = self.conv1(y)
+        y = self.conv2(y)
+        y = y + self.short(x)
+        y = F.relu(y)
+        return y

-    def basic_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=3,
+
+class BasicBlock(nn.Layer):
+    """
+    Residual block made of two 3x3 convs (the first carries ``stride``),
+    summed with a ShortCut branch and passed through ReLU.
+
+    ``self.out_channels`` equals the ``out_channels`` argument.
+    """
+
+    def __init__(self, in_channels, out_channels, stride, name, if_first):
+        super(BasicBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             act='relu',
             stride=stride,
             name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             act=None,
             name=name + "_branch2b")
-        short = self.shortcut(
-            input,
-            num_filters,
-            stride,
+        self.short = ShortCut(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            stride=stride,
             if_first=if_first,
             name=name + "_branch1")
-        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+        # read by the caller to size the next block's input
+        self.out_channels = out_channels
+
+    def forward(self, x):
+        """Return relu(conv1(conv0(x)) + short(x))."""
+        y = self.conv0(x)
+        y = self.conv1(y)
+        y = y + self.short(x)
+        return F.relu(y)
+
+
+# Manual smoke test: build a ResNet-18, feed it an all-zero 1x3x640x640
+# input and print the shape of each stage output.
+if __name__ == '__main__':
+    import paddle
+
+    paddle.disable_static()
+    x = paddle.zeros([1, 3, 640, 640])
+    x = paddle.to_variable(x)
+    print(x.shape)
+    net = ResNet(layers=18)
+    y = net(x)
+
+    # y is the list returned by ResNet.forward, one entry per stage
+    for stage in y:
+        print(stage.shape)
+    # paddle.save(net.state_dict(),'1.pth')
--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
@ -1,274 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = ["ResNet"]
-
-
-class ResNet(object):
-    def __init__(self, params):
-        """
-        the Resnet backbone network for detection module.
-        Args:
-            params(dict): the super parameters for network build
-        """
-        self.layers = params['layers']
-        supported_layers = [18, 34, 50, 101, 152]
-        assert self.layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
-        self.is_3x3 = True
-
-    def __call__(self, input):
-        layers = self.layers
-        is_3x3 = self.is_3x3
-        # if layers == 18:
-        #     depth = [2, 2, 2, 2]
-        # elif layers == 34 or layers == 50:
-        #     depth = [3, 4, 6, 3]
-        # elif layers == 101:
-        #     depth = [3, 4, 23, 3]
-        # elif layers == 152:
-        #     depth = [3, 8, 36, 3]
-        # elif layers == 200:
-        #     depth = [3, 12, 48, 3]
-        # num_filters = [64, 128, 256, 512]
-        # outs = []
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]#, 3, 3]
-        elif layers == 34 or layers == 50:
-            #depth = [3, 4, 6, 3]#,  3, 3]
-            depth = [3, 4, 6, 3, 3]#, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]#,  3, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]#,  3, 3]
-        num_filters = [64, 128, 256, 512, 512]#, 512]
-        blocks = {}
-
-        idx = 'block_0'
-        blocks[idx] = input
-
-        if is_3x3 == False:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=64,
-                filter_size=7,
-                stride=2,
-                act='relu')
-        else:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=32,
-                filter_size=3,
-                stride=2,
-                act='relu',
-                name='conv1_1')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=32,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_2')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=64,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_3')
-        idx = 'block_1'
-        blocks[idx] = conv
-
-        conv = fluid.layers.pool2d(
-            input=conv,
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
-
-        if layers >= 50:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    if layers in [101, 152, 200] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    conv = self.bottleneck_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=2 if i == 0 and block != 0 else 1,
-                        if_first=block == i == 0,
-                        name=conv_name)
-                # outs.append(conv)
-                idx = 'block_' + str(block + 2)
-                blocks[idx] = conv
-        else:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    conv = self.basic_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=2 if i == 0 and block != 0 else 1,
-                        if_first=block == i == 0,
-                        name=conv_name)
-                # outs.append(conv)
-                idx = 'block_' + str(block + 2)
-                blocks[idx] = conv
-        # return outs
-        return blocks
-
-    def conv_bn_layer(self,
-                      input,
-                      num_filters,
-                      filter_size,
-                      stride=1,
-                      groups=1,
-                      act=None,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-
-    def conv_bn_layer_new(self,
-                          input,
-                          num_filters,
-                          filter_size,
-                          stride=1,
-                          groups=1,
-                          act=None,
-                          name=None):
-        pool = fluid.layers.pool2d(
-            input=input,
-            pool_size=2,
-            pool_stride=2,
-            pool_padding=0,
-            pool_type='avg',
-            ceil_mode=True)
-
-        conv = fluid.layers.conv2d(
-            input=pool,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=1,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-
-    def shortcut(self, input, ch_out, stride, name, if_first=False):
-        ch_in = input.shape[1]
-        if ch_in != ch_out or stride != 1:
-            if if_first:
-                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
-            else:
-                return self.conv_bn_layer_new(
-                    input, ch_out, 1, stride, name=name)
-        elif if_first:
-            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
-        else:
-            return input
-
-    def bottleneck_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            num_filters=num_filters * 4,
-            filter_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        short = self.shortcut(
-            input,
-            num_filters * 4,
-            stride,
-            if_first=if_first,
-            name=name + "_branch1")
-
-        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
-
-    def basic_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=3,
-            act='relu',
-            stride=stride,
-            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
-            act=None,
-            name=name + "_branch2b")
-        short = self.shortcut(
-            input,
-            num_filters,
-            stride,
-            if_first=if_first,
-            name=name + "_branch1")
-        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
@ -1,53 +1,49 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from paddle import nn

-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
+from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible

-__all__ = [
-    'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
-    'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
-    'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
-    'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
-    'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
-]
+__all__ = ['MobileNetV3']


-class MobileNetV3():
-    def __init__(self, params):
-        self.scale = params.get("scale", 0.5)
-        model_name = params.get("model_name", "small")
-        large_stride = params.get("large_stride", [1, 2, 2, 2])
-        small_stride = params.get("small_stride", [2, 2, 2, 2])
+class MobileNetV3(nn.Layer):
+    def __init__(self,
+                 in_channels=3,
+                 model_name='small',
+                 scale=0.5,
+                 large_stride=None,
+                 small_stride=None,
+                 **kwargs):
+        super(MobileNetV3, self).__init__()
+        if small_stride is None:
+            small_stride = [2, 2, 2, 2]
+        if large_stride is None:
+            large_stride = [1, 2, 2, 2]

        assert isinstance(large_stride, list), "large_stride type must " \
-            "be list but got {}".format(type(large_stride))
+                                               "be list but got {}".format(type(large_stride))
        assert isinstance(small_stride, list), "small_stride type must " \
-            "be list but got {}".format(type(small_stride))
+                                               "be list but got {}".format(type(small_stride))
        assert len(large_stride) == 4, "large_stride length must be " \
-            "4 but got {}".format(len(large_stride))
+                                       "4 but got {}".format(len(large_stride))
        assert len(small_stride) == 4, "small_stride length must be " \
-            "4 but got {}".format(len(small_stride))
+                                       "4 but got {}".format(len(small_stride))

-        self.inplanes = 16
        if model_name == "large":
-            self.cfg = [
+            cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, False, 'relu', large_stride[0]],
                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
@ -65,10 +61,9 @@ class MobileNetV3():
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
+            cls_ch_squeeze = 960
        elif model_name == "small":
-            self.cfg = [
+            cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
@ -82,186 +77,72 @@ class MobileNetV3():
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
+            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert self.scale in supported_scale, \
-            "supported scales are {} but input scale is {}".format(supported_scale, self.scale)
+        assert scale in supported_scale, \
+            "supported scales are {} but input scale is {}".format(supported_scale, scale)

-    def __call__(self, input):
-        scale = self.scale
-        inplanes = self.inplanes
-        cfg = self.cfg
-        cls_ch_squeeze = self.cls_ch_squeeze
-        cls_ch_expand = self.cls_ch_expand
-        #conv1
-        conv = self.conv_bn_layer(
-            input,
-            filter_size=3,
-            num_filters=self.make_divisible(inplanes * scale),
+        inplanes = 16
+        # conv1
+        self.conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=make_divisible(inplanes * scale),
+            kernel_size=3,
            stride=2,
            padding=1,
-            num_groups=1,
+            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
-        inplanes = self.make_divisible(inplanes * scale)
-        for layer_cfg in cfg:
-            conv = self.residual_unit(
-                input=conv,
-                num_in_filter=inplanes,
-                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
-                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
-                act=layer_cfg[4],
-                stride=layer_cfg[5],
-                filter_size=layer_cfg[0],
-                use_se=layer_cfg[3],
-                name='conv' + str(i + 2))
-            inplanes = self.make_divisible(scale * layer_cfg[2])
+        block_list = []
+        inplanes = make_divisible(inplanes * scale)
+        for (k, exp, c, se, nl, s) in cfg:
+            block_list.append(
+                ResidualUnit(
+                    in_channels=inplanes,
+                    mid_channels=make_divisible(scale * exp),
+                    out_channels=make_divisible(scale * c),
+                    kernel_size=k,
+                    stride=s,
+                    use_se=se,
+                    act=nl,
+                    name='conv' + str(i + 2)))
+            inplanes = make_divisible(scale * c)
            i += 1
+        self.blocks = nn.Sequential(*block_list)

-        conv = self.conv_bn_layer(
-            input=conv,
-            filter_size=1,
-            num_filters=self.make_divisible(scale * cls_ch_squeeze),
+        self.conv2 = ConvBNLayer(
+            in_channels=inplanes,
+            out_channels=make_divisible(scale * cls_ch_squeeze),
+            kernel_size=1,
            stride=1,
            padding=0,
-            num_groups=1,
+            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')

-        conv = fluid.layers.pool2d(
-            input=conv,
-            pool_size=2,
-            pool_stride=2,
-            pool_padding=0,
-            pool_type='max')
-        return conv
+        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+        self.out_channels = make_divisible(scale * cls_ch_squeeze)

-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      num_groups=1,
-                      if_act=True,
-                      act=None,
-                      name=None,
-                      use_cudnn=True,
-                      res_last_bn_init=False):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(name=name + '_weights'),
-            bias_attr=False)
-        bn_name = name + '_bn'
-        bn = fluid.layers.batch_norm(
-            input=conv,
-            param_attr=ParamAttr(
-                name=bn_name + "_scale",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            bias_attr=ParamAttr(
-                name=bn_name + "_offset",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-        if if_act:
-            if act == 'relu':
-                bn = fluid.layers.relu(bn)
-            elif act == 'hard_swish':
-                bn = fluid.layers.hard_swish(bn)
-        return bn
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.blocks(x)
+        x = self.conv2(x)
+        x = self.pool(x)
+        return x

-    def make_divisible(self, v, divisor=8, min_value=None):
-        if min_value is None:
-            min_value = divisor
-        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-        if new_v < 0.9 * v:
-            new_v += divisor
-        return new_v

-    def se_block(self, input, num_out_filter, ratio=4, name=None):
-        num_mid_filter = num_out_filter // ratio
-        pool = fluid.layers.pool2d(
-            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
-        conv1 = fluid.layers.conv2d(
-            input=pool,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            act='relu',
-            param_attr=ParamAttr(name=name + '_1_weights'),
-            bias_attr=ParamAttr(name=name + '_1_offset'))
-        conv2 = fluid.layers.conv2d(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            act='hard_sigmoid',
-            param_attr=ParamAttr(name=name + '_2_weights'),
-            bias_attr=ParamAttr(name=name + '_2_offset'))
-        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
-        return scale
-
-    def residual_unit(self,
-                      input,
-                      num_in_filter,
-                      num_mid_filter,
-                      num_out_filter,
-                      stride,
-                      filter_size,
-                      act=None,
-                      use_se=False,
-                      name=None):
-
-        conv0 = self.conv_bn_layer(
-            input=input,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + '_expand')
-
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            filter_size=filter_size,
-            num_filters=num_mid_filter,
-            stride=stride,
-            padding=int((filter_size - 1) // 2),
-            if_act=True,
-            act=act,
-            num_groups=num_mid_filter,
-            use_cudnn=False,
-            name=name + '_depthwise')
-        if use_se:
-            conv1 = self.se_block(
-                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
-
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            stride=1,
-            padding=0,
-            if_act=False,
-            name=name + '_linear',
-            res_last_bn_init=True)
-        if num_in_filter != num_out_filter or stride != 1:
-            return conv2
-        else:
-            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
+if __name__ == '__main__':
+    import paddle
+    paddle.disable_static()
+    x = paddle.zeros((1, 3, 32, 320))
+    x = paddle.to_variable(x)
+    net = MobileNetV3(model_name='small', small_stride=[1, 2, 2, 2])
+    y = net(x)
+    print(y.shape)
--- a/ppocr/modeling/backbones/rec_resnet_fpn.py
+++ b/ppocr/modeling/backbones/rec_resnet_fpn.py
@ -1,246 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = [
-    "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"
-]
-
-Trainable = True
-w_nolr = fluid.ParamAttr(trainable=Trainable)
-train_parameters = {
-    "input_size": [3, 224, 224],
-    "input_mean": [0.485, 0.456, 0.406],
-    "input_std": [0.229, 0.224, 0.225],
-    "learning_strategy": {
-        "name": "piecewise_decay",
-        "batch_size": 256,
-        "epochs": [30, 60, 90],
-        "steps": [0.1, 0.01, 0.001, 0.0001]
-    }
-}
-
-
-class ResNet():
-    def __init__(self, params):
-        self.layers = params['layers']
-        self.params = train_parameters
-
-    def __call__(self, input):
-        layers = self.layers
-        supported_layers = [18, 34, 50, 101, 152]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
-        num_filters = [64, 128, 256, 512]
-
-        conv = self.conv_bn_layer(
-            input=input,
-            num_filters=64,
-            filter_size=7,
-            stride=2,
-            act='relu',
-            name="conv1")
-        F = []
-        if layers >= 50:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    conv = self.bottleneck_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=stride_list[block] if i == 0 else 1,
-                        name=conv_name)
-                F.append(conv)
-        else:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-
-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
-
-                    conv = self.basic_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=stride,
-                        if_first=block == i == 0,
-                        name=conv_name)
-                F.append(conv)
-
-        base = F[-1]
-        for i in [-2, -3]:
-            b, c, w, h = F[i].shape
-            if (w, h) == base.shape[2:]:
-                base = base
-            else:
-                base = fluid.layers.conv2d_transpose(
-                    input=base,
-                    num_filters=c,
-                    filter_size=4,
-                    stride=2,
-                    padding=1,
-                    act=None,
-                    param_attr=w_nolr,
-                    bias_attr=w_nolr)
-                base = fluid.layers.batch_norm(
-                    base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
-            base = fluid.layers.concat([base, F[i]], axis=1)
-            base = fluid.layers.conv2d(
-                base,
-                num_filters=c,
-                filter_size=1,
-                param_attr=w_nolr,
-                bias_attr=w_nolr)
-            base = fluid.layers.conv2d(
-                base,
-                num_filters=c,
-                filter_size=3,
-                padding=1,
-                param_attr=w_nolr,
-                bias_attr=w_nolr)
-            base = fluid.layers.batch_norm(
-                base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
-
-        base = fluid.layers.conv2d(
-            base,
-            num_filters=512,
-            filter_size=1,
-            bias_attr=w_nolr,
-            param_attr=w_nolr)
-
-        return base
-
-    def conv_bn_layer(self,
-                      input,
-                      num_filters,
-                      filter_size,
-                      stride=1,
-                      groups=1,
-                      act=None,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=2 if stride == (1, 1) else filter_size,
-            dilation=2 if stride == (1, 1) else 1,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(
-                name=name + "_weights", trainable=Trainable),
-            bias_attr=False,
-            name=name + '.conv2d.output.1')
-
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            name=bn_name + '.output.1',
-            param_attr=ParamAttr(
-                name=bn_name + '_scale', trainable=Trainable),
-            bias_attr=ParamAttr(
-                bn_name + '_offset', trainable=Trainable),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance', )
-
-    def shortcut(self, input, ch_out, stride, is_first, name):
-        ch_in = input.shape[1]
-        if ch_in != ch_out or stride != 1 or is_first == True:
-            if stride == (1, 1):
-                return self.conv_bn_layer(input, ch_out, 1, 1, name=name)
-            else:  #stride == (2,2)
-                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
-
-        else:
-            return input
-
-    def bottleneck_block(self, input, num_filters, stride, name):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            num_filters=num_filters * 4,
-            filter_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        short = self.shortcut(
-            input,
-            num_filters * 4,
-            stride,
-            is_first=False,
-            name=name + "_branch1")
-
-        return fluid.layers.elementwise_add(
-            x=short, y=conv2, act='relu', name=name + ".add.output.5")
-
-    def basic_block(self, input, num_filters, stride, is_first, name):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=3,
-            act='relu',
-            stride=stride,
-            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
-            act=None,
-            name=name + "_branch2b")
-        short = self.shortcut(
-            input, num_filters, stride, is_first, name=name + "_branch1")
-        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
--- a/ppocr/modeling/backbones/rec_resnet_vd.py
+++ b/ppocr/modeling/backbones/rec_resnet_vd.py
@ -1,271 +1,312 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import math
+from paddle import nn, ParamAttr
+from paddle.nn import functional as F

-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = [
-    "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
-    "ResNet152_vd", "ResNet200_vd"
-]
+__all__ = ["ResNet"]


-class ResNet():
-    def __init__(self, params):
-        self.layers = params['layers']
-        self.is_3x3 = True
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert self.layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
+class ResNet(nn.Layer):
+    def __init__(self, in_channels=3, layers=34):
+        super(ResNet, self).__init__()
+        supported_layers = {
+            18: {
+                'depth': [2, 2, 2, 2],
+                'block_class': BasicBlock
+            },
+            34: {
+                'depth': [3, 4, 6, 3],
+                'block_class': BasicBlock
+            },
+            50: {
+                'depth': [3, 4, 6, 3],
+                'block_class': BottleneckBlock
+            },
+            101: {
+                'depth': [3, 4, 23, 3],
+                'block_class': BottleneckBlock
+            },
+            152: {
+                'depth': [3, 8, 36, 3],
+                'block_class': BottleneckBlock
+            },
+            200: {
+                'depth': [3, 12, 48, 3],
+                'block_class': BottleneckBlock
+            }
+        }
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)
+        is_3x3 = True

-    def __call__(self, input):
-        is_3x3 = self.is_3x3
-        layers = self.layers
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]
+        depth = supported_layers[layers]['depth']
+        block_class = supported_layers[layers]['block_class']
+        conv = []
        if is_3x3 == False:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=64,
-                filter_size=7,
-                stride=1,
-                act='relu')
+            conv.append(
+                ConvBNLayer(
+                    in_channels=in_channels,
+                    out_channels=64,
+                    kernel_size=7,
+                    stride=1,
+                    act='relu'))
        else:
-            conv = self.conv_bn_layer(
-                input=input,
-                num_filters=32,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_1')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=32,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_2')
-            conv = self.conv_bn_layer(
-                input=conv,
-                num_filters=64,
-                filter_size=3,
-                stride=1,
-                act='relu',
-                name='conv1_3')
+            conv.append(
+                ConvBNLayer(
+                    in_channels=in_channels,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=1,
+                    act='relu',
+                    name='conv1_1'))
+            conv.append(
+                ConvBNLayer(
+                    in_channels=32,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=1,
+                    act='relu',
+                    name='conv1_2'))
+            conv.append(
+                ConvBNLayer(
+                    in_channels=32,
+                    out_channels=64,
+                    kernel_size=3,
+                    stride=1,
+                    act='relu',
+                    name='conv1_3'))
+        self.conv1 = nn.Sequential(*conv)

-        conv = fluid.layers.pool2d(
-            input=conv,
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
+        self.pool = nn.MaxPool2d(
+            kernel_size=3,
+            stride=2,
+            padding=1, )

-        if layers >= 50:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    if layers in [101, 152, 200] and block == 2:
+        block_list = []
+        in_ch = 64
+        for block_index in range(len(depth)):
+            for i in range(depth[block_index]):
+                if layers >= 50:
+                    if layers in [101, 152, 200] and block_index == 2:
                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
+                            conv_name = "res" + str(block_index + 2) + "a"
                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                            conv_name = "res" + str(block_index +
+                                                    2) + "b" + str(i)
                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-
-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
-
-                    conv = self.bottleneck_block(
-                        input=conv,
-                        num_filters=num_filters[block],
+                        conv_name = "res" + str(block_index + 2) + chr(97 + i)
+                else:
+                    conv_name = "res" + str(block_index + 2) + chr(97 + i)
+                if i == 0 and block_index != 0:
+                    stride = (2, 1)
+                else:
+                    stride = (1, 1)
+                block_list.append(
+                    block_class(
+                        in_channels=in_ch,
+                        out_channels=num_filters[block_index],
                        stride=stride,
-                        if_first=block == i == 0,
-                        name=conv_name)
-        else:
-            for block in range(len(depth)):
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                        if_first=block_index == i == 0,
+                        name=conv_name))
+                in_ch = block_list[-1].out_channels
+        self.block_list = nn.Sequential(*block_list)
+        self.add_sublayer(sublayer=self.block_list, name="block_list")
+        self.pool_out = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+        self.out_channels = in_ch

-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.pool(x)
+        x = self.block_list(x)
+        x = self.pool_out(x)
+        return x

-                    conv = self.basic_block(
-                        input=conv,
-                        num_filters=num_filters[block],
-                        stride=stride,
-                        if_first=block == i == 0,
-                        name=conv_name)

-        conv = fluid.layers.pool2d(
-            input=conv,
-            pool_size=2,
-            pool_stride=2,
-            pool_padding=0,
-            pool_type='max')
-
-        return conv
-
-    def conv_bn_layer(self,
-                      input,
-                      num_filters,
-                      filter_size,
-                      stride=1,
-                      groups=1,
-                      act=None,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
            stride=stride,
-            padding=(filter_size - 1) // 2,
+            padding=(kernel_size - 1) // 2,
            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
+            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")

-    def conv_bn_layer_new(self,
-                          input,
-                          num_filters,
-                          filter_size,
-                          stride=1,
-                          groups=1,
-                          act=None,
-                          name=None):
-        pool = fluid.layers.pool2d(
-            input=input,
-            pool_size=stride,
-            pool_stride=stride,
-            pool_padding=0,
-            pool_type='avg',
-            ceil_mode=True)
+    def __call__(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x

-        conv = fluid.layers.conv2d(
-            input=pool,
-            num_filters=num_filters,
-            filter_size=filter_size,
+
+class ConvBNLayerNew(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayerNew, self).__init__()
+        self.pool = nn.AvgPool2d(
+            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
+
+        self.conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
            stride=1,
-            padding=(filter_size - 1) // 2,
+            padding=(kernel_size - 1) // 2,
            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
+            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
-
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(
-            input=conv,
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")

-    def shortcut(self, input, ch_out, stride, name, if_first=False):
-        ch_in = input.shape[1]
-        if ch_in != ch_out or stride[0] != 1:
+    def __call__(self, x):
+        x = self.pool(x)
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+
+class ShortCut(nn.Layer):
+    def __init__(self, in_channels, out_channels, stride, name, if_first=False):
+        super(ShortCut, self).__init__()
+        self.use_conv = True
+
+        if in_channels != out_channels or stride[0] != 1:
            if if_first:
-                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+                self.conv = ConvBNLayer(
+                    in_channels, out_channels, 1, stride, name=name)
            else:
-                return self.conv_bn_layer_new(
-                    input, ch_out, 1, stride, name=name)
+                self.conv = ConvBNLayerNew(
+                    in_channels, out_channels, 1, stride, name=name)
        elif if_first:
-            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+            self.conv = ConvBNLayer(
+                in_channels, out_channels, 1, stride, name=name)
        else:
-            return input
+            self.use_conv = False

-    def bottleneck_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=1,
+    def forward(self, x):
+        if self.use_conv:
+            x = self.conv(x)
+        return x
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self, in_channels, out_channels, stride, name, if_first):
+        super(BottleneckBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            num_filters=num_filters * 4,
-            filter_size=1,
+        self.conv2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels * 4,
+            kernel_size=1,
            act=None,
            name=name + "_branch2c")

-        short = self.shortcut(
-            input,
-            num_filters * 4,
-            stride,
+        self.short = ShortCut(
+            in_channels=in_channels,
+            out_channels=out_channels * 4,
+            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
+        self.out_channels = out_channels * 4

-        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+    def forward(self, x):
+        y = self.conv0(x)
+        y = self.conv1(y)
+        y = self.conv2(y)
+        y = y + self.short(x)
+        y = F.relu(y)
+        return y

-    def basic_block(self, input, num_filters, stride, name, if_first):
-        conv0 = self.conv_bn_layer(
-            input=input,
-            num_filters=num_filters,
-            filter_size=3,
+
+class BasicBlock(nn.Layer):
+    def __init__(self, in_channels, out_channels, stride, name, if_first):
+        super(BasicBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            num_filters=num_filters,
-            filter_size=3,
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
            act=None,
            name=name + "_branch2b")
-        short = self.shortcut(
-            input,
-            num_filters,
-            stride,
+        self.short = ShortCut(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
-        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+        self.out_channels = out_channels
+
+    def forward(self, x):
+        y = self.conv0(x)
+        y = self.conv1(y)
+        y = y + self.short(x)
+        return F.relu(y)
--- a/ppocr/modeling/common_functions.py
+++ b/ppocr/modeling/common_functions.py
@ -1,95 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import math
-
-
-def get_para_bias_attr(l2_decay, k, name):
-    regularizer = fluid.regularizer.L2Decay(l2_decay)
-    stdv = 1.0 / math.sqrt(k * 1.0)
-    initializer = fluid.initializer.Uniform(-stdv, stdv)
-    para_attr = fluid.ParamAttr(
-        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
-    bias_attr = fluid.ParamAttr(
-        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
-    return [para_attr, bias_attr]
-
-
-def conv_bn_layer(input,
-                  num_filters,
-                  filter_size,
-                  stride=1,
-                  groups=1,
-                  act=None,
-                  name=None):
-    conv = fluid.layers.conv2d(
-        input=input,
-        num_filters=num_filters,
-        filter_size=filter_size,
-        stride=stride,
-        padding=(filter_size - 1) // 2,
-        groups=groups,
-        act=None,
-        param_attr=ParamAttr(name=name + "_weights"),
-        bias_attr=False,
-        name=name + '.conv2d')
-
-    bn_name = "bn_" + name
-    return fluid.layers.batch_norm(
-        input=conv,
-        act=act,
-        name=bn_name + '.output',
-        param_attr=ParamAttr(name=bn_name + '_scale'),
-        bias_attr=ParamAttr(bn_name + '_offset'),
-        moving_mean_name=bn_name + '_mean',
-        moving_variance_name=bn_name + '_variance')
-
-
-def deconv_bn_layer(input,
-                    num_filters,
-                    filter_size=4,
-                    stride=2,
-                    act='relu',
-                    name=None):
-    deconv = fluid.layers.conv2d_transpose(
-        input=input,
-        num_filters=num_filters,
-        filter_size=filter_size,
-        stride=stride,
-        padding=1,
-        act=None,
-        param_attr=ParamAttr(name=name + "_weights"),
-        bias_attr=False,
-        name=name + '.deconv2d')
-    bn_name = "bn_" + name
-    return fluid.layers.batch_norm(
-        input=deconv,
-        act=act,
-        name=bn_name + '.output',
-        param_attr=ParamAttr(name=bn_name + '_scale'),
-        bias_attr=ParamAttr(bn_name + '_offset'),
-        moving_mean_name=bn_name + '_mean',
-        moving_variance_name=bn_name + '_variance')
-
-
-def create_tmp_var(program, name, dtype, shape, lod_level=0):
-    return program.current_block().create_var(
-        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
--- a/ppocr/modeling/heads/init.py
+++ b/ppocr/modeling/heads/init.py
@ -11,3 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+__all__ = ['build_head']
+
+
+def build_head(config):
+    """Instantiate the head layer selected by ``config['name']``.
+
+    Args:
+        config (dict): must contain a ``'name'`` key holding one of the
+            supported head names; every remaining entry is passed to the
+            head's constructor as a keyword argument. Note that ``'name'``
+            is popped from ``config`` in place.
+
+    Returns:
+        The constructed head instance.
+
+    Raises:
+        KeyError: if ``config`` has no ``'name'`` entry.
+        AssertionError: if the requested name is not a supported head.
+    """
+    # Imported inside the function, as before, so that importing this
+    # package does not import the head modules.
+    # det head
+    from .det_db_head import DBHead
+
+    # rec head
+    from .rec_ctc_head import CTC
+
+    # Explicit name -> class table. This replaces eval() on a string that
+    # comes from the configuration file.
+    support_dict = {'DBHead': DBHead, 'CTC': CTC}
+
+    module_name = config.pop('name')
+    if module_name not in support_dict:
+        # Raised explicitly instead of through `assert`, so the check is not
+        # stripped by `python -O`. The exception type and message text match
+        # what the previous assert produced.
+        raise AssertionError('head only support {}'.format(
+            list(support_dict)))
+    module_class = support_dict[module_name](**config)
+    return module_class
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@ -1,27 +1,98 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import math
-
-import paddle.fluid as fluid
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr


-class DBHead(object):
+def get_bias_attr(k, name):
+    """Return a ParamAttr for a bias drawn uniformly from [-b, b], b = 1/sqrt(k).
+
+    Args:
+        k: value whose inverse square root sets the initialisation bound.
+        name: prefix of the parameter name; "_b_attr" is appended.
+    """
+    bound = 1.0 / math.sqrt(k * 1.0)
+    return ParamAttr(
+        initializer=paddle.nn.initializer.Uniform(-bound, bound),
+        name=name + "_b_attr")
+
+
+class Head(nn.Layer):
+    """Prediction branch: 3x3 conv, BN+ReLU, two stride-2 transposed convs, sigmoid.
+
+    Args:
+        in_channels: channel count of the input; every intermediate layer
+            works on ``in_channels // 4`` channels and the last layer
+            outputs a single channel.
+        name_list: parameter-name prefixes. Entries 0..4 name, in order,
+            conv1, conv_bn1, conv2, conv_bn2 and conv3; the last entry is
+            the prefix used for the two transposed-conv bias names.
+    """
+
+    def __init__(self, in_channels, name_list):
+        super(Head, self).__init__()
+        mid_channels = in_channels // 4
+        bias_prefix = name_list[-1]
+
+        def make_bn(bn_name):
+            # BatchNorm with ReLU; scale starts at 1.0 and offset at 1e-4.
+            return nn.BatchNorm(
+                num_channels=mid_channels,
+                param_attr=ParamAttr(
+                    name=bn_name + '.w_0',
+                    initializer=paddle.nn.initializer.Constant(value=1.0)),
+                bias_attr=ParamAttr(
+                    name=bn_name + '.b_0',
+                    initializer=paddle.nn.initializer.Constant(value=1e-4)),
+                moving_mean_name=bn_name + '.w_1',
+                moving_variance_name=bn_name + '.w_2',
+                act='relu')
+
+        def make_deconv(out_channels, weight_name, bias_tag):
+            # kernel_size=2 / stride=2 transposed convolution with a bias.
+            return nn.ConvTranspose2d(
+                in_channels=mid_channels,
+                out_channels=out_channels,
+                kernel_size=2,
+                stride=2,
+                weight_attr=ParamAttr(
+                    name=weight_name + '.w_0',
+                    initializer=paddle.nn.initializer.MSRA(uniform=False)),
+                bias_attr=get_bias_attr(mid_channels, bias_prefix + bias_tag))
+
+        # Sub-layers are created and registered in the same order as before.
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=mid_channels,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),
+            bias_attr=False)
+        self.conv_bn1 = make_bn(name_list[1])
+        self.conv2 = make_deconv(mid_channels, name_list[2], "conv2")
+        self.conv_bn2 = make_bn(name_list[3])
+        self.conv3 = make_deconv(1, name_list[4], "conv3")
+
+    def forward(self, x):
+        """Run the branch on x and return the sigmoid of the final layer."""
+        feat = self.conv_bn1(self.conv1(x))
+        feat = self.conv_bn2(self.conv2(feat))
+        return F.sigmoid(self.conv3(feat))
+
+
+class DBHead(nn.Layer):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
@ -29,177 +100,29 @@ class DBHead(object):
        in_channels(int): channel count of the feature map fed to the head
        k(int): factor used by step_function, default 50
    """

-    def __init__(self, params):
-        self.k = params['k']
-        self.inner_channels = params['inner_channels']
-        self.C, self.H, self.W = params['image_shape']
-        print(self.C, self.H, self.W)
-
-    def binarize(self, x):
-        conv1 = fluid.layers.conv2d(
-            input=x,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=False)
-        conv_bn1 = fluid.layers.batch_norm(
-            input=conv1,
-            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
-            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
-            act="relu")
-        conv2 = fluid.layers.conv2d_transpose(
-            input=conv_bn1,
-            num_filters=self.inner_channels // 4,
-            filter_size=2,
-            stride=2,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
-            act=None)
-        conv_bn2 = fluid.layers.batch_norm(
-            input=conv2,
-            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
-            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
-            act="relu")
-        conv3 = fluid.layers.conv2d_transpose(
-            input=conv_bn2,
-            num_filters=1,
-            filter_size=2,
-            stride=2,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
-            act=None)
-        out = fluid.layers.sigmoid(conv3)
-        return out
-
-    def thresh(self, x):
-        conv1 = fluid.layers.conv2d(
-            input=x,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=False)
-        conv_bn1 = fluid.layers.batch_norm(
-            input=conv1,
-            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
-            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
-            act="relu")
-        conv2 = fluid.layers.conv2d_transpose(
-            input=conv_bn1,
-            num_filters=self.inner_channels // 4,
-            filter_size=2,
-            stride=2,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
-            act=None)
-        conv_bn2 = fluid.layers.batch_norm(
-            input=conv2,
-            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
-            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
-            act="relu")
-        conv3 = fluid.layers.conv2d_transpose(
-            input=conv_bn2,
-            num_filters=1,
-            filter_size=2,
-            stride=2,
-            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
-            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
-            act=None)
-        out = fluid.layers.sigmoid(conv3)
-        return out
-
-    def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
-        regularizer = fluid.regularizer.L2Decay(l2_decay)
-        stdv = 1.0 / math.sqrt(k * 1.0)
-        initializer = fluid.initializer.Uniform(-stdv, stdv)
-        bias_attr = fluid.ParamAttr(
-            regularizer=regularizer,
-            initializer=initializer,
-            name=name + "_b_attr")
-        return bias_attr
+    def __init__(self, in_channels, k=50, **kwargs):
+        """Create the binarize and thresh branches.
+
+        Args:
+            in_channels: channel count handed to both Head branches.
+            k: factor used by step_function.
+            **kwargs: accepted and ignored.
+        """
+        super(DBHead, self).__init__()
+        self.k = k
+        # Each list holds the parameter-name prefixes for one Head branch.
+        self.binarize = Head(in_channels, [
+            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0',
+            'batch_norm_48', 'conv2d_transpose_1', 'binarize'
+        ])
+        self.thresh = Head(in_channels, [
+            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2',
+            'batch_norm_50', 'conv2d_transpose_3', 'thresh'
+        ])

    def step_function(self, x, y):
+        """Return 1 / (1 + exp(-k * (x - y))), with k taken from self.k."""
-        return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))
+        return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))

-    def __call__(self, conv_features, mode="train"):
-        c2, c3, c4, c5 = conv_features
-        param_attr = fluid.initializer.MSRAInitializer(uniform=False)
-        in5 = fluid.layers.conv2d(
-            input=c5,
-            num_filters=self.inner_channels,
-            filter_size=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        in4 = fluid.layers.conv2d(
-            input=c4,
-            num_filters=self.inner_channels,
-            filter_size=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        in3 = fluid.layers.conv2d(
-            input=c3,
-            num_filters=self.inner_channels,
-            filter_size=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        in2 = fluid.layers.conv2d(
-            input=c2,
-            num_filters=self.inner_channels,
-            filter_size=1,
-            param_attr=param_attr,
-            bias_attr=False)
+    def forward(self, x):
+        """Compute the head output for x.
+
+        When ``self.training`` is false, only the output of the binarize
+        branch is returned. Otherwise the binarize output, the thresh output
+        and their step_function combination are concatenated along axis 1.
+        """
+        shrink_maps = self.binarize(x)
+        # Outside training the thresh branch is not evaluated at all.
+        if not self.training:
+            return shrink_maps

-        out4 = fluid.layers.elementwise_add(
-            x=fluid.layers.resize_nearest(
-                input=in5, scale=2), y=in4)  # 1/16
-        out3 = fluid.layers.elementwise_add(
-            x=fluid.layers.resize_nearest(
-                input=out4, scale=2), y=in3)  # 1/8
-        out2 = fluid.layers.elementwise_add(
-            x=fluid.layers.resize_nearest(
-                input=out3, scale=2), y=in2)  # 1/4
-
-        p5 = fluid.layers.conv2d(
-            input=in5,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        p5 = fluid.layers.resize_nearest(input=p5, scale=8)
-        p4 = fluid.layers.conv2d(
-            input=out4,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        p4 = fluid.layers.resize_nearest(input=p4, scale=4)
-        p3 = fluid.layers.conv2d(
-            input=out3,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=param_attr,
-            bias_attr=False)
-        p3 = fluid.layers.resize_nearest(input=p3, scale=2)
-        p2 = fluid.layers.conv2d(
-            input=out2,
-            num_filters=self.inner_channels // 4,
-            filter_size=3,
-            padding=1,
-            param_attr=param_attr,
-            bias_attr=False)
-
-        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
-        shrink_maps = self.binarize(fuse)
-        if mode != "train":
-            return {"maps": shrink_maps}
-        threshold_maps = self.thresh(fuse)
+        # Training path: the thresh branch runs on the same input x.
+        threshold_maps = self.thresh(x)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
-        y = fluid.layers.concat(
-            input=[shrink_maps, threshold_maps, binary_maps], axis=1)
-        predicts = {}
-        predicts['maps'] = y
-        return predicts
+        # Concatenation order along axis 1: shrink, threshold, binary.
+        y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1)
+        return y
--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
@ -1,117 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from ..common_functions import conv_bn_layer, deconv_bn_layer
-from collections import OrderedDict
-
-
-class EASTHead(object):
-    """
-    EAST: An Efficient and Accurate Scene Text Detector
-        see arxiv: https://arxiv.org/abs/1704.03155
-    args:
-        params(dict): the super parameters for network build
-    """
-
-    def __init__(self, params):
-
-        self.model_name = params['model_name']
-
-    def unet_fusion(self, inputs):
-        f = inputs[::-1]
-        if self.model_name == "large":
-            num_outputs = [128, 128, 128, 128]
-        else:
-            num_outputs = [64, 64, 64, 64]
-        g = [None, None, None, None]
-        h = [None, None, None, None]
-        for i in range(4):
-            if i == 0:
-                h[i] = f[i]
-            else:
-                h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
-                h[i] = conv_bn_layer(
-                    input=h[i],
-                    num_filters=num_outputs[i],
-                    filter_size=3,
-                    stride=1,
-                    act='relu',
-                    name="unet_h_%d" % (i))
-            if i <= 2:
-                #can be replaced with unpool
-                g[i] = deconv_bn_layer(
-                    input=h[i],
-                    num_filters=num_outputs[i],
-                    name="unet_g_%d" % (i))
-            else:
-                g[i] = conv_bn_layer(
-                    input=h[i],
-                    num_filters=num_outputs[i],
-                    filter_size=3,
-                    stride=1,
-                    act='relu',
-                    name="unet_g_%d" % (i))
-        return g[3]
-
-    def detector_header(self, f_common):
-        if self.model_name == "large":
-            num_outputs = [128, 64, 1, 8]
-        else:
-            num_outputs = [64, 32, 1, 8]
-        f_det = conv_bn_layer(
-            input=f_common,
-            num_filters=num_outputs[0],
-            filter_size=3,
-            stride=1,
-            act='relu',
-            name="det_head1")
-        f_det = conv_bn_layer(
-            input=f_det,
-            num_filters=num_outputs[1],
-            filter_size=3,
-            stride=1,
-            act='relu',
-            name="det_head2")
-        #f_score
-        f_score = conv_bn_layer(
-            input=f_det,
-            num_filters=num_outputs[2],
-            filter_size=1,
-            stride=1,
-            act=None,
-            name="f_score")
-        f_score = fluid.layers.sigmoid(f_score)
-        #f_geo
-        f_geo = conv_bn_layer(
-            input=f_det,
-            num_filters=num_outputs[3],
-            filter_size=1,
-            stride=1,
-            act=None,
-            name="f_geo")
-        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
-        return f_score, f_geo
-
-    def __call__(self, inputs):
-        f_common = self.unet_fusion(inputs)
-        f_score, f_geo = self.detector_header(f_common)
-        predicts = OrderedDict()
-        predicts['f_score'] = f_score
-        predicts['f_geo'] = f_geo
-        return predicts
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
@ -1,228 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from ..common_functions import conv_bn_layer, deconv_bn_layer
-from collections import OrderedDict
-
-
-class SASTHead(object):
-    """
-    SAST: 
-        see arxiv: https://arxiv.org/abs/1908.05498
-    args:
-        params(dict): the super parameters for network build
-    """
-
-    def __init__(self, params):
-        self.model_name = params['model_name']
-        self.with_cab = params['with_cab']
-
-    def FPN_Up_Fusion(self, blocks):
-        """
-        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
-                1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
-        """
-        f = [blocks['block_6'], blocks['block_5'], blocks['block_4'], blocks['block_3'], blocks['block_2']]
-        num_outputs = [256, 256, 192, 192, 128]
-        g = [None, None, None, None, None]
-        h = [None, None, None, None, None] 
-        for i in range(5):
-            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
-                                filter_size=1, stride=1, act=None, name='fpn_up_h'+str(i))
-
-        for i in range(4):
-            if i == 0:
-                g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g0')
-                #print("g[{}] shape: {}".format(i, g[i].shape))
-            else:
-                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
-                g[i] = fluid.layers.relu(g[i])
-                #g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
-                #                    filter_size=1, stride=1, act='relu')
-                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
-                                    filter_size=3, stride=1, act='relu', name='fpn_up_g%d_1'%i)
-                g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g%d_2'%i)
-                #print("g[{}] shape: {}".format(i, g[i].shape))
-
-        g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
-        g[4] = fluid.layers.relu(g[4])
-        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
-                            filter_size=3, stride=1, act='relu', name='fpn_up_fusion_1')
-        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
-                            filter_size=1, stride=1, act=None, name='fpn_up_fusion_2')
-        
-        return g[4]
-
-    def FPN_Down_Fusion(self, blocks):
-        """
-        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
-                1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
-        """
-        f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
-        num_outputs = [32, 64, 128]
-        g = [None, None, None]
-        h = [None, None, None] 
-        for i in range(3):
-            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
-                                filter_size=3, stride=1, act=None, name='fpn_down_h'+str(i))
-        for i in range(2):
-            if i == 0:
-                g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g0')
-            else:
-                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
-                g[i] = fluid.layers.relu(g[i])
-                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3, stride=1, act='relu', name='fpn_down_g%d_1'%i)
-                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g%d_2'%i)
-            # print("g[{}] shape: {}".format(i, g[i].shape)) 
-        g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
-        g[2] = fluid.layers.relu(g[2])
-        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
-                            filter_size=3, stride=1, act='relu', name='fpn_down_fusion_1')
-        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
-                            filter_size=1, stride=1, act=None, name='fpn_down_fusion_2')
-        return g[2]
-
-    def SAST_Header1(self, f_common):
-        """Detector header."""
-        #f_score
-        f_score = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_score1')
-        f_score = conv_bn_layer(input=f_score, num_filters=64, filter_size=3, stride=1, act='relu', name='f_score2')
-        f_score = conv_bn_layer(input=f_score, num_filters=128, filter_size=1, stride=1, act='relu', name='f_score3')
-        f_score = conv_bn_layer(input=f_score, num_filters=1, filter_size=3, stride=1, name='f_score4')
-        f_score = fluid.layers.sigmoid(f_score)
-        # print("f_score shape: {}".format(f_score.shape))
-
-        #f_boder
-        f_border = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_border1')
-        f_border = conv_bn_layer(input=f_border, num_filters=64, filter_size=3, stride=1, act='relu', name='f_border2')
-        f_border = conv_bn_layer(input=f_border, num_filters=128, filter_size=1, stride=1, act='relu', name='f_border3')
-        f_border = conv_bn_layer(input=f_border, num_filters=4, filter_size=3, stride=1, name='f_border4')
-        # print("f_border shape: {}".format(f_border.shape))
-        
-        return f_score, f_border
-
-    def SAST_Header2(self, f_common):
-        """Detector header.""" 
-        #f_tvo
-        f_tvo = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tvo1')
-        f_tvo = conv_bn_layer(input=f_tvo, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tvo2')
-        f_tvo = conv_bn_layer(input=f_tvo, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tvo3')
-        f_tvo = conv_bn_layer(input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4')
-        # print("f_tvo shape: {}".format(f_tvo.shape))
-
-        #f_tco
-        f_tco = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tco1')
-        f_tco = conv_bn_layer(input=f_tco, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tco2')
-        f_tco = conv_bn_layer(input=f_tco, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tco3')
-        f_tco = conv_bn_layer(input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4')
-        # print("f_tco shape: {}".format(f_tco.shape))
-        
-        return f_tvo, f_tco
-
-    def cross_attention(self, f_common):
-        """
-        """
-        f_shape = fluid.layers.shape(f_common)
-        f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_theta')
-        f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_phi')
-        f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_g')
-        ### horizon
-        fh_theta = f_theta
-        fh_phi = f_phi
-        fh_g = f_g
-        #flatten
-        fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
-        fh_theta = fluid.layers.reshape(fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
-        fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
-        fh_phi = fluid.layers.reshape(fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
-        fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
-        fh_g = fluid.layers.reshape(fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128])
-        #correlation
-        fh_attn = fluid.layers.matmul(fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1]))
-        #scale
-        fh_attn = fh_attn / (128 ** 0.5)
-        fh_attn = fluid.layers.softmax(fh_attn)
-        #weighted sum
-        fh_weight = fluid.layers.matmul(fh_attn, fh_g)
-        fh_weight = fluid.layers.reshape(fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
-        # print("fh_weight: {}".format(fh_weight.shape))
-        fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
-        fh_weight = conv_bn_layer(input=fh_weight, num_filters=128, filter_size=1, stride=1, name='fh_weight')
-        #short cut
-        fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fh_sc')
-        f_h = fluid.layers.relu(fh_weight + fh_sc)
-        ######
-        #vertical
-        fv_theta = fluid.layers.transpose(f_theta, [0, 1, 3, 2])
-        fv_phi = fluid.layers.transpose(f_phi, [0, 1, 3, 2])
-        fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
-        #flatten
-        fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
-        fv_theta = fluid.layers.reshape(fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
-        fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
-        fv_phi = fluid.layers.reshape(fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
-        fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
-        fv_g = fluid.layers.reshape(fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128])
-        #correlation
-        fv_attn = fluid.layers.matmul(fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1]))
-        #scale
-        fv_attn = fv_attn / (128 ** 0.5)
-        fv_attn = fluid.layers.softmax(fv_attn)
-        #weighted sum
-        fv_weight = fluid.layers.matmul(fv_attn, fv_g)
-        fv_weight = fluid.layers.reshape(fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
-        # print("fv_weight: {}".format(fv_weight.shape))
-        fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
-        fv_weight = conv_bn_layer(input=fv_weight, num_filters=128, filter_size=1, stride=1, name='fv_weight')
-        #short cut
-        fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fv_sc')
-        f_v = fluid.layers.relu(fv_weight + fv_sc)
-        ######
-        f_attn = fluid.layers.concat([f_h, f_v], axis=1)
-        f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1, stride=1, act='relu', name='f_attn')  
-        return f_attn
-        
-    def __call__(self, blocks, with_cab=False):
-        # for k, v in blocks.items():
-        #     print(k, v.shape)
-
-        #down fpn
-        f_down = self.FPN_Down_Fusion(blocks)
-        # print("f_down shape: {}".format(f_down.shape))
-        #up fpn
-        f_up = self.FPN_Up_Fusion(blocks)
-        # print("f_up shape: {}".format(f_up.shape))
-        #fusion
-        f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
-        f_common = fluid.layers.relu(f_common)
-        # print("f_common: {}".format(f_common.shape))
-        
-        if self.with_cab:
-            # print('enhence f_common with CAB.')
-            f_common = self.cross_attention(f_common)
-            
-        f_score, f_border= self.SAST_Header1(f_common)
-        f_tvo, f_tco = self.SAST_Header2(f_common)
-
-        predicts = OrderedDict()
-        predicts['f_score'] = f_score
-        predicts['f_border'] = f_border
-        predicts['f_tvo'] = f_tvo
-        predicts['f_tco'] = f_tco
-        return predicts
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ b/ppocr/modeling/heads/rec_attention_head.py
@ -1,237 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from .rec_seq_encoder import SequenceEncoder
-import numpy as np
-
-
-class AttentionPredict(object):
-    def __init__(self, params):
-        super(AttentionPredict, self).__init__()
-        self.char_num = params['char_num']
-        self.encoder = SequenceEncoder(params)
-        self.decoder_size = params['Attention']['decoder_size']
-        self.word_vector_dim = params['Attention']['word_vector_dim']
-        self.encoder_type = params['encoder_type']
-        self.max_length = params['max_text_length']
-
-    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
-                         decoder_size):
-        decoder_state_proj = layers.fc(input=decoder_state,
-                                       size=decoder_size,
-                                       bias_attr=False,
-                                       name="decoder_state_proj_fc")
-        decoder_state_expand = layers.sequence_expand(
-            x=decoder_state_proj, y=encoder_proj)
-        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
-        concated = layers.tanh(x=concated)
-        attention_weights = layers.fc(input=concated,
-                                      size=1,
-                                      act=None,
-                                      bias_attr=False,
-                                      name="attention_weights_fc")
-        attention_weights = layers.sequence_softmax(input=attention_weights)
-        weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
-        scaled = layers.elementwise_mul(
-            x=encoder_vec, y=weigths_reshape, axis=0)
-        context = layers.sequence_pool(input=scaled, pool_type='sum')
-        return context
-
-    def gru_decoder_with_attention(self, target_embedding, encoder_vec,
-                                   encoder_proj, decoder_boot, decoder_size,
-                                   char_num):
-        rnn = layers.DynamicRNN()
-        with rnn.block():
-            current_word = rnn.step_input(target_embedding)
-            encoder_vec = rnn.static_input(encoder_vec)
-            encoder_proj = rnn.static_input(encoder_proj)
-            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
-            context = self.simple_attention(encoder_vec, encoder_proj,
-                                            hidden_mem, decoder_size)
-            fc_1 = layers.fc(input=context,
-                             size=decoder_size * 3,
-                             bias_attr=False,
-                             name="rnn_fc1")
-            fc_2 = layers.fc(input=current_word,
-                             size=decoder_size * 3,
-                             bias_attr=False,
-                             name="rnn_fc2")
-            decoder_inputs = fc_1 + fc_2
-            h, _, _ = layers.gru_unit(
-                input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
-            rnn.update_memory(hidden_mem, h)
-            out = layers.fc(input=h,
-                            size=char_num,
-                            bias_attr=True,
-                            act='softmax',
-                            name="rnn_out_fc")
-            rnn.output(out)
-        return rnn()
-
-    def gru_attention_infer(self, decoder_boot, max_length, char_num,
-                            word_vector_dim, encoded_vector, encoded_proj,
-                            decoder_size):
-        init_state = decoder_boot
-        beam_size = 1
-        array_len = layers.fill_constant(
-            shape=[1], dtype='int64', value=max_length)
-        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)
-
-        # fill the first element with init_state
-        state_array = layers.create_array('float32')
-        layers.array_write(init_state, array=state_array, i=counter)
-
-        # ids, scores as memory
-        ids_array = layers.create_array('int64')
-        scores_array = layers.create_array('float32')
-        rois_shape = layers.shape(init_state)
-        batch_size = layers.slice(
-            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
-        lod_level = layers.range(
-            start=0, end=batch_size, step=1, dtype=batch_size.dtype)
-
-        init_ids = layers.fill_constant_batch_size_like(
-            input=init_state, shape=[-1, 1], value=0, dtype='int64')
-        init_ids = layers.lod_reset(init_ids, lod_level)
-        init_ids = layers.lod_append(init_ids, lod_level)
-
-        init_scores = layers.fill_constant_batch_size_like(
-            input=init_state, shape=[-1, 1], value=1, dtype='float32')
-        init_scores = layers.lod_reset(init_scores, init_ids)
-        layers.array_write(init_ids, array=ids_array, i=counter)
-        layers.array_write(init_scores, array=scores_array, i=counter)
-
-        full_ids = fluid.layers.fill_constant_batch_size_like(
-            input=init_state, shape=[-1, 1], dtype='int64', value=1)
-        full_scores = fluid.layers.fill_constant_batch_size_like(
-            input=init_state, shape=[-1, 1], dtype='float32', value=1)
-
-        cond = layers.less_than(x=counter, y=array_len)
-        while_op = layers.While(cond=cond)
-        with while_op.block():
-            pre_ids = layers.array_read(array=ids_array, i=counter)
-            pre_state = layers.array_read(array=state_array, i=counter)
-            pre_score = layers.array_read(array=scores_array, i=counter)
-            pre_ids_emb = layers.embedding(
-                input=pre_ids,
-                size=[char_num, word_vector_dim],
-                dtype='float32')
-
-            context = self.simple_attention(encoded_vector, encoded_proj,
-                                            pre_state, decoder_size)
-
-            # expand the recursive_sequence_lengths of pre_state 
-            # to be the same with pre_score
-            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
-            context_expanded = layers.sequence_expand(context, pre_score)
-
-            fc_1 = layers.fc(input=context_expanded,
-                             size=decoder_size * 3,
-                             bias_attr=False,
-                             name="rnn_fc1")
-
-            fc_2 = layers.fc(input=pre_ids_emb,
-                             size=decoder_size * 3,
-                             bias_attr=False,
-                             name="rnn_fc2")
-
-            decoder_inputs = fc_1 + fc_2
-            current_state, _, _ = layers.gru_unit(
-                input=decoder_inputs,
-                hidden=pre_state_expanded,
-                size=decoder_size * 3)
-            current_state_with_lod = layers.lod_reset(
-                x=current_state, y=pre_score)
-            # use score to do beam search
-            current_score = layers.fc(input=current_state_with_lod,
-                                      size=char_num,
-                                      bias_attr=True,
-                                      act='softmax',
-                                      name="rnn_out_fc")
-            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)
-
-            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
-            fluid.layers.assign(new_ids, full_ids)
-
-            new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
-            fluid.layers.assign(new_scores, full_scores)
-            
-            layers.increment(x=counter, value=1, in_place=True)
-
-            # update the memories
-            layers.array_write(current_state, array=state_array, i=counter)
-            layers.array_write(topk_indices, array=ids_array, i=counter)
-            layers.array_write(topk_scores, array=scores_array, i=counter)
-
-            # update the break condition: 
-            # up to the max length or all candidates of
-            # source sentences have ended.
-            length_cond = layers.less_than(x=counter, y=array_len)
-            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
-            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
-        return full_ids, full_scores
-
-    def __call__(self, inputs, labels=None, mode=None):
-        encoder_features = self.encoder(inputs)
-        char_num = self.char_num
-        word_vector_dim = self.word_vector_dim
-        decoder_size = self.decoder_size
-
-        if self.encoder_type == "reshape":
-            encoder_input = encoder_features
-            encoded_vector = encoder_features
-        else:
-            encoder_input = encoder_features[1]
-            encoded_vector = layers.concat(encoder_features, axis=1)
-        encoded_proj = layers.fc(input=encoded_vector,
-                                 size=decoder_size,
-                                 bias_attr=False,
-                                 name="encoded_proj_fc")
-        backward_first = layers.sequence_pool(
-            input=encoder_input, pool_type='first')
-        decoder_boot = layers.fc(input=backward_first,
-                                 size=decoder_size,
-                                 bias_attr=False,
-                                 act="relu",
-                                 name='decoder_boot')
-
-        if mode == "train":
-            label_in = labels['label_in']
-            label_out = labels['label_out']
-            label_in = layers.cast(x=label_in, dtype='int64')
-            trg_embedding = layers.embedding(
-                input=label_in,
-                size=[char_num, word_vector_dim],
-                dtype='float32')
-            predict = self.gru_decoder_with_attention(
-                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
-                decoder_size, char_num)
-            _, decoded_out = layers.topk(input=predict, k=1)
-            decoded_out = layers.lod_reset(decoded_out, y=label_out)
-            predicts = {'predict':predict, 'decoded_out':decoded_out}
-        else:
-            ids, predict = self.gru_attention_infer(
-                decoder_boot, self.max_length, char_num, word_vector_dim,
-                encoded_vector, encoded_proj, decoder_size)
-            predicts = {'predict':predict, 'decoded_out':ids}
-        return predicts
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@ -1,16 +1,16 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
@ -19,34 +19,33 @@ from __future__ import print_function
 import math

 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from .rec_seq_encoder import SequenceEncoder
-from ..common_functions import get_para_bias_attr
-import numpy as np
+from paddle import ParamAttr, nn


-class CTCPredict(object):
-    def __init__(self, params):
-        super(CTCPredict, self).__init__()
-        self.char_num = params['char_num']
-        self.encoder = SequenceEncoder(params)
-        self.encoder_type = params['encoder_type']
-        self.fc_decay = params.get("fc_decay", 0.0004)
+def get_para_bias_attr(l2_decay, k, name):
+    regularizer = paddle.fluid.regularizer.L2Decay(l2_decay)
+    stdv = 1.0 / math.sqrt(k * 1.0)
+    initializer = nn.initializer.Uniform(-stdv, stdv)
+    weight_attr = ParamAttr(
+        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
+    bias_attr = ParamAttr(
+        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
+    return [weight_attr, bias_attr]

-    def __call__(self, inputs, labels=None, mode=None):
-        encoder_features = self.encoder(inputs)
-        if self.encoder_type != "reshape":
-            encoder_features = fluid.layers.concat(encoder_features, axis=1)
-        name = "ctc_fc"
-        para_attr, bias_attr = get_para_bias_attr(
-            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
-        predict = fluid.layers.fc(input=encoder_features,
-                                  size=self.char_num + 1,
-                                  param_attr=para_attr,
-                                  bias_attr=bias_attr,
-                                  name=name)
-        decoded_out = fluid.layers.ctc_greedy_decoder(
-            input=predict, blank=self.char_num)
-        predicts = {'predict': predict, 'decoded_out': decoded_out}
+
+class CTC(nn.Layer):
+    def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
+        super(CTC, self).__init__()
+        weight_attr, bias_attr = get_para_bias_attr(
+            l2_decay=fc_decay, k=in_channels, name='ctc_fc')
+        self.fc = nn.Linear(
+            in_channels,
+            out_channels,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            name='ctc_fc')
+        self.out_channels = out_channels
+
+    def forward(self, x, labels=None):
+        predicts = self.fc(x)
        return predicts
--- a/ppocr/modeling/heads/rec_seq_encoder.py
+++ b/ppocr/modeling/heads/rec_seq_encoder.py
@ -1,100 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-
-class EncoderWithReshape(object):
-    def __init__(self, params):
-        super(EncoderWithReshape, self).__init__()
-
-    def __call__(self, inputs):
-        sliced_feature = layers.im2sequence(
-            input=inputs,
-            stride=[1, 1],
-            filter_size=[inputs.shape[2], 1],
-            name="sliced_feature")
-        return sliced_feature
-
-
-class EncoderWithRNN(object):
-    def __init__(self, params):
-        super(EncoderWithRNN, self).__init__()
-        self.rnn_hidden_size = params['SeqRNN']['hidden_size']
-
-    def __call__(self, inputs):
-        lstm_list = []
-        name_prefix = "lstm"
-        rnn_hidden_size = self.rnn_hidden_size
-        for no in range(1, 3):
-            if no == 1:
-                is_reverse = False
-            else:
-                is_reverse = True
-            name = "%s_st1_fc%d" % (name_prefix, no)
-            fc = layers.fc(input=inputs,
-                           size=rnn_hidden_size * 4,
-                           param_attr=fluid.ParamAttr(name=name + "_w"),
-                           bias_attr=fluid.ParamAttr(name=name + "_b"),
-                           name=name)
-            name = "%s_st1_out%d" % (name_prefix, no)
-            lstm, _ = layers.dynamic_lstm(
-                input=fc,
-                size=rnn_hidden_size * 4,
-                is_reverse=is_reverse,
-                param_attr=fluid.ParamAttr(name=name + "_w"),
-                bias_attr=fluid.ParamAttr(name=name + "_b"),
-                use_peepholes=False)
-            name = "%s_st2_fc%d" % (name_prefix, no)
-            fc = layers.fc(input=lstm,
-                           size=rnn_hidden_size * 4,
-                           param_attr=fluid.ParamAttr(name=name + "_w"),
-                           bias_attr=fluid.ParamAttr(name=name + "_b"),
-                           name=name)
-            name = "%s_st2_out%d" % (name_prefix, no)
-            lstm, _ = layers.dynamic_lstm(
-                input=fc,
-                size=rnn_hidden_size * 4,
-                is_reverse=is_reverse,
-                param_attr=fluid.ParamAttr(name=name + "_w"),
-                bias_attr=fluid.ParamAttr(name=name + "_b"),
-                use_peepholes=False)
-            lstm_list.append(lstm)
-        return lstm_list
-
-
-class SequenceEncoder(object):
-    def __init__(self, params):
-        super(SequenceEncoder, self).__init__()
-        self.encoder_type = params['encoder_type']
-        self.encoder_reshape = EncoderWithReshape(params)
-        if self.encoder_type == "rnn":
-            self.encoder_rnn = EncoderWithRNN(params)
-
-    def __call__(self, inputs):
-        if self.encoder_type == "reshape":
-            encoder_features = self.encoder_reshape(inputs)
-        elif self.encoder_type == "rnn":
-            inputs = self.encoder_reshape(inputs)
-            encoder_features = self.encoder_rnn(inputs)
-        else:
-            assert False, "Unsupport encoder_type:%s"\
-                % self.encoder_type
-        return encoder_features
--- a/ppocr/modeling/heads/rec_srn_all_head.py
+++ b/ppocr/modeling/heads/rec_srn_all_head.py
@ -1,230 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-from .self_attention.model import wrap_encoder
-from .self_attention.model import wrap_encoder_forFeature
-gradient_clip = 10
-
-
-class SRNPredict(object):
-    def __init__(self, params):
-        super(SRNPredict, self).__init__()
-        self.char_num = params['char_num']
-        self.max_length = params['max_text_length']
-
-        self.num_heads = params['num_heads']
-        self.num_encoder_TUs = params['num_encoder_TUs']
-        self.num_decoder_TUs = params['num_decoder_TUs']
-        self.hidden_dims = params['hidden_dims']
-
-    def pvam(self, inputs, others):
-
-        b, c, h, w = inputs.shape
-        conv_features = fluid.layers.reshape(x=inputs, shape=[-1, c, h * w])
-        conv_features = fluid.layers.transpose(x=conv_features, perm=[0, 2, 1])
-
-        #===== Transformer encoder =====
-        b, t, c = conv_features.shape
-        encoder_word_pos = others["encoder_word_pos"]
-        gsrm_word_pos = others["gsrm_word_pos"]
-
-        enc_inputs = [conv_features, encoder_word_pos, None]
-        word_features = wrap_encoder_forFeature(
-            src_vocab_size=-1,
-            max_length=t,
-            n_layer=self.num_encoder_TUs,
-            n_head=self.num_heads,
-            d_key=int(self.hidden_dims / self.num_heads),
-            d_value=int(self.hidden_dims / self.num_heads),
-            d_model=self.hidden_dims,
-            d_inner_hid=self.hidden_dims,
-            prepostprocess_dropout=0.1,
-            attention_dropout=0.1,
-            relu_dropout=0.1,
-            preprocess_cmd="n",
-            postprocess_cmd="da",
-            weight_sharing=True,
-            enc_inputs=enc_inputs, )
-        fluid.clip.set_gradient_clip(
-            fluid.clip.GradientClipByValue(gradient_clip))
-
-        #===== Parallel Visual Attention Module =====
-        b, t, c = word_features.shape
-
-        word_features = fluid.layers.fc(word_features, c, num_flatten_dims=2)
-        word_features_ = fluid.layers.reshape(word_features, [-1, 1, t, c])
-        word_features_ = fluid.layers.expand(word_features_,
-                                             [1, self.max_length, 1, 1])
-        word_pos_feature = fluid.layers.embedding(gsrm_word_pos,
-                                                  [self.max_length, c])
-        word_pos_ = fluid.layers.reshape(word_pos_feature,
-                                         [-1, self.max_length, 1, c])
-        word_pos_ = fluid.layers.expand(word_pos_, [1, 1, t, 1])
-        temp = fluid.layers.elementwise_add(
-            word_features_, word_pos_, act='tanh')
-
-        attention_weight = fluid.layers.fc(input=temp,
-                                           size=1,
-                                           num_flatten_dims=3,
-                                           bias_attr=False)
-        attention_weight = fluid.layers.reshape(
-            x=attention_weight, shape=[-1, self.max_length, t])
-        attention_weight = fluid.layers.softmax(input=attention_weight, axis=-1)
-
-        pvam_features = fluid.layers.matmul(attention_weight,
-                                            word_features)  #[b, max_length, c]
-
-        return pvam_features
-
-    def gsrm(self, pvam_features, others):
-
-        #===== GSRM Visual-to-semantic embedding block =====
-        b, t, c = pvam_features.shape
-        word_out = fluid.layers.fc(
-            input=fluid.layers.reshape(pvam_features, [-1, c]),
-            size=self.char_num,
-            act="softmax")
-        #word_out.stop_gradient = True
-        word_ids = fluid.layers.argmax(word_out, axis=1)
-        word_ids.stop_gradient = True
-        word_ids = fluid.layers.reshape(x=word_ids, shape=[-1, t, 1])
-
-        #===== GSRM Semantic reasoning block =====
-        """
-        This module is achieved through bi-transformers,
-        ngram_feature1 is the froward one, ngram_fetaure2 is the backward one
-        """
-        pad_idx = self.char_num
-        gsrm_word_pos = others["gsrm_word_pos"]
-        gsrm_slf_attn_bias1 = others["gsrm_slf_attn_bias1"]
-        gsrm_slf_attn_bias2 = others["gsrm_slf_attn_bias2"]
-
-        def prepare_bi(word_ids):
-            """
-            prepare bi for gsrm
-            word1 for forward; word2 for backward
-            """
-            word1 = fluid.layers.cast(word_ids, "float32")
-            word1 = fluid.layers.pad(word1, [0, 0, 1, 0, 0, 0],
-                                     pad_value=1.0 * pad_idx)
-            word1 = fluid.layers.cast(word1, "int64")
-            word1 = word1[:, :-1, :]
-            word2 = word_ids
-            return word1, word2
-
-        word1, word2 = prepare_bi(word_ids)
-        word1.stop_gradient = True
-        word2.stop_gradient = True
-        enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1]
-        enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2]
-
-        gsrm_feature1 = wrap_encoder(
-            src_vocab_size=self.char_num + 1,
-            max_length=self.max_length,
-            n_layer=self.num_decoder_TUs,
-            n_head=self.num_heads,
-            d_key=int(self.hidden_dims / self.num_heads),
-            d_value=int(self.hidden_dims / self.num_heads),
-            d_model=self.hidden_dims,
-            d_inner_hid=self.hidden_dims,
-            prepostprocess_dropout=0.1,
-            attention_dropout=0.1,
-            relu_dropout=0.1,
-            preprocess_cmd="n",
-            postprocess_cmd="da",
-            weight_sharing=True,
-            enc_inputs=enc_inputs_1, )
-        gsrm_feature2 = wrap_encoder(
-            src_vocab_size=self.char_num + 1,
-            max_length=self.max_length,
-            n_layer=self.num_decoder_TUs,
-            n_head=self.num_heads,
-            d_key=int(self.hidden_dims / self.num_heads),
-            d_value=int(self.hidden_dims / self.num_heads),
-            d_model=self.hidden_dims,
-            d_inner_hid=self.hidden_dims,
-            prepostprocess_dropout=0.1,
-            attention_dropout=0.1,
-            relu_dropout=0.1,
-            preprocess_cmd="n",
-            postprocess_cmd="da",
-            weight_sharing=True,
-            enc_inputs=enc_inputs_2, )
-        gsrm_feature2 = fluid.layers.pad(gsrm_feature2, [0, 0, 0, 1, 0, 0],
-                                         pad_value=0.)
-        gsrm_feature2 = gsrm_feature2[:, 1:, ]
-        gsrm_features = gsrm_feature1 + gsrm_feature2
-
-        b, t, c = gsrm_features.shape
-
-        gsrm_out = fluid.layers.matmul(
-            x=gsrm_features,
-            y=fluid.default_main_program().global_block().var(
-                "src_word_emb_table"),
-            transpose_y=True)
-        b, t, c = gsrm_out.shape
-        gsrm_out = fluid.layers.softmax(input=fluid.layers.reshape(gsrm_out,
-                                                                   [-1, c]))
-
-        return gsrm_features, word_out, gsrm_out
-
-    def vsfd(self, pvam_features, gsrm_features):
-
-        #===== Visual-Semantic Fusion Decoder Module =====
-        b, t, c1 = pvam_features.shape
-        b, t, c2 = gsrm_features.shape
-        combine_features_ = fluid.layers.concat(
-            [pvam_features, gsrm_features], axis=2)
-        img_comb_features_ = fluid.layers.reshape(
-            x=combine_features_, shape=[-1, c1 + c2])
-        img_comb_features_map = fluid.layers.fc(input=img_comb_features_,
-                                                size=c1,
-                                                act="sigmoid")
-        img_comb_features_map = fluid.layers.reshape(
-            x=img_comb_features_map, shape=[-1, t, c1])
-        combine_features = img_comb_features_map * pvam_features + (
-            1.0 - img_comb_features_map) * gsrm_features
-        img_comb_features = fluid.layers.reshape(
-            x=combine_features, shape=[-1, c1])
-
-        fc_out = fluid.layers.fc(input=img_comb_features,
-                                 size=self.char_num,
-                                 act="softmax")
-        return fc_out
-
-    def __call__(self, inputs, others, mode=None):
-
-        pvam_features = self.pvam(inputs, others)
-        gsrm_features, word_out, gsrm_out = self.gsrm(pvam_features, others)
-        final_out = self.vsfd(pvam_features, gsrm_features)
-
-        _, decoded_out = fluid.layers.topk(input=final_out, k=1)
-        predicts = {
-            'predict': final_out,
-            'decoded_out': decoded_out,
-            'word_out': word_out,
-            'gsrm_out': gsrm_out
-        }
-
-        return predicts
--- a/ppocr/modeling/heads/self_attention/init.py
+++ b/ppocr/modeling/heads/self_attention/init.py
--- a/ppocr/modeling/heads/self_attention/model.py
+++ b/ppocr/modeling/heads/self_attention/model.py
@ -1,485 +0,0 @@
-from functools import partial
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-encoder_data_input_fields = (
-    "src_word",
-    "src_pos",
-    "src_slf_attn_bias", )
-
-
-def wrap_layer_with_block(layer, block_idx):
-    """
-    Make layer define support indicating block, by which we can add layers
-    to other blocks within current block. This will make it easy to define
-    cache among while loop.
-    """
-
-    class BlockGuard(object):
-        """
-        BlockGuard class.
-
-        BlockGuard class is used to switch to the given block in a program by
-        using the Python `with` keyword.
-        """
-
-        def __init__(self, block_idx=None, main_program=None):
-            self.main_program = fluid.default_main_program(
-            ) if main_program is None else main_program
-            self.old_block_idx = self.main_program.current_block().idx
-            self.new_block_idx = block_idx
-
-        def __enter__(self):
-            self.main_program.current_block_idx = self.new_block_idx
-
-        def __exit__(self, exc_type, exc_val, exc_tb):
-            self.main_program.current_block_idx = self.old_block_idx
-            if exc_type is not None:
-                return False  # re-raise exception
-            return True
-
-    def layer_wrapper(*args, **kwargs):
-        with BlockGuard(block_idx):
-            return layer(*args, **kwargs)
-
-    return layer_wrapper
-
-
-def multi_head_attention(queries,
-                         keys,
-                         values,
-                         attn_bias,
-                         d_key,
-                         d_value,
-                         d_model,
-                         n_head=1,
-                         dropout_rate=0.,
-                         cache=None,
-                         gather_idx=None,
-                         static_kv=False):
-    """
-    Multi-Head Attention. Note that attn_bias is added to the logit before
-    computing softmax activiation to mask certain selected positions so that
-    they will not considered in attention weights.
-    """
-    keys = queries if keys is None else keys
-    values = keys if values is None else values
-
-    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
-        raise ValueError(
-            "Inputs: quries, keys and values should all be 3-D tensors.")
-
-    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
-        """
-        Add linear projection to queries, keys, and values.
-        """
-        q = layers.fc(input=queries,
-                      size=d_key * n_head,
-                      bias_attr=False,
-                      num_flatten_dims=2)
-        # For encoder-decoder attention in inference, insert the ops and vars
-        # into global block to use as cache among beam search.
-        fc_layer = wrap_layer_with_block(
-            layers.fc, fluid.default_main_program().current_block()
-            .parent_idx) if cache is not None and static_kv else layers.fc
-        k = fc_layer(
-            input=keys,
-            size=d_key * n_head,
-            bias_attr=False,
-            num_flatten_dims=2)
-        v = fc_layer(
-            input=values,
-            size=d_value * n_head,
-            bias_attr=False,
-            num_flatten_dims=2)
-        return q, k, v
-
-    def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
-        """
-        Reshape input tensors at the last dimension to split multi-heads
-        and then transpose. Specifically, transform the input tensor with shape
-        [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
-        with shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        reshaped_q = layers.reshape(
-            x=queries, shape=[0, 0, n_head, d_key], inplace=True)
-        # permuate the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
-        # For encoder-decoder attention in inference, insert the ops and vars
-        # into global block to use as cache among beam search.
-        reshape_layer = wrap_layer_with_block(
-            layers.reshape,
-            fluid.default_main_program().current_block()
-            .parent_idx) if cache is not None and static_kv else layers.reshape
-        transpose_layer = wrap_layer_with_block(
-            layers.transpose,
-            fluid.default_main_program().current_block().
-            parent_idx) if cache is not None and static_kv else layers.transpose
-        reshaped_k = reshape_layer(
-            x=keys, shape=[0, 0, n_head, d_key], inplace=True)
-        k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
-        reshaped_v = reshape_layer(
-            x=values, shape=[0, 0, n_head, d_value], inplace=True)
-        v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])
-
-        if cache is not None:  # only for faster inference
-            if static_kv:  # For encoder-decoder attention in inference
-                cache_k, cache_v = cache["static_k"], cache["static_v"]
-                # To init the static_k and static_v in cache.
-                # Maybe we can use condition_op(if_else) to do these at the first
-                # step in while loop to replace these, however it might be less
-                # efficient.
-                static_cache_init = wrap_layer_with_block(
-                    layers.assign,
-                    fluid.default_main_program().current_block().parent_idx)
-                static_cache_init(k, cache_k)
-                static_cache_init(v, cache_v)
-            else:  # For decoder self-attention in inference
-                cache_k, cache_v = cache["k"], cache["v"]
-            # gather cell states corresponding to selected parent
-            select_k = layers.gather(cache_k, index=gather_idx)
-            select_v = layers.gather(cache_v, index=gather_idx)
-            if not static_kv:
-                # For self attention in inference, use cache and concat time steps.
-                select_k = layers.concat([select_k, k], axis=2)
-                select_v = layers.concat([select_v, v], axis=2)
-            # update cell states(caches) cached in global block
-            layers.assign(select_k, cache_k)
-            layers.assign(select_v, cache_v)
-            return q, select_k, select_v
-        return q, k, v
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of inpunt tensor x
-        so that it becomes one dimension, which is reverse to __split_heads.
-        """
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        # print(q)
-        # print(k)
-
-        product = layers.matmul(x=q, y=k, transpose_y=True, alpha=d_key**-0.5)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights, dropout_prob=dropout_rate, seed=None, is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-    q, k, v = __split_heads_qkv(q, k, v, n_head, d_key, d_value)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(input=out,
-                         size=d_model,
-                         bias_attr=False,
-                         num_flatten_dims=2)
-    return proj_out
-
-
-def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(input=x,
-                       size=d_inner_hid,
-                       num_flatten_dims=2,
-                       act="relu")
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden, dropout_prob=dropout_rate, seed=None, is_test=False)
-    out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2)
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
-    """
-    Add residual connection, layer normalization and droput to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.initializer.Constant(1.),
-                bias_attr=fluid.initializer.Constant(0.))
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out, dropout_prob=dropout_rate, seed=None, is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def prepare_encoder(
-        src_word,  # [b,t,c]
-        src_pos,
-        src_vocab_size,
-        src_emb_dim,
-        src_max_len,
-        dropout_rate=0.,
-        bos_idx=0,
-        word_emb_param_name=None,
-        pos_enc_param_name=None):
-    """Add word embeddings and position encodings.
-    The output tensor has a shape of:
-    [batch_size, max_src_length_in_batch, d_model].
-    This module is used at the bottom of the encoder stacks.
-    """
-
-    src_word_emb = src_word
-    src_word_emb = layers.cast(src_word_emb, 'float32')
-
-    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
-    src_pos_enc = layers.embedding(
-        src_pos,
-        size=[src_max_len, src_emb_dim],
-        param_attr=fluid.ParamAttr(
-            name=pos_enc_param_name, trainable=False))
-    src_pos_enc.stop_gradient = True
-    enc_input = src_word_emb + src_pos_enc
-    return layers.dropout(
-        enc_input, dropout_prob=dropout_rate, seed=None,
-        is_test=False) if dropout_rate else enc_input
-
-
-def prepare_decoder(src_word,
-                    src_pos,
-                    src_vocab_size,
-                    src_emb_dim,
-                    src_max_len,
-                    dropout_rate=0.,
-                    bos_idx=0,
-                    word_emb_param_name=None,
-                    pos_enc_param_name=None):
-    """Add word embeddings and position encodings.
-        The output tensor has a shape of:
-        [batch_size, max_src_length_in_batch, d_model].
-        This module is used at the bottom of the encoder stacks.
-        """
-    src_word_emb = layers.embedding(
-        src_word,
-        size=[src_vocab_size, src_emb_dim],
-        padding_idx=bos_idx,  # set embedding of bos to 0
-        param_attr=fluid.ParamAttr(
-            name=word_emb_param_name,
-            initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
-
-    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
-    src_pos_enc = layers.embedding(
-        src_pos,
-        size=[src_max_len, src_emb_dim],
-        param_attr=fluid.ParamAttr(
-            name=pos_enc_param_name, trainable=False))
-    src_pos_enc.stop_gradient = True
-    enc_input = src_word_emb + src_pos_enc
-    return layers.dropout(
-        enc_input, dropout_prob=dropout_rate, seed=None,
-        is_test=False) if dropout_rate else enc_input
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da"):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consits of a multi-head (self) attention followed by
-    position-wise feed-forward networks and both the two components companied
-    with the post_process_layer to add residual connection, layer normalization
-    and droput.
-    """
-    attn_output = multi_head_attention(
-        pre_process_layer(enc_input, preprocess_cmd,
-                          prepostprocess_dropout), None, None, attn_bias, d_key,
-        d_value, d_model, n_head, attention_dropout)
-    attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd,
-                                     prepostprocess_dropout)
-    ffd_output = positionwise_feed_forward(
-        pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout),
-        d_inner_hid, d_model, relu_dropout)
-    return post_process_layer(attn_output, ffd_output, postprocess_cmd,
-                              prepostprocess_dropout)
-
-
-def encoder(enc_input,
-            attn_bias,
-            n_layer,
-            n_head,
-            d_key,
-            d_value,
-            d_model,
-            d_inner_hid,
-            prepostprocess_dropout,
-            attention_dropout,
-            relu_dropout,
-            preprocess_cmd="n",
-            postprocess_cmd="da"):
-    """
-    The encoder is composed of a stack of identical layers returned by calling
-    encoder_layer.
-    """
-    for i in range(n_layer):
-        enc_output = encoder_layer(
-            enc_input,
-            attn_bias,
-            n_head,
-            d_key,
-            d_value,
-            d_model,
-            d_inner_hid,
-            prepostprocess_dropout,
-            attention_dropout,
-            relu_dropout,
-            preprocess_cmd,
-            postprocess_cmd, )
-        enc_input = enc_output
-    enc_output = pre_process_layer(enc_output, preprocess_cmd,
-                                   prepostprocess_dropout)
-    return enc_output
-
-
-def wrap_encoder_forFeature(src_vocab_size,
-                            max_length,
-                            n_layer,
-                            n_head,
-                            d_key,
-                            d_value,
-                            d_model,
-                            d_inner_hid,
-                            prepostprocess_dropout,
-                            attention_dropout,
-                            relu_dropout,
-                            preprocess_cmd,
-                            postprocess_cmd,
-                            weight_sharing,
-                            enc_inputs=None,
-                            bos_idx=0):
-    """
-    The wrapper assembles together all needed layers for the encoder.
-    img, src_pos, src_slf_attn_bias = enc_inputs
-    img
-    """
-
-    conv_features, src_pos, src_slf_attn_bias = enc_inputs  #
-    b, t, c = conv_features.shape
-
-    enc_input = prepare_encoder(
-        conv_features,
-        src_pos,
-        src_vocab_size,
-        d_model,
-        max_length,
-        prepostprocess_dropout,
-        bos_idx=bos_idx,
-        word_emb_param_name="src_word_emb_table")
-
-    enc_output = encoder(
-        enc_input,
-        src_slf_attn_bias,
-        n_layer,
-        n_head,
-        d_key,
-        d_value,
-        d_model,
-        d_inner_hid,
-        prepostprocess_dropout,
-        attention_dropout,
-        relu_dropout,
-        preprocess_cmd,
-        postprocess_cmd, )
-    return enc_output
-
-
-def wrap_encoder(src_vocab_size,
-                 max_length,
-                 n_layer,
-                 n_head,
-                 d_key,
-                 d_value,
-                 d_model,
-                 d_inner_hid,
-                 prepostprocess_dropout,
-                 attention_dropout,
-                 relu_dropout,
-                 preprocess_cmd,
-                 postprocess_cmd,
-                 weight_sharing,
-                 enc_inputs=None,
-                 bos_idx=0):
-    """
-    The wrapper assembles together all needed layers for the encoder.
-    img, src_pos, src_slf_attn_bias = enc_inputs
-    img
-    """
-
-    src_word, src_pos, src_slf_attn_bias = enc_inputs  #
-
-    enc_input = prepare_decoder(
-        src_word,
-        src_pos,
-        src_vocab_size,
-        d_model,
-        max_length,
-        prepostprocess_dropout,
-        bos_idx=bos_idx,
-        word_emb_param_name="src_word_emb_table")
-
-    enc_output = encoder(
-        enc_input,
-        src_slf_attn_bias,
-        n_layer,
-        n_head,
-        d_key,
-        d_value,
-        d_model,
-        d_inner_hid,
-        prepostprocess_dropout,
-        attention_dropout,
-        relu_dropout,
-        preprocess_cmd,
-        postprocess_cmd, )
-    return enc_output
--- a/ppocr/modeling/losses/init.py
+++ b/ppocr/modeling/losses/init.py
@ -11,3 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import copy
+
+
+def build_loss(config):
+    # det loss
+    from .det_db_loss import DBLoss
+
+    # rec loss
+    from .rec_ctc_loss import CTCLoss
+
+    support_dict = ['DBLoss', 'CTCLoss']
+
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    assert module_name in support_dict, Exception('loss only support {}'.format(
+        support_dict))
+    module_class = eval(module_name)(**config)
+    return module_class
--- a/ppocr/modeling/losses/det_basic_loss.py
+++ b/ppocr/modeling/losses/det_basic_loss.py
@ -1,16 +1,16 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
@ -18,99 +18,189 @@ from __future__ import print_function

 import numpy as np

-import paddle.fluid as fluid
+import paddle
+from paddle import nn
+import paddle.nn.functional as F


-def BalanceLoss(pred,
-                gt,
-                mask,
-                balance_loss=True,
-                main_loss_type="DiceLoss",
-                negative_ratio=3,
-                return_origin=False,
-                eps=1e-6):
-    """
-    The BalanceLoss for Differentiable Binarization text detection
-    args:
-        pred (variable): predicted feature maps.
-        gt (variable): ground truth feature maps.
-        mask (variable): masked maps.
-        balance_loss (bool): whether balance loss or not, default is True
-        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
-            'Euclidean','BCELoss', 'MaskL1Loss'], default is  'DiceLoss'.
-        negative_ratio (int|float): float, default is 3.
-        return_origin (bool): whether return unbalanced loss or not, default is False.
-        eps (float): default is 1e-6.
-    return: (variable) balanced loss
-    """
-    positive = gt * mask
-    negative = (1 - gt) * mask
+class BalanceLoss(nn.Layer):
+    def __init__(self,
+                 balance_loss=True,
+                 main_loss_type='DiceLoss',
+                 negative_ratio=3,
+                 return_origin=False,
+                 eps=1e-6,
+                 **kwargs):
+        """
+               The BalanceLoss for Differentiable Binarization text detection
+               args:
+                   balance_loss (bool): whether balance loss or not, default is True
+                   main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
+                       'Euclidean','BCELoss', 'MaskL1Loss'], default is  'DiceLoss'.
+                   negative_ratio (int|float): cap on kept negatives as a multiple of the positive count, default is 3.
+                   return_origin (bool): whether return unbalanced loss or not, default is False.
+                   eps (float): default is 1e-6.
+               """
+        super(BalanceLoss, self).__init__()
+        self.balance_loss = balance_loss
+        self.main_loss_type = main_loss_type
+        self.negative_ratio = negative_ratio
+        self.main_loss_type = main_loss_type
+        self.return_origin = return_origin
+        self.eps = eps

-    positive_count = fluid.layers.reduce_sum(positive)
-    positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
-    negative_count = min(
-        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
-    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)
+        if self.main_loss_type == "CrossEntropy":
+            self.loss = nn.CrossEntropyLoss()
+        elif self.main_loss_type == "Euclidean":
+            self.loss = nn.MSELoss()
+        elif self.main_loss_type == "DiceLoss":
+            self.loss = DiceLoss(self.eps)
+        elif self.main_loss_type == "BCELoss":
+            self.loss = BCELoss(reduction='none')
+        elif self.main_loss_type == "MaskL1Loss":
+            self.loss = MaskL1Loss(self.eps)
+        else:
+            loss_type = [
+                'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
+            ]
+            raise Exception(
+                "main_loss_type in BalanceLoss() can only be one of {}".format(
+                    loss_type))

-    if main_loss_type == "CrossEntropy":
-        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
-        loss = fluid.layers.reduce_mean(loss)
-    elif main_loss_type == "Euclidean":
-        loss = fluid.layers.square(pred - gt)
-        loss = fluid.layers.reduce_mean(loss)
-    elif main_loss_type == "DiceLoss":
-        loss = DiceLoss(pred, gt, mask)
-    elif main_loss_type == "BCELoss":
-        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
-    elif main_loss_type == "MaskL1Loss":
-        loss = MaskL1Loss(pred, gt, mask)
-    else:
-        loss_type = [
-            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
-        ]
-        raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
-                        format(loss_type))
+    def forward(self, pred, gt, mask=None):
+        """
+        The BalanceLoss for Differentiable Binarization text detection
+        args:
+            pred (variable): predicted feature maps.
+            gt (variable): ground truth feature maps.
+            mask (variable): masked maps.
+        return: (variable) balanced loss
+        """
+        # if self.main_loss_type in ['DiceLoss']:
+        #     # For the loss that returns to scalar value, perform ohem on the mask
+        #     mask = ohem_batch(pred, gt, mask, self.negative_ratio)
+        #     loss = self.loss(pred, gt, mask)
+        #     return loss

-    if not balance_loss:
+        positive = gt * mask
+        negative = (1 - gt) * mask
+
+        positive_count = int(positive.sum())
+        negative_count = int(
+            min(negative.sum(), positive_count * self.negative_ratio))
+        loss = self.loss(pred, gt, mask=mask)
+
+        if not self.balance_loss:
+            return loss
+
+        positive_loss = positive * loss
+        negative_loss = negative * loss
+        negative_loss = paddle.reshape(negative_loss, shape=[-1])
+        if negative_count > 0:
+            sort_loss = negative_loss.sort(descending=True)
+            negative_loss = sort_loss[:negative_count]
+            # negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int)
+            balance_loss = (positive_loss.sum() + negative_loss.sum()) / (
+                positive_count + negative_count + self.eps)
+        else:
+            balance_loss = positive_loss.sum() / (positive_count + self.eps)
+        if self.return_origin:
+            return balance_loss, loss
+
+        return balance_loss
+
+
+class DiceLoss(nn.Layer):
+    def __init__(self, eps=1e-6):
+        super(DiceLoss, self).__init__()
+        self.eps = eps
+
+    def forward(self, pred, gt, mask, weights=None):
+        """
+        DiceLoss function.
+        """
+
+        assert pred.shape == gt.shape
+        assert pred.shape == mask.shape
+        if weights is not None:
+            assert weights.shape == mask.shape
+            mask = weights * mask
+        intersection = paddle.sum(pred * gt * mask)
+
+        union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps
+        loss = 1 - 2.0 * intersection / union
+        assert loss <= 1
        return loss

-    positive_loss = positive * loss
-    negative_loss = negative * loss
-    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
-    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
-    balance_loss = (fluid.layers.reduce_sum(positive_loss) +
-                    fluid.layers.reduce_sum(negative_loss)) / (
-                        positive_count + negative_count + eps)

-    if return_origin:
-        return balance_loss, loss
-    return balance_loss
+class MaskL1Loss(nn.Layer):
+    def __init__(self, eps=1e-6):
+        super(MaskL1Loss, self).__init__()
+        self.eps = eps
+
+    def forward(self, pred, gt, mask):
+        """
+        Mask L1 Loss
+        """
+        loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
+        loss = paddle.mean(loss)
+        return loss


-def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
-    """
-    DiceLoss function.
-    """
+class BCELoss(nn.Layer):
+    def __init__(self, reduction='mean'):
+        super(BCELoss, self).__init__()
+        self.reduction = reduction

-    assert pred.shape == gt.shape
-    assert pred.shape == mask.shape
-    if weights is not None:
-        assert weights.shape == mask.shape
-        mask = weights * mask
-    intersection = fluid.layers.reduce_sum(pred * gt * mask)
-
-    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
-        gt * mask) + eps
-    loss = 1 - 2.0 * intersection / union
-    assert loss <= 1
-    return loss
+    def forward(self, input, label, mask=None, weight=None, name=None):
+        loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
+        return loss


-def MaskL1Loss(pred, gt, mask, eps=1e-6):
-    """
-    Mask L1 Loss
-    """
-    loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
-        fluid.layers.reduce_sum(mask) + eps)
-    loss = fluid.layers.reduce_mean(loss)
-    return loss
+def ohem_single(score, gt_text, training_mask, ohem_ratio):
+    pos_num = (int)(np.sum(gt_text > 0.5)) - (
+        int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5)))
+
+    if pos_num == 0:
+        # selected_mask = gt_text.copy() * 0 # may be not good
+        selected_mask = training_mask
+        selected_mask = selected_mask.reshape(
+            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+        return selected_mask
+
+    neg_num = (int)(np.sum(gt_text <= 0.5))
+    neg_num = (int)(min(pos_num * ohem_ratio, neg_num))
+
+    if neg_num == 0:
+        selected_mask = training_mask
+        selected_mask = selected_mask.reshape(
+            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+        return selected_mask
+
+    neg_score = score[gt_text <= 0.5]
+    # Sort the negative-sample scores from high to low
+    neg_score_sorted = np.sort(-neg_score)
+    threshold = -neg_score_sorted[neg_num - 1]
+    # Build the mask of high-scoring negative samples plus positive samples
+    selected_mask = ((score >= threshold) |
+                     (gt_text > 0.5)) & (training_mask > 0.5)
+    selected_mask = selected_mask.reshape(
+        1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+    return selected_mask
+
+
+def ohem_batch(scores, gt_texts, training_masks, ohem_ratio):
+    scores = scores.numpy()
+    gt_texts = gt_texts.numpy()
+    training_masks = training_masks.numpy()
+
+    selected_masks = []
+    for i in range(scores.shape[0]):
+        selected_masks.append(
+            ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[
+                i, :, :], ohem_ratio))
+
+    selected_masks = np.concatenate(selected_masks, 0)
+    selected_masks = paddle.to_variable(selected_masks)
+
+    return selected_masks
--- a/ppocr/modeling/losses/det_db_loss.py
+++ b/ppocr/modeling/losses/det_db_loss.py
@ -1,68 +1,71 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+from paddle import nn
+
 from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss


-class DBLoss(object):
+class DBLoss(nn.Layer):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        param (dict): the hyperparameters for DB Loss
    """

-    def __init__(self, params):
+    def __init__(self,
+                 balance_loss=True,
+                 main_loss_type='DiceLoss',
+                 alpha=5,
+                 beta=10,
+                 ohem_ratio=3,
+                 eps=1e-6,
+                 **kwargs):
        super(DBLoss, self).__init__()
-        self.balance_loss = params['balance_loss']
-        self.main_loss_type = params['main_loss_type']
+        self.alpha = alpha
+        self.beta = beta
+        self.dice_loss = DiceLoss(eps=eps)
+        self.l1_loss = MaskL1Loss(eps=eps)
+        self.bce_loss = BalanceLoss(
+            balance_loss=balance_loss,
+            main_loss_type=main_loss_type,
+            negative_ratio=ohem_ratio)

-        self.alpha = params['alpha']
-        self.beta = params['beta']
-        self.ohem_ratio = params['ohem_ratio']
+    def forward(self, predicts, labels):
+        label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[
+            1:]
+        shrink_maps = predicts[:, 0, :, :]
+        threshold_maps = predicts[:, 1, :, :]
+        binary_maps = predicts[:, 2, :, :]

-    def __call__(self, predicts, labels):
-        label_shrink_map = labels['shrink_map']
-        label_shrink_mask = labels['shrink_mask']
-        label_threshold_map = labels['threshold_map']
-        label_threshold_mask = labels['threshold_mask']
-        pred = predicts['maps']
-        shrink_maps = pred[:, 0, :, :]
-        threshold_maps = pred[:, 1, :, :]
-        binary_maps = pred[:, 2, :, :]
-
-        loss_shrink_maps = BalanceLoss(
-            shrink_maps,
-            label_shrink_map,
-            label_shrink_mask,
-            balance_loss=self.balance_loss,
-            main_loss_type=self.main_loss_type,
-            negative_ratio=self.ohem_ratio)
-        loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
-                                         label_threshold_mask)
-        loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
-                                    label_shrink_mask)
+        loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map,
+                                         label_shrink_mask)
+        loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map,
+                                           label_threshold_mask)
+        loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map,
+                                          label_shrink_mask)
        loss_shrink_maps = self.alpha * loss_shrink_maps
        loss_threshold_maps = self.beta * loss_threshold_maps

-        loss_all = loss_shrink_maps + loss_threshold_maps\
-            + loss_binary_maps
-        losses = {'total_loss':loss_all,\
-            "loss_shrink_maps":loss_shrink_maps,\
-            "loss_threshold_maps":loss_threshold_maps,\
-            "loss_binary_maps":loss_binary_maps}
+        loss_all = loss_shrink_maps + loss_threshold_maps \
+                   + loss_binary_maps
+        losses = {'loss': loss_all, \
+                  "loss_shrink_maps": loss_shrink_maps, \
+                  "loss_threshold_maps": loss_threshold_maps, \
+                  "loss_binary_maps": loss_binary_maps}
        return losses
--- a/ppocr/modeling/losses/det_east_loss.py
+++ b/ppocr/modeling/losses/det_east_loss.py
@ -1,61 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-
-
-class EASTLoss(object):
-    """
-    EAST Loss function
-    """
-
-    def __init__(self, params=None):
-        super(EASTLoss, self).__init__()
-
-    def __call__(self, predicts, labels):
-        f_score = predicts['f_score']
-        f_geo = predicts['f_geo']
-        l_score = labels['score']
-        l_geo = labels['geo']
-        l_mask = labels['mask']
-        ##dice_loss
-        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
-        union = fluid.layers.reduce_sum(f_score * l_mask)\
-            + fluid.layers.reduce_sum(l_score * l_mask)
-        dice_loss = 1 - 2 * intersection / (union + 1e-5)
-        #smoooth_l1_loss
-        channels = 8
-        l_geo_split = fluid.layers.split(
-            l_geo, num_or_sections=channels + 1, dim=1)
-        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
-        smooth_l1 = 0
-        for i in range(0, channels):
-            geo_diff = l_geo_split[i] - f_geo_split[i]
-            abs_geo_diff = fluid.layers.abs(geo_diff)
-            smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
-            smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
-            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
-                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
-            out_loss = l_geo_split[-1] / channels * in_loss * l_score
-            smooth_l1 += out_loss
-        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
-        dice_loss = dice_loss * 0.01
-        total_loss = dice_loss + smooth_l1_loss
-        losses = {'total_loss':total_loss, "dice_loss":dice_loss,\
-            "smooth_l1_loss":smooth_l1_loss}
-        return losses
--- a/ppocr/modeling/losses/det_sast_loss.py
+++ b/ppocr/modeling/losses/det_sast_loss.py
@ -1,115 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-
-
-class SASTLoss(object):
-    """
-    SAST Loss function
-    """
-
-    def __init__(self, params=None):
-        super(SASTLoss, self).__init__()
-
-    def __call__(self, predicts, labels):
-        """
-        tcl_pos: N x 128 x 3
-        tcl_mask: N x 128 x 1
-        tcl_label: N x X list or LoDTensor
-        """
-                
-        f_score = predicts['f_score']
-        f_border = predicts['f_border']
-        f_tvo = predicts['f_tvo']
-        f_tco = predicts['f_tco']
-
-        l_score = labels['input_score']
-        l_border = labels['input_border']
-        l_mask = labels['input_mask']
-        l_tvo = labels['input_tvo']
-        l_tco = labels['input_tco']
-
-        #score_loss
-        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
-        union = fluid.layers.reduce_sum(f_score * l_mask) + fluid.layers.reduce_sum(l_score * l_mask)
-        score_loss = 1.0 - 2 * intersection / (union + 1e-5)
-
-        #border loss
-        l_border_split, l_border_norm = fluid.layers.split(l_border, num_or_sections=[4, 1], dim=1)
-        f_border_split = f_border
-        l_border_norm_split = fluid.layers.expand(x=l_border_norm, expand_times=[1, 4, 1, 1])
-        l_border_score = fluid.layers.expand(x=l_score, expand_times=[1, 4, 1, 1])   
-        l_border_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 4, 1, 1])   
-        border_diff = l_border_split - f_border_split
-        abs_border_diff = fluid.layers.abs(border_diff) 
-        border_sign = abs_border_diff < 1.0
-        border_sign = fluid.layers.cast(border_sign, dtype='float32')
-        border_sign.stop_gradient = True
-        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
-                    (abs_border_diff - 0.5) * (1.0 - border_sign)
-        border_out_loss = l_border_norm_split * border_in_loss
-        border_loss = fluid.layers.reduce_sum(border_out_loss * l_border_score * l_border_mask) / \
-                    (fluid.layers.reduce_sum(l_border_score * l_border_mask) + 1e-5)
-
-        #tvo_loss
-        l_tvo_split, l_tvo_norm = fluid.layers.split(l_tvo, num_or_sections=[8, 1], dim=1)
-        f_tvo_split = f_tvo
-        l_tvo_norm_split = fluid.layers.expand(x=l_tvo_norm, expand_times=[1, 8, 1, 1])
-        l_tvo_score = fluid.layers.expand(x=l_score, expand_times=[1, 8, 1, 1])   
-        l_tvo_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 8, 1, 1])   
-        #
-        tvo_geo_diff = l_tvo_split - f_tvo_split
-        abs_tvo_geo_diff = fluid.layers.abs(tvo_geo_diff) 
-        tvo_sign = abs_tvo_geo_diff < 1.0
-        tvo_sign = fluid.layers.cast(tvo_sign, dtype='float32')
-        tvo_sign.stop_gradient = True
-        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
-                    (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
-        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
-        tvo_loss = fluid.layers.reduce_sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
-                    (fluid.layers.reduce_sum(l_tvo_score * l_tvo_mask) + 1e-5)
-
-        #tco_loss
-        l_tco_split, l_tco_norm = fluid.layers.split(l_tco, num_or_sections=[2, 1], dim=1)
-        f_tco_split = f_tco
-        l_tco_norm_split = fluid.layers.expand(x=l_tco_norm, expand_times=[1, 2, 1, 1])
-        l_tco_score = fluid.layers.expand(x=l_score, expand_times=[1, 2, 1, 1])   
-        l_tco_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 2, 1, 1])   
-        #
-        tco_geo_diff = l_tco_split - f_tco_split
-        abs_tco_geo_diff = fluid.layers.abs(tco_geo_diff) 
-        tco_sign = abs_tco_geo_diff < 1.0
-        tco_sign = fluid.layers.cast(tco_sign, dtype='float32')
-        tco_sign.stop_gradient = True
-        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
-                    (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
-        tco_out_loss = l_tco_norm_split * tco_in_loss
-        tco_loss = fluid.layers.reduce_sum(tco_out_loss * l_tco_score * l_tco_mask) / \
-                    (fluid.layers.reduce_sum(l_tco_score * l_tco_mask) + 1e-5)
-
-
-        # total loss
-        tvo_lw, tco_lw = 1.5, 1.5
-        score_lw, border_lw = 1.0, 1.0
-        total_loss = score_loss * score_lw + border_loss * border_lw + \
-                    tvo_loss * tvo_lw + tco_loss * tco_lw
-                    
-        losses = {'total_loss':total_loss, "score_loss":score_loss,\
-            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
-        return losses
--- a/ppocr/modeling/losses/rec_attention_loss.py
+++ b/ppocr/modeling/losses/rec_attention_loss.py
@ -1,38 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-
-
-class AttentionLoss(object):
-    def __init__(self, params):
-        super(AttentionLoss, self).__init__()
-        self.char_num = params['char_num']
-
-    def __call__(self, predicts, labels):
-        predict = predicts['predict']
-        label_out = labels['label_out']
-        label_out = fluid.layers.cast(x=label_out, dtype='int64')
-        cost = fluid.layers.cross_entropy(input=predict, label=label_out)
-        sum_cost = fluid.layers.reduce_sum(cost)
-        return sum_cost
--- a/ppocr/modeling/losses/rec_ctc_loss.py
+++ b/ppocr/modeling/losses/rec_ctc_loss.py
@ -1,36 +1,36 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import math
-
 import paddle
-import paddle.fluid as fluid
+from paddle import nn


-class CTCLoss(object):
-    def __init__(self, params):
+class CTCLoss(nn.Layer):
+    def __init__(self, **kwargs):
        super(CTCLoss, self).__init__()
-        self.char_num = params['char_num']
+        self.loss_func = nn.CTCLoss(blank=0, reduction='none')

-    def __call__(self, predicts, labels):
-        predict = predicts['predict']
-        label = labels['label']
-        cost = fluid.layers.warpctc(
-            input=predict, label=label, blank=self.char_num, norm_by_times=True)
-        sum_cost = fluid.layers.reduce_sum(cost)
-        return sum_cost
+    def __call__(self, predicts, batch):
+        predicts = predicts.transpose((1, 0, 2))
+        N, B, _ = predicts.shape
+        preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
+        labels = batch[1].astype("int32")
+        label_lengths = batch[2].astype('int64')
+        loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
+        loss = loss.mean()
+        return {'loss': loss}
--- a/ppocr/modeling/losses/rec_srn_loss.py
+++ b/ppocr/modeling/losses/rec_srn_loss.py
@ -1,55 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-
-
-class SRNLoss(object):
-    def __init__(self, params):
-        super(SRNLoss, self).__init__()
-        self.char_num = params['char_num']
-
-    def __call__(self, predicts, others):
-        predict = predicts['predict']
-        word_predict = predicts['word_out']
-        gsrm_predict = predicts['gsrm_out']
-        label = others['label']
-        lbl_weight = others['lbl_weight']
-
-        casted_label = fluid.layers.cast(x=label, dtype='int64')
-        cost_word = fluid.layers.cross_entropy(
-            input=word_predict, label=casted_label)
-        cost_gsrm = fluid.layers.cross_entropy(
-            input=gsrm_predict, label=casted_label)
-        cost_vsfd = fluid.layers.cross_entropy(
-            input=predict, label=casted_label)
-
-        cost_word = fluid.layers.reshape(
-            x=fluid.layers.reduce_sum(cost_word), shape=[1])
-        cost_gsrm = fluid.layers.reshape(
-            x=fluid.layers.reduce_sum(cost_gsrm), shape=[1])
-        cost_vsfd = fluid.layers.reshape(
-            x=fluid.layers.reduce_sum(cost_vsfd), shape=[1])
-
-        sum_cost = fluid.layers.sum(
-            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
-
-        return [sum_cost, cost_vsfd, cost_word]
--- a/ppocr/modeling/necks/init.py
+++ b/ppocr/modeling/necks/init.py
@ -11,3 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+__all__ = ['build_neck']
+
+
+def build_neck(config):
+    from .fpn import FPN
+    from .rnn import SequenceEncoder
+    support_dict = ['FPN', 'SequenceEncoder']
+
+    module_name = config.pop('name')
+    assert module_name in support_dict, Exception('neck only support {}'.format(
+        support_dict))
+    module_class = eval(module_name)(**config)
+    return module_class
--- a/ppocr/modeling/necks/fpn.py
+++ b/ppocr/modeling/necks/fpn.py
@ -0,0 +1,113 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class FPN(nn.Layer):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(FPN, self).__init__()
+        self.out_channels = out_channels
+        weight_attr = paddle.nn.initializer.MSRA(uniform=False)
+
+        self.in2_conv = nn.Conv2d(
+            in_channels=in_channels[0],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(
+                name='conv2d_51.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.in3_conv = nn.Conv2d(
+            in_channels=in_channels[1],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(
+                name='conv2d_50.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.in4_conv = nn.Conv2d(
+            in_channels=in_channels[2],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(
+                name='conv2d_49.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.in5_conv = nn.Conv2d(
+            in_channels=in_channels[3],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(
+                name='conv2d_48.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.p5_conv = nn.Conv2d(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(
+                name='conv2d_52.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.p4_conv = nn.Conv2d(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(
+                name='conv2d_53.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.p3_conv = nn.Conv2d(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(
+                name='conv2d_54.w_0', initializer=weight_attr),
+            bias_attr=False)
+        self.p2_conv = nn.Conv2d(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(
+                name='conv2d_55.w_0', initializer=weight_attr),
+            bias_attr=False)
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+
+        in5 = self.in5_conv(c5)
+        in4 = self.in4_conv(c4)
+        in3 = self.in3_conv(c3)
+        in2 = self.in2_conv(c2)
+
+        out4 = in4 + F.resize_nearest(in5, scale=2)  # 1/16
+        out3 = in3 + F.resize_nearest(out4, scale=2)  # 1/8
+        out2 = in2 + F.resize_nearest(out3, scale=2)  # 1/4
+
+        p5 = self.p5_conv(in5)
+        p4 = self.p4_conv(out4)
+        p3 = self.p3_conv(out3)
+        p2 = self.p2_conv(out2)
+        p5 = F.resize_nearest(p5, scale=8)
+        p4 = F.resize_nearest(p4, scale=4)
+        p3 = F.resize_nearest(p3, scale=2)
+
+        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
+        return fuse
--- a/ppocr/modeling/necks/rnn.py
+++ b/ppocr/modeling/necks/rnn.py
@ -0,0 +1,143 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+
+from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
+
+
+class EncoderWithReshape(nn.Layer):
+    def __init__(self, in_channels, **kwargs):
+        super().__init__()
+        self.out_channels = in_channels
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        x = x.reshape((B, C, -1))
+        x = x.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
+        return x
+
+
+class Im2Seq(nn.Layer):
+    def __init__(self, in_channels, **kwargs):
+        super().__init__()
+        self.out_channels = in_channels
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        assert H == 1
+        x = x.transpose((0, 2, 3, 1))
+        x = x.reshape((-1, C))
+        return x
+
+
+class EncoderWithRNN(nn.Layer):
+    def __init__(self, in_channels, hidden_size):
+        super(EncoderWithRNN, self).__init__()
+        self.out_channels = hidden_size * 2
+        # self.lstm1_fw = nn.LSTMCell(
+        #     in_channels,
+        #     hidden_size,
+        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),
+        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),
+        #     weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),
+        #     bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),
+        # )
+        # self.lstm1_bw = nn.LSTMCell(
+        #     in_channels,
+        #     hidden_size,
+        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),
+        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),
+        #     weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),
+        #     bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),
+        # )
+        # self.lstm2_fw = nn.LSTMCell(
+        #     hidden_size,
+        #     hidden_size,
+        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),
+        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),
+        #     weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),
+        #     bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),
+        # )
+        # self.lstm2_bw = nn.LSTMCell(
+        #     hidden_size,
+        #     hidden_size,
+        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),
+        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),
+        #     weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),
+        #     bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),
+        # )
+        self.lstm = nn.LSTM(
+            in_channels, hidden_size, direction='bidirectional', num_layers=2)
+
+    def forward(self, x):
+        # fw_x, _ = self.lstm1_fw(x)
+        # fw_x, _ = self.lstm2_fw(fw_x)
+        #
+        # # bw
+        # bw_x, _ = self.lstm1_bw(x)
+        # bw_x, _ = self.lstm2_bw(bw_x)
+        # x = paddle.concat([fw_x, bw_x], axis=2)
+        x, _ = self.lstm(x)
+        return x
+
+
+class EncoderWithFC(nn.Layer):
+    def __init__(self, in_channels, hidden_size):
+        super(EncoderWithFC, self).__init__()
+        self.out_channels = hidden_size
+        weight_attr, bias_attr = get_para_bias_attr(
+            l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
+        self.fc = nn.Linear(
+            in_channels,
+            hidden_size,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            name='reduce_encoder_fea')
+
+    def forward(self, x):
+        x = self.fc(x)
+        return x
+
+
+class SequenceEncoder(nn.Layer):
+    def __init__(self, in_channels, encoder_type, hidden_size, **kwargs):
+        super(SequenceEncoder, self).__init__()
+        self.encoder_reshape = EncoderWithReshape(in_channels)
+        self.out_channels = self.encoder_reshape.out_channels
+        if encoder_type == 'reshape':
+            self.only_reshape = True
+        else:
+            support_encoder_dict = {
+                'reshape': EncoderWithReshape,
+                'fc': EncoderWithFC,
+                'rnn': EncoderWithRNN
+            }
+            assert encoder_type in support_encoder_dict, '{} must in {}'.format(
+                encoder_type, support_encoder_dict.keys())
+
+            self.encoder = support_encoder_dict[encoder_type](
+                self.encoder_reshape.out_channels, hidden_size)
+            self.out_channels = self.encoder.out_channels
+            self.only_reshape = False
+
+    def forward(self, x):
+        x = self.encoder_reshape(x)
+        if not self.only_reshape:
+            x = self.encoder(x)
+        return x
--- a/ppocr/modeling/stns/tps.py
+++ b/ppocr/modeling/stns/tps.py
@ -1,261 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-
-
-class LocalizationNetwork(object):
-    def __init__(self, params):
-        super(LocalizationNetwork, self).__init__()
-        self.F = params['num_fiducial']
-        self.loc_lr = params['loc_lr']
-        self.model_name = params['model_name']
-
-    def conv_bn_layer(self,
-                      input,
-                      num_filters,
-                      filter_size,
-                      stride=1,
-                      groups=1,
-                      act=None,
-                      name=None):
-        conv = layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
-        bn_name = "bn_" + name
-        return layers.batch_norm(
-            input=conv,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-
-    def get_initial_fiducials(self):
-        """ see RARE paper Fig. 6 (a) """
-        F = self.F
-        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
-        ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
-        ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
-        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
-        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
-        initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
-        return initial_bias
-
-    def __call__(self, image):
-        F = self.F
-        loc_lr = self.loc_lr
-        if self.model_name == "large":
-            num_filters_list = [64, 128, 256, 512]
-            fc_dim = 256
-        else:
-            num_filters_list = [16, 32, 64, 128]
-            fc_dim = 64
-        for fno in range(len(num_filters_list)):
-            num_filters = num_filters_list[fno]
-            name = "loc_conv%d" % fno
-            if fno == 0:
-                conv = self.conv_bn_layer(
-                    image, num_filters, 3, act='relu', name=name)
-            else:
-                conv = self.conv_bn_layer(
-                    pool, num_filters, 3, act='relu', name=name)
-
-            if fno == len(num_filters_list) - 1:
-                pool = layers.adaptive_pool2d(
-                    input=conv, pool_size=[1, 1], pool_type='avg')
-            else:
-                pool = layers.pool2d(
-                    input=conv,
-                    pool_size=2,
-                    pool_stride=2,
-                    pool_padding=0,
-                    pool_type='max')
-        name = "loc_fc1"
-        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-        fc1 = layers.fc(input=pool,
-                        size=fc_dim,
-                        param_attr=fluid.param_attr.ParamAttr(
-                            learning_rate=loc_lr,
-                            initializer=fluid.initializer.Uniform(-stdv, stdv),
-                            name=name + "_w"),
-                        act='relu',
-                        name=name)
-
-        initial_bias = self.get_initial_fiducials()
-        initial_bias = initial_bias.reshape(-1)
-        name = "loc_fc2"
-        param_attr = fluid.param_attr.ParamAttr(
-            learning_rate=loc_lr,
-            initializer=fluid.initializer.NumpyArrayInitializer(
-                np.zeros([fc_dim, F * 2])),
-            name=name + "_w")
-        bias_attr = fluid.param_attr.ParamAttr(
-            learning_rate=loc_lr,
-            initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
-            name=name + "_b")
-        fc2 = layers.fc(input=fc1,
-                        size=F * 2,
-                        param_attr=param_attr,
-                        bias_attr=bias_attr,
-                        name=name)
-        batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
-        return batch_C_prime
-
-
-class GridGenerator(object):
-    def __init__(self, params):
-        super(GridGenerator, self).__init__()
-        self.eps = 1e-6
-        self.F = params['num_fiducial']
-
-    def build_C(self):
-        """ Return coordinates of fiducial points in I_r; C """
-        F = self.F
-        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
-        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
-        ctrl_pts_y_bottom = np.ones(int(F / 2))
-        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
-        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
-        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
-        return C  # F x 2
-
-    def build_P(self, I_r_size):
-        I_r_width, I_r_height = I_r_size
-        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
-            / I_r_width  # self.I_r_width
-        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
-            / I_r_height  # self.I_r_height
-        # P: self.I_r_width x self.I_r_height x 2
-        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
-        # n (= self.I_r_width x self.I_r_height) x 2
-        return P.reshape([-1, 2])
-
-    def build_inv_delta_C(self, C):
-        """ Return inv_delta_C which is needed to calculate T """
-        F = self.F
-        hat_C = np.zeros((F, F), dtype=float)  # F x F
-        for i in range(0, F):
-            for j in range(i, F):
-                r = np.linalg.norm(C[i] - C[j])
-                hat_C[i, j] = r
-                hat_C[j, i] = r
-        np.fill_diagonal(hat_C, 1)
-        hat_C = (hat_C**2) * np.log(hat_C)
-        # print(C.shape, hat_C.shape)
-        delta_C = np.concatenate(  # F+3 x F+3
-            [
-                np.concatenate(
-                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
-                np.concatenate(
-                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
-                np.concatenate(
-                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
-            ],
-            axis=0)
-        inv_delta_C = np.linalg.inv(delta_C)
-        return inv_delta_C  # F+3 x F+3
-
-    def build_P_hat(self, C, P):
-        F = self.F
-        eps = self.eps
-        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
-        #P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
-        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
-        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
-        P_diff = P_tile - C_tile  # n x F x 2
-        #rbf_norm: n x F
-        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
-        #rbf: n x F
-        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
-        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
-        return P_hat  # n x F+3
-
-    def get_expand_tensor(self, batch_C_prime):
-        name = "ex_fc"
-        initializer = fluid.initializer.ConstantInitializer(value=0.0)
-        param_attr = fluid.param_attr.ParamAttr(
-            learning_rate=0.0, initializer=initializer, name=name + "_w")
-        bias_attr = fluid.param_attr.ParamAttr(
-            learning_rate=0.0, initializer=initializer, name=name + "_b")
-        batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
-                                                 size=6,
-                                                 param_attr=param_attr,
-                                                 bias_attr=bias_attr,
-                                                 name=name)
-        batch_C_ex_part_tensor = fluid.layers.reshape(
-            x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
-        return batch_C_ex_part_tensor
-
-    def __call__(self, batch_C_prime, I_r_size):
-        C = self.build_C()
-        P = self.build_P(I_r_size)
-        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
-        P_hat = self.build_P_hat(C, P).astype('float32')
-
-        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
-        layers.assign(inv_delta_C, inv_delta_C_tensor)
-        inv_delta_C_tensor.stop_gradient = True
-        P_hat_tensor = layers.create_tensor(dtype='float32')
-        layers.assign(P_hat, P_hat_tensor)
-        P_hat_tensor.stop_gradient = True
-
-        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
-        #         batch_C_ex_part_tensor = create_tmp_var(
-        #             fluid.default_main_program(),
-        #             name='batch_C_ex_part_tensor', 
-        #             dtype='float32', shape=[-1, 3, 2])
-        #         layers.py_func(func=get_batch_C_expand, 
-        #             x=[batch_C_prime], out=[batch_C_ex_part_tensor])
-
-        batch_C_ex_part_tensor.stop_gradient = True
-
-        batch_C_prime_with_zeros = layers.concat(
-            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
-        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
-        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
-        return batch_P_prime
-
-
-class TPS(object):
-    def __init__(self, params):
-        super(TPS, self).__init__()
-        self.loc_net = LocalizationNetwork(params)
-        self.grid_generator = GridGenerator(params)
-
-    def __call__(self, image):
-        batch_C_prime = self.loc_net(image)
-        I_r_size = [image.shape[3], image.shape[2]]
-        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
-        batch_P_prime = layers.reshape(
-            x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
-        batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
-        image.stop_gradient = False
-        return batch_I_r
--- a/ppocr/modeling/transform/init.py
+++ b/ppocr/modeling/transform/init.py
@ -11,3 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+__all__ = ['build_transform']
+
+
+def build_transform(config):
+    """Instantiate the transform module named by ``config['name']``.
+
+    ``config`` is modified in place: 'name' is popped and the remaining
+    entries are passed as keyword arguments to the resolved class.
+    """
+    # NOTE(review): the only accepted name is the empty string, and
+    # eval('') raises SyntaxError, so no transform can be built yet --
+    # presumably a placeholder until transform classes are registered.
+    support_dict = ['']
+
+    module_name = config.pop('name')
+    assert module_name in support_dict, Exception(
+        'transform only support {}'.format(support_dict))
+    # The name is resolved with eval(), guarded only by the assert above.
+    module_class = eval(module_name)(**config)
+    return module_class
--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
@ -1,155 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import math
-import paddle.fluid as fluid
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
-import paddle.fluid.layers.ops as ops
-
-from ppocr.utils.utility import initial_logger
-
-logger = initial_logger()
-
-
-def cosine_decay_with_warmup(learning_rate,
-                             step_each_epoch,
-                             epochs=500,
-                             warmup_minibatch=1000):
-    """Applies cosine decay to the learning rate.
-    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
-    decrease lr for every mini-batch and start with warmup.
-    """
-    global_step = _decay_step_counter()
-    lr = fluid.layers.tensor.create_global_var(
-        shape=[1],
-        value=0.0,
-        dtype='float32',
-        persistable=True,
-        name="learning_rate")
-
-    warmup_minibatch = fluid.layers.fill_constant(
-        shape=[1],
-        dtype='float32',
-        value=float(warmup_minibatch),
-        force_cpu=True)
-
-    with fluid.layers.control_flow.Switch() as switch:
-        with switch.case(global_step < warmup_minibatch):
-            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
-            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-        with switch.default():
-            decayed_lr = learning_rate * \
-                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-    return lr
-
-
-def AdamDecay(params, parameter_list=None):
-    """
-    define optimizer function
-    args:
-        params(dict): the super parameters
-        parameter_list (list): list of Variable names to update to minimize loss
-    return:
-    """
-    base_lr = params['base_lr']
-    beta1 = params['beta1']
-    beta2 = params['beta2']
-    l2_decay = params.get("l2_decay", 0.0)
-
-    if 'decay' in params:
-        supported_decay_mode = [
-            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
-        ]
-        params = params['decay']
-        decay_mode = params['function']
-        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
-            supported_decay_mode, decay_mode)
-
-        if decay_mode == "cosine_decay":
-            step_each_epoch = params['step_each_epoch']
-            total_epoch = params['total_epoch']
-            base_lr = fluid.layers.cosine_decay(
-                learning_rate=base_lr,
-                step_each_epoch=step_each_epoch,
-                epochs=total_epoch)
-        elif decay_mode == "cosine_decay_warmup":
-            step_each_epoch = params['step_each_epoch']
-            total_epoch = params['total_epoch']
-            warmup_minibatch = params.get("warmup_minibatch", 1000)
-            base_lr = cosine_decay_with_warmup(
-                learning_rate=base_lr,
-                step_each_epoch=step_each_epoch,
-                epochs=total_epoch,
-                warmup_minibatch=warmup_minibatch)
-        elif decay_mode == "piecewise_decay":
-            boundaries = params["boundaries"]
-            decay_rate = params["decay_rate"]
-            values = [
-                base_lr * decay_rate**idx
-                for idx in range(len(boundaries) + 1)
-            ]
-            base_lr = fluid.layers.piecewise_decay(boundaries, values)
-
-    optimizer = fluid.optimizer.Adam(
-        learning_rate=base_lr,
-        beta1=beta1,
-        beta2=beta2,
-        regularization=L2Decay(regularization_coeff=l2_decay),
-        parameter_list=parameter_list)
-    return optimizer
-
-
-def RMSProp(params, parameter_list=None):
-    """
-    define optimizer function
-    args:
-        params(dict): the super parameters
-        parameter_list (list): list of Variable names to update to minimize loss
-    return:
-    """
-    base_lr = params.get("base_lr", 0.001)
-    l2_decay = params.get("l2_decay", 0.00005)
-
-    if 'decay' in params:
-        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
-        params = params['decay']
-        decay_mode = params['function']
-        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
-            supported_decay_mode, decay_mode)
-
-        if decay_mode == "cosine_decay":
-            step_each_epoch = params['step_each_epoch']
-            total_epoch = params['total_epoch']
-            base_lr = fluid.layers.cosine_decay(
-                learning_rate=base_lr,
-                step_each_epoch=step_each_epoch,
-                epochs=total_epoch)
-        elif decay_mode == "piecewise_decay":
-            boundaries = params["boundaries"]
-            decay_rate = params["decay_rate"]
-            values = [
-                base_lr * decay_rate**idx
-                for idx in range(len(boundaries) + 1)
-            ]
-            base_lr = fluid.layers.piecewise_decay(boundaries, values)
-
-    optimizer = fluid.optimizer.RMSProp(
-        learning_rate=base_lr,
-        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))
-
-    return optimizer
--- a/ppocr/optimizer/init.py
+++ b/ppocr/optimizer/init.py
@ -0,0 +1,56 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_optimizer']
+
+
+def build_lr_scheduler(lr_config, epochs, step_each_epoch):
+    """Build the learning rate described by ``lr_config``.
+
+    ``lr_config`` is updated in place with ``epochs`` and
+    ``step_each_epoch``. If it has a 'name' entry, that entry is popped,
+    the class of that name is looked up in the ``learning_rate`` module,
+    constructed from the remaining entries and then called to produce the
+    result. Otherwise ``lr_config['lr']`` is returned as is.
+    """
+    from . import learning_rate
+    lr_config.update(epochs=epochs, step_each_epoch=step_each_epoch)
+    if 'name' not in lr_config:
+        return lr_config['lr']
+    scheduler_cls = getattr(learning_rate, lr_config.pop('name'))
+    return scheduler_cls(**lr_config)()
+
+
+def build_optimizer(config, epochs, step_each_epoch, parameters):
+    """Build the optimizer and its learning rate from ``config``.
+
+    Args:
+        config (dict): optimizer settings. Must contain 'name' (a class in
+            the ``optimizer`` module) and 'learning_rate' (a dict passed to
+            ``build_lr_scheduler``); may contain 'regularizer' (a dict whose
+            'name' + 'Decay' is a class in the ``regularizer`` module).
+            Remaining entries are passed to the optimizer class as keyword
+            arguments. The caller's dict is not modified.
+        epochs: forwarded to ``build_lr_scheduler``.
+        step_each_epoch: forwarded to ``build_lr_scheduler``.
+        parameters: passed to the optimizer wrapper when it is called.
+
+    Returns:
+        tuple: ``(optimizer, lr)``.
+    """
+    from . import regularizer, optimizer
+    config = copy.deepcopy(config)
+    # step1 build lr
+    lr = build_lr_scheduler(
+        config.pop('learning_rate'), epochs, step_each_epoch)
+
+    # step2 build regularization
+    reg_config = config.pop('regularizer', None)
+    if reg_config is not None:
+        reg_name = reg_config.pop('name') + 'Decay'
+        reg = getattr(regularizer, reg_name)(**reg_config)()
+    else:
+        reg = None
+
+    # step3 build optimizer
+    # The wrappers in the ``optimizer`` module take the regularizer through
+    # ``weight_decay`` and ignore unknown keywords, so passing it under any
+    # other name would silently drop it. An explicit 'weight_decay' entry
+    # in the config keeps priority over the 'regularizer' section.
+    if reg is not None:
+        config.setdefault('weight_decay', reg)
+    optim_name = config.pop('name')
+    optim = getattr(optimizer, optim_name)(learning_rate=lr, **config)
+    return optim(parameters), lr
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
@ -0,0 +1,183 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle.optimizer import lr_scheduler
+
+
+class Linear(object):
+    """
+    Linear learning rate decay, built on lr_scheduler.PolynomialLR
+    Args:
+        lr (float): The initial learning rate. It is a python float number.
+        epochs(int): Number of epochs to decay over; multiplied by step_each_epoch to get the decay steps.
+        step_each_epoch(int): steps each epoch
+        end_lr(float, optional): The minimum final learning rate. Default: 0.0.
+        power(float, optional): Power of polynomial. Default: 1.0.
+        warmup_epoch(int, optional): Number of warmup epochs; multiplied by step_each_epoch to get the warmup steps.
+            Default: 0, means no warmup.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self,
+                 lr,
+                 epochs,
+                 step_each_epoch,
+                 end_lr=0.0,
+                 power=1.0,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Linear, self).__init__()
+        self.lr = lr
+        # Stored as a number of steps, not epochs.
+        self.epochs = epochs * step_each_epoch
+        self.end_lr = end_lr
+        self.power = power
+        self.last_epoch = last_epoch
+        # Stored as a number of steps, not epochs.
+        self.warmup_epoch = warmup_epoch * step_each_epoch
+
+    def __call__(self):
+        """Return the scheduler, wrapped in a linear warmup if requested."""
+        learning_rate = lr_scheduler.PolynomialLR(
+            learning_rate=self.lr,
+            decay_steps=self.epochs,
+            end_lr=self.end_lr,
+            power=self.power,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            # Warm up from 0.0 to the initial learning rate.
+            learning_rate = lr_scheduler.LinearLrWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.lr,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Cosine(object):
+    """
+    Cosine learning rate decay, built on lr_scheduler.CosineAnnealingLR
+    with T_max = step_each_epoch * epochs
+    Args:
+        lr(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        warmup_epoch(int, optional): Number of warmup epochs; multiplied by step_each_epoch to get the warmup steps.
+            Default: 0, means no warmup.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self,
+                 lr,
+                 step_each_epoch,
+                 epochs,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Cosine, self).__init__()
+        self.lr = lr
+        # Total number of training steps.
+        self.T_max = step_each_epoch * epochs
+        self.last_epoch = last_epoch
+        # Stored as a number of steps, not epochs.
+        self.warmup_epoch = warmup_epoch * step_each_epoch
+
+    def __call__(self):
+        """Return the scheduler, wrapped in a linear warmup if requested."""
+        learning_rate = lr_scheduler.CosineAnnealingLR(
+            learning_rate=self.lr, T_max=self.T_max, last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            # Warm up from 0.0 to the initial learning rate.
+            learning_rate = lr_scheduler.LinearLrWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.lr,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Step(object):
+    """
+    Step learning rate decay, built on lr_scheduler.StepLR
+    Args:
+        lr (float): The initial learning rate. It is a python float number.
+        step_size (int): the interval to update, in epochs; multiplied by step_each_epoch to get the interval in steps.
+        step_each_epoch(int): steps each epoch
+        gamma (float): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
+            It should be less than 1.0. Required, no default.
+        warmup_epoch(int, optional): Number of warmup epochs; multiplied by step_each_epoch to get the warmup steps.
+            Default: 0, means no warmup.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self,
+                 lr,
+                 step_size,
+                 step_each_epoch,
+                 gamma,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Step, self).__init__()
+        # Stored as a number of steps, not epochs.
+        self.step_size = step_each_epoch * step_size
+        self.lr = lr
+        self.gamma = gamma
+        self.last_epoch = last_epoch
+        # Stored as a number of steps, not epochs.
+        self.warmup_epoch = warmup_epoch * step_each_epoch
+
+    def __call__(self):
+        """Return the scheduler, wrapped in a linear warmup if requested."""
+        learning_rate = lr_scheduler.StepLR(
+            learning_rate=self.lr,
+            step_size=self.step_size,
+            gamma=self.gamma,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            # Warm up from 0.0 to the initial learning rate.
+            learning_rate = lr_scheduler.LinearLrWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.lr,
+                last_epoch=self.last_epoch)
+        return learning_rate
+
+
+class Piecewise(object):
+    """
+    Piecewise learning rate decay, built on lr_scheduler.PiecewiseLR
+    Args:
+        step_each_epoch(int): steps each epoch
+        decay_epochs(list): A list of epoch numbers; each is multiplied by step_each_epoch to get the step boundaries.
+        values(list): A list of learning rate values that will be picked during different epoch boundaries.
+            The type of element in the list is python float.
+        warmup_epoch(int, optional): Number of warmup epochs; multiplied by step_each_epoch to get the warmup steps.
+            Default: 0, means no warmup.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self,
+                 step_each_epoch,
+                 decay_epochs,
+                 values,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(Piecewise, self).__init__()
+        # Boundaries are stored as step counts, not epochs.
+        self.boundaries = [step_each_epoch * e for e in decay_epochs]
+        self.values = values
+        self.last_epoch = last_epoch
+        # Stored as a number of steps, not epochs.
+        self.warmup_epoch = warmup_epoch * step_each_epoch
+
+    def __call__(self):
+        """Return the scheduler, wrapped in a linear warmup if requested."""
+        learning_rate = lr_scheduler.PiecewiseLR(
+            boundaries=self.boundaries,
+            values=self.values,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            # Warm up from 0.0 to the first piecewise value.
+            learning_rate = lr_scheduler.LinearLrWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.values[0],
+                last_epoch=self.last_epoch)
+        return learning_rate
--- a/ppocr/optimizer/optimizer.py
+++ b/ppocr/optimizer/optimizer.py
@ -0,0 +1,119 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import optimizer as optim
+
+
+class Momentum(object):
+    """
+    Simple Momentum optimizer with velocity state.
+    Args:
+        learning_rate (float|Variable) - The learning rate used to update parameters.
+            Can be a float value or a Variable with one float value as data element.
+        momentum (float) - Momentum factor.
+        weight_decay (optional) - Forwarded unchanged as the ``weight_decay``
+            argument of ``paddle.optimizer.Momentum``. Default: None.
+        **args - Extra keyword arguments are accepted and ignored.
+    """
+
+    def __init__(self, learning_rate, momentum, weight_decay=None, **args):
+        super(Momentum, self).__init__()
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.weight_decay = weight_decay
+
+    def __call__(self, parameters):
+        """Build the paddle Momentum optimizer for ``parameters``."""
+        # Each value goes to its matching keyword: ``parameters`` is what
+        # gets optimized, ``weight_decay`` is the regularization setting.
+        opt = optim.Momentum(
+            learning_rate=self.learning_rate,
+            momentum=self.momentum,
+            parameters=parameters,
+            weight_decay=self.weight_decay)
+        return opt
+
+
+class Adam(object):
+    """
+    Holds Adam settings and builds a ``paddle.optimizer.Adam`` when called.
+    Args:
+        learning_rate, beta1, beta2, epsilon, weight_decay, grad_clip, name,
+            lazy_mode - Stored and forwarded unchanged to
+            ``paddle.optimizer.Adam`` under the same keyword names.
+        parameter_list - Stored on the instance only; not forwarded.
+        **kwargs - Extra keyword arguments are accepted and ignored.
+    """
+
+    def __init__(self,
+                 learning_rate=0.001,
+                 beta1=0.9,
+                 beta2=0.999,
+                 epsilon=1e-08,
+                 parameter_list=None,
+                 weight_decay=None,
+                 grad_clip=None,
+                 name=None,
+                 lazy_mode=False,
+                 **kwargs):
+        self.learning_rate = learning_rate
+        self.beta1, self.beta2 = beta1, beta2
+        self.epsilon = epsilon
+        self.parameter_list = parameter_list
+        self.weight_decay = weight_decay
+        self.grad_clip = grad_clip
+        self.name = name
+        self.lazy_mode = lazy_mode
+
+    def __call__(self, parameters):
+        """Build the paddle Adam optimizer for ``parameters``."""
+        return optim.Adam(
+            learning_rate=self.learning_rate,
+            beta1=self.beta1,
+            beta2=self.beta2,
+            epsilon=self.epsilon,
+            weight_decay=self.weight_decay,
+            grad_clip=self.grad_clip,
+            name=self.name,
+            lazy_mode=self.lazy_mode,
+            parameters=parameters)
+
+
+class RMSProp(object):
+    """
+    Holds RMSProp settings and builds a ``paddle.optimizer.RMSProp`` when called.
+    Args:
+        learning_rate (float|Variable) - The learning rate used to update parameters.
+        momentum (float) - Momentum factor.
+        rho (float) - rho value in equation. Default: 0.95.
+        epsilon (float) - avoid division by zero. Default: 1e-6.
+        weight_decay (optional) - Forwarded unchanged as ``weight_decay``.
+            Default: None.
+        **args - Extra keyword arguments are accepted and ignored.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 rho=0.95,
+                 epsilon=1e-6,
+                 weight_decay=None,
+                 **args):
+        super(RMSProp, self).__init__()
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.rho, self.epsilon = rho, epsilon
+        self.weight_decay = weight_decay
+
+    def __call__(self, parameters):
+        """Build the paddle RMSProp optimizer for ``parameters``."""
+        return optim.RMSProp(
+            learning_rate=self.learning_rate,
+            momentum=self.momentum,
+            rho=self.rho,
+            epsilon=self.epsilon,
+            weight_decay=self.weight_decay,
+            parameters=parameters)
--- a/ppocr/optimizer/regularizer.py
+++ b/ppocr/optimizer/regularizer.py
@ -0,0 +1,54 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import fluid
+
+
+class L1Decay(object):
+    """
+    Factory for ``fluid.regularizer.L1Decay``.
+    Args:
+        factor(float): regularization coeff. Default:0.0.
+    """
+
+    def __init__(self, factor=0.0):
+        super(L1Decay, self).__init__()
+        self.regularization_coeff = factor
+
+    def __call__(self):
+        """Return a new fluid L1Decay regularizer with the stored coeff."""
+        coeff = self.regularization_coeff
+        return fluid.regularizer.L1Decay(regularization_coeff=coeff)
+
+
+class L2Decay(object):
+    """
+    Factory for ``fluid.regularizer.L2Decay``.
+    Args:
+        factor(float): regularization coeff. Default:0.0.
+    """
+
+    def __init__(self, factor=0.0):
+        super(L2Decay, self).__init__()
+        self.regularization_coeff = factor
+
+    def __call__(self):
+        """Return a new fluid L2Decay regularizer with the stored coeff."""
+        coeff = self.regularization_coeff
+        return fluid.regularizer.L2Decay(regularization_coeff=coeff)
--- a/ppocr/postprocess/init.py
+++ b/ppocr/postprocess/init.py
@ -0,0 +1,38 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_post_process']
+
+
+def build_post_process(config, global_config=None):
+    """
+    Build a post-process object from a config dict.
+
+    Args:
+        config(dict): must contain the key 'name' holding one of the
+            supported class names; every other item is passed to the class
+            constructor as a keyword argument. The dict is deep-copied
+            first, so the caller's object is left untouched.
+        global_config(dict|None): optional extra keyword arguments merged
+            into the copied config (overriding duplicate keys) before the
+            class is instantiated.
+    Returns:
+        An instance of the requested post-process class.
+    Raises:
+        KeyError: if ``config`` has no 'name' entry.
+        AssertionError: if the requested name is not supported.
+    """
+    from .db_postprocess import DBPostProcess
+
+    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
+    # Explicit name -> class table. It replaces eval() on a config-supplied
+    # string, whose only guard was an assert that `python -O` strips.
+    supported = (('DBPostProcess', DBPostProcess),
+                 ('CTCLabelDecode', CTCLabelDecode),
+                 ('AttnLabelDecode', AttnLabelDecode))
+    module_classes = dict(supported)
+    support_dict = [name for name, _ in supported]
+
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    if global_config is not None:
+        config.update(global_config)
+    if module_name not in module_classes:
+        # AssertionError is kept so existing callers see the same exception
+        # type and message as before, but the check now survives `-O`.
+        raise AssertionError('post process only support {}'.format(
+            support_dict))
+    module_class = module_classes[module_name](**config)
+    return module_class
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@ -16,11 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import paddle
-import paddle.fluid as fluid
-
 import numpy as np
-import string
 import cv2
 from shapely.geometry import Polygon
 import pyclipper
@ -31,11 +27,16 @@ class DBPostProcess(object):
    The post process for Differentiable Binarization (DB).
    """

-    def __init__(self, params):
-        self.thresh = params['thresh']
-        self.box_thresh = params['box_thresh']
-        self.max_candidates = params['max_candidates']
-        self.unclip_ratio = params['unclip_ratio']
+    def __init__(self,
+                 thresh=0.3,
+                 box_thresh=0.7,
+                 max_candidates=1000,
+                 unclip_ratio=2.0,
+                 **kwargs):
+        self.thresh = thresh
+        self.box_thresh = box_thresh
+        self.max_candidates = max_candidates
+        self.unclip_ratio = unclip_ratio
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@ -55,9 +56,9 @@ class DBPostProcess(object):
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)
-        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
-        scores = np.zeros((num_contours, ), dtype=np.float32)

+        boxes = []
+        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
@ -73,17 +74,14 @@ class DBPostProcess(object):
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
-            if not isinstance(dest_width, int):
-                dest_width = dest_width.item()
-                dest_height = dest_height.item()

            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
-            boxes[index, :, :] = box.astype(np.int16)
-            scores[index] = score
-        return boxes, scores
+            boxes.append(box.astype(np.int16))
+            scores.append(score)
+        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        unclip_ratio = self.unclip_ratio
@ -131,28 +129,15 @@ class DBPostProcess(object):
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

-    def __call__(self, outs_dict, ratio_list):
-        pred = outs_dict['maps']
-
-        pred = pred[:, 0, :, :]
+    def __call__(self, pred, shape_list):
+        pred = pred.numpy()[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
-            height, width = pred.shape[-2:]
-            tmp_boxes, tmp_scores = self.boxes_from_bitmap(
+            height, width = shape_list[batch_index]
+            boxes, scores = self.boxes_from_bitmap(
                pred[batch_index], segmentation[batch_index], width, height)

-            boxes = []
-            for k in range(len(tmp_boxes)):
-                if tmp_scores[k] > self.box_thresh:
-                    boxes.append(tmp_boxes[k])
-            if len(boxes) > 0:
-                boxes = np.array(boxes)
-
-                ratio_h, ratio_w = ratio_list[batch_index]
-                boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
-                boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
-
-            boxes_batch.append(boxes)
+            boxes_batch.append({'points': boxes})
        return boxes_batch
--- a/ppocr/postprocess/db_postprocess_torch.py
+++ b/ppocr/postprocess/db_postprocess_torch.py
@ -0,0 +1,133 @@
+import cv2
+import numpy as np
+import pyclipper
+from shapely.geometry import Polygon
+
+
+class DBPostProcess():
+    """
+    Post-processing for Differentiable Binarization (DB) text detection:
+    turns a probability map into text boxes or polygons.
+
+    Args:
+        thresh(float): binarization threshold applied to the probability
+            map.
+        box_thresh(float): candidates whose mean score is lower than this
+            value are discarded.
+        max_candidates(int): maximum number of contours examined per image.
+        unclip_ratio(float): expansion ratio passed to ``unclip``.
+    """
+
+    def __init__(self,
+                 thresh=0.3,
+                 box_thresh=0.7,
+                 max_candidates=1000,
+                 unclip_ratio=1.5):
+        # Regions whose shortest side is below min_size + 2 are dropped.
+        self.min_size = 3
+        self.thresh = thresh
+        self.box_thresh = box_thresh
+        self.max_candidates = max_candidates
+        self.unclip_ratio = unclip_ratio
+
+    def __call__(self, pred, shape_list, is_output_polygon=False):
+        '''
+        Convert a batch of predictions into detected text regions.
+
+        Args:
+            pred: text region segmentation map, with shape (N, 1, H, W).
+                Must provide ``.numpy()`` (presumably a paddle Tensor --
+                confirm against the caller).
+            shape_list: array of [h, w] pairs, one per image; detected
+                coordinates are rescaled to this size.
+            is_output_polygon(bool): if True return the expanded polygons,
+                otherwise return their 4-point minimum-area boxes.
+        Returns:
+            list of dict, one per image, each ``{"points": boxes}``.
+        '''
+        pred = pred.numpy()[:, 0, :, :]
+        segmentation = self.binarize(pred)
+        batch_out = []
+        for batch_index in range(pred.shape[0]):
+            height, width = shape_list[batch_index]
+            # Scores are computed but only the boxes are returned.
+            boxes, scores = self.post_p(
+                pred[batch_index],
+                segmentation[batch_index],
+                width,
+                height,
+                is_output_polygon=is_output_polygon)
+            batch_out.append({"points": boxes})
+        return batch_out
+
+    def binarize(self, pred):
+        """Return a boolean map that is True where ``pred > self.thresh``."""
+        return pred > self.thresh
+
+    def post_p(self,
+               pred,
+               bitmap,
+               dest_width,
+               dest_height,
+               is_output_polygon=True):
+        '''
+        Extract text regions from a single map.
+
+        Args:
+            pred: probability map with shape (H, W).
+            bitmap: single map with shape (H, W),
+                whose values are binarized as {0, 1}
+            dest_width, dest_height: size the coordinates are rescaled to
+                (and clipped against).
+            is_output_polygon(bool): if True keep the expanded polygon,
+                otherwise keep its 4-point minimum-area box.
+        Returns:
+            (boxes, scores): two lists of equal length.
+        '''
+        height, width = pred.shape
+        boxes = []
+        new_scores = []
+        # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x
+        # return (contours, hierarchy); the contours are second-to-last in
+        # every version.
+        outs = cv2.findContours((bitmap * 255).astype(np.uint8),
+                                cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        contours = outs[-2]
+        for contour in contours[:self.max_candidates]:
+            epsilon = 0.005 * cv2.arcLength(contour, True)
+            approx = cv2.approxPolyDP(contour, epsilon, True)
+            points = approx.reshape((-1, 2))
+            # A usable region needs at least four vertices.
+            if points.shape[0] < 4:
+                continue
+            score = self.box_score_fast(pred, points)
+            if self.box_thresh > score:
+                continue
+
+            box = self.unclip(points, unclip_ratio=self.unclip_ratio)
+            # Keep the region only when offsetting produced exactly one
+            # polygon.
+            # NOTE(review): unclip wraps the offset result in np.array();
+            # several polygons with different vertex counts form a ragged
+            # input, which recent NumPy versions reject -- confirm whether
+            # that case needs handling.
+            if len(box) != 1:
+                continue
+            four_point_box, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
+            if sside < self.min_size + 2:
+                continue
+
+            if not is_output_polygon:
+                box = np.array(four_point_box)
+            else:
+                box = box.reshape(-1, 2)
+            # Rescale from map coordinates to the destination size.
+            box[:, 0] = np.clip(
+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
+            box[:, 1] = np.clip(
+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
+            boxes.append(box)
+            new_scores.append(score)
+        return boxes, new_scores
+
+    def unclip(self, box, unclip_ratio=1.5):
+        """
+        Expand a polygon outwards with pyclipper.
+
+        The offset distance is ``area * unclip_ratio / perimeter``.
+        Returns the offset result wrapped in a numpy array, so ``len()``
+        gives the number of polygons produced.
+        """
+        poly = Polygon(box)
+        distance = poly.area * unclip_ratio / poly.length
+        offset = pyclipper.PyclipperOffset()
+        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        expanded = np.array(offset.Execute(distance))
+        return expanded
+
+    def get_mini_boxes(self, contour):
+        """
+        Return the minimum-area rectangle of ``contour``.
+
+        Returns:
+            (box, sside): ``box`` is a list with the four corner points,
+            ``sside`` is the length of the rectangle's shorter side.
+        """
+        bounding_box = cv2.minAreaRect(contour)
+        # Sort corners by x: the first two are the left pair, the last two
+        # the right pair.
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        # Within each pair, order by y so the result is
+        # [left/smaller-y, right/smaller-y, right/larger-y, left/larger-y].
+        if points[1][1] > points[0][1]:
+            index_1, index_4 = 0, 1
+        else:
+            index_1, index_4 = 1, 0
+        if points[3][1] > points[2][1]:
+            index_2, index_3 = 2, 3
+        else:
+            index_2, index_3 = 3, 2
+
+        box = [
+            points[index_1], points[index_2], points[index_3], points[index_4]
+        ]
+        return box, min(bounding_box[1])
+
+    def box_score_fast(self, bitmap, _box):
+        """
+        Return the mean value of ``bitmap`` inside the polygon ``_box``.
+
+        Only the polygon's bounding rectangle (clipped to the map) is
+        examined. ``_box`` is copied, so the caller's array is unchanged.
+        """
+        h, w = bitmap.shape[:2]
+        box = _box.copy()
+        # Builtin int replaces the np.int alias, which newer NumPy releases
+        # no longer provide; the resulting dtype is the same.
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(int), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(int), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(int), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(int), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+        # Shift the polygon into the coordinate frame of the cropped mask.
+        box[:, 0] = box[:, 0] - xmin
+        box[:, 1] = box[:, 1] - ymin
+        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@ -1,136 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from .locality_aware_nms import nms_locality
-import cv2
-
-import os
-import sys
-__dir__ = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(__dir__)
-sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
-
-
-class EASTPostPocess(object):
-    """
-    The post process for EAST.
-    """
-
-    def __init__(self, params):
-        self.score_thresh = params['score_thresh']
-        self.cover_thresh = params['cover_thresh']
-        self.nms_thresh = params['nms_thresh']
-        
-        # c++ la-nms is faster, but only support python 3.5
-        self.is_python35 = False
-        if sys.version_info.major == 3 and sys.version_info.minor == 5:
-            self.is_python35 = True
-
-    def restore_rectangle_quad(self, origin, geometry):
-        """
-        Restore rectangle from quadrangle.
-        """
-        # quad
-        origin_concat = np.concatenate(
-            (origin, origin, origin, origin), axis=1)  # (n, 8)
-        pred_quads = origin_concat - geometry
-        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
-        return pred_quads
-
-    def detect(self,
-               score_map,
-               geo_map,
-               score_thresh=0.8,
-               cover_thresh=0.1,
-               nms_thresh=0.2):
-        """
-        restore text boxes from score map and geo map
-        """
-        score_map = score_map[0]
-        geo_map = np.swapaxes(geo_map, 1, 0)
-        geo_map = np.swapaxes(geo_map, 1, 2)
-        # filter the score map
-        xy_text = np.argwhere(score_map > score_thresh)
-        if len(xy_text) == 0:
-            return []
-        # sort the text boxes via the y axis
-        xy_text = xy_text[np.argsort(xy_text[:, 0])]
-        #restore quad proposals
-        text_box_restored = self.restore_rectangle_quad(
-            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
-        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
-        boxes[:, :8] = text_box_restored.reshape((-1, 8))
-        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        if self.is_python35:
-            import lanms
-            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
-        else:
-            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
-        if boxes.shape[0] == 0:
-            return []
-        # Here we filter some low score boxes by the average score map, 
-        #   this is different from the orginal paper.
-        for i, box in enumerate(boxes):
-            mask = np.zeros_like(score_map, dtype=np.uint8)
-            cv2.fillPoly(mask, box[:8].reshape(
-                (-1, 4, 2)).astype(np.int32) // 4, 1)
-            boxes[i, 8] = cv2.mean(score_map, mask)[0]
-        boxes = boxes[boxes[:, 8] > cover_thresh]
-        return boxes
-
-    def sort_poly(self, p):
-        """
-        Sort polygons.
-        """
-        min_axis = np.argmin(np.sum(p, axis=1))
-        p = p[[min_axis, (min_axis + 1) % 4,\
-            (min_axis + 2) % 4, (min_axis + 3) % 4]]
-        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
-            return p
-        else:
-            return p[[0, 3, 2, 1]]
-
-    def __call__(self, outs_dict, ratio_list):
-        score_list = outs_dict['f_score']
-        geo_list = outs_dict['f_geo']
-        img_num = len(ratio_list)
-        dt_boxes_list = []
-        for ino in range(img_num):
-            score = score_list[ino]
-            geo = geo_list[ino]
-            boxes = self.detect(
-                score_map=score,
-                geo_map=geo,
-                score_thresh=self.score_thresh,
-                cover_thresh=self.cover_thresh,
-                nms_thresh=self.nms_thresh)
-            boxes_norm = []
-            if len(boxes) > 0:
-                ratio_h, ratio_w = ratio_list[ino]
-                boxes = boxes[:, :8].reshape((-1, 4, 2))
-                boxes[:, :, 0] /= ratio_w
-                boxes[:, :, 1] /= ratio_h
-                for i_box, box in enumerate(boxes):
-                    box = self.sort_poly(box.astype(np.int32))
-                    if np.linalg.norm(box[0] - box[1]) < 5 \
-                        or np.linalg.norm(box[3] - box[0]) < 5:
-                        continue
-                    boxes_norm.append(box)
-            dt_boxes_list.append(np.array(boxes_norm))
-        return dt_boxes_list
--- a/ppocr/postprocess/lanms/.gitignore
+++ b/ppocr/postprocess/lanms/.gitignore
@ -1 +0,0 @@
-adaptor.so
--- a/ppocr/postprocess/lanms/.ycm_extra_conf.py
+++ b/ppocr/postprocess/lanms/.ycm_extra_conf.py
@ -1,140 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2014  Google Inc.
-#
-# This file is part of YouCompleteMe.
-#
-# YouCompleteMe is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# YouCompleteMe is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with YouCompleteMe.  If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import sys
-import glob
-import ycm_core
-
-# These are the compilation flags that will be used in case there's no
-# compilation database set (by default, one is not set).
-# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-
-
-BASE_DIR = os.path.dirname(os.path.realpath(__file__))
-
-from plumbum.cmd import python_config
-
-
-flags = [
-    '-Wall',
-    '-Wextra',
-    '-Wnon-virtual-dtor',
-    '-Winvalid-pch',
-    '-Wno-unused-local-typedefs',
-    '-std=c++11',
-    '-x', 'c++',
-    '-Iinclude',
-] + python_config('--cflags').split()
-
-
-# Set this to the absolute path to the folder (NOT the file!) containing the
-# compile_commands.json file to use that instead of 'flags'. See here for
-# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
-#
-# Most projects will NOT need to set this to anything; you can just change the
-# 'flags' list of compilation flags.
-compilation_database_folder = ''
-
-if os.path.exists( compilation_database_folder ):
-  database = ycm_core.CompilationDatabase( compilation_database_folder )
-else:
-  database = None
-
-SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
-
-def DirectoryOfThisScript():
-  return os.path.dirname( os.path.abspath( __file__ ) )
-
-
-def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
-  if not working_directory:
-    return list( flags )
-  new_flags = []
-  make_next_absolute = False
-  path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
-  for flag in flags:
-    new_flag = flag
-
-    if make_next_absolute:
-      make_next_absolute = False
-      if not flag.startswith( '/' ):
-        new_flag = os.path.join( working_directory, flag )
-
-    for path_flag in path_flags:
-      if flag == path_flag:
-        make_next_absolute = True
-        break
-
-      if flag.startswith( path_flag ):
-        path = flag[ len( path_flag ): ]
-        new_flag = path_flag + os.path.join( working_directory, path )
-        break
-
-    if new_flag:
-      new_flags.append( new_flag )
-  return new_flags
-
-
-def IsHeaderFile( filename ):
-  extension = os.path.splitext( filename )[ 1 ]
-  return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
-
-
-def GetCompilationInfoForFile( filename ):
-  # The compilation_commands.json file generated by CMake does not have entries
-  # for header files. So we do our best by asking the db for flags for a
-  # corresponding source file, if any. If one exists, the flags for that file
-  # should be good enough.
-  if IsHeaderFile( filename ):
-    basename = os.path.splitext( filename )[ 0 ]
-    for extension in SOURCE_EXTENSIONS:
-      replacement_file = basename + extension
-      if os.path.exists( replacement_file ):
-        compilation_info = database.GetCompilationInfoForFile(
-          replacement_file )
-        if compilation_info.compiler_flags_:
-          return compilation_info
-    return None
-  return database.GetCompilationInfoForFile( filename )
-
-
-# This is the entry point; this function is called by ycmd to produce flags for
-# a file.
-def FlagsForFile( filename, **kwargs ):
-  if database:
-    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
-    # python list, but a "list-like" StringVec object
-    compilation_info = GetCompilationInfoForFile( filename )
-    if not compilation_info:
-      return None
-
-    final_flags = MakeRelativePathsInFlagsAbsolute(
-      compilation_info.compiler_flags_,
-      compilation_info.compiler_working_dir_ )
-  else:
-    relative_to = DirectoryOfThisScript()
-    final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
-
-  return {
-    'flags': final_flags,
-    'do_cache': True
-  }
-
--- a/Show More
+++ b/Show More