Merge pull request #3866 from LDOUBLEV/lock_seed

Lock seed
2021-09-07 15:01:22 +08:00 · 2021-09-07 15:01:22 +08:00 · 0795697d25
parent 727febd4e9 5aa1ffccd0
commit 0795697d25
7 changed files with 292 additions and 15 deletions
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@ -128,4 +128,4 @@ Eval:
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 8
-    use_shared_memory: False
+    use_shared_memory: False
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
@ -98,7 +98,7 @@ Train:
    shuffle: True
    drop_last: False
    batch_size_per_card: 16
-    num_workers: 8
+    num_workers: 4

 Eval:
  dataset:
@ -125,4 +125,4 @@ Eval:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
-    num_workers: 8
+    num_workers: 8
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@ -113,7 +113,7 @@ class NormalizeImage(object):
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
-                                img.astype('float32') * self.scale - self.mean) / self.std
+            img.astype('float32') * self.scale - self.mean) / self.std
        return data


@ -144,6 +144,34 @@ class KeepKeys(object):
        return data_list


+class Resize(object):  # transform op: resizes data['image'] to a fixed size and rescales data['polys'] to match
+    def __init__(self, size=(640, 640), **kwargs):  # size is unpacked as (height, width); extra kwargs are accepted but unused
+        self.size = size
+
+    def resize_image(self, img):  # returns (resized image, [ratio_h, ratio_w])
+        resize_h, resize_w = self.size
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h  # vertical scale factor: target height / original height
+        ratio_w = float(resize_w) / ori_w  # horizontal scale factor: target width / original width
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))  # cv2.resize expects dsize as (width, height)
+        return img, [ratio_h, ratio_w]
+
+    def __call__(self, data):  # reads and overwrites data['image'] and data['polys'], then returns the same dict
+        img = data['image']
+        text_polys = data['polys']
+
+        img_resize, [ratio_h, ratio_w] = self.resize_image(img)
+        new_boxes = []
+        for box in text_polys:
+            new_box = []
+            for cord in box:  # each point is read as cord[0], cord[1] -- presumably (x, y); confirm against DetLabelEncode
+                new_box.append([cord[0] * ratio_w, cord[1] * ratio_h])  # cord[0] scales with width ratio, cord[1] with height ratio
+            new_boxes.append(new_box)
+        data['image'] = img_resize
+        data['polys'] = np.array(new_boxes, dtype=np.float32)  # scaled polygons are stored back as a float32 array
+        return data
+
+
 class DetResizeForTest(object):
    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
@ -215,7 +243,7 @@ class DetResizeForTest(object):
            else:
                ratio = 1.
        elif self.limit_type == 'resize_long':
-            ratio = float(limit_side_len) / max(h,w)
+            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
--- a/tests/configs/det_mv3_db.yml
+++ b/tests/configs/det_mv3_db.yml
@ -0,0 +1,125 @@
+Global:
+  use_gpu: false
+  epoch_num: 5
+  log_smooth_window: 20
+  print_batch_step: 1
+  save_model_dir: ./output/db_mv3/
+  save_epoch_step: 1200
+  # evaluation is run every 400 iterations, starting from iteration 0
+  eval_batch_step: [0, 400]
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Architecture:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: DBFPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
+
+Loss:
+  name: DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5 #5
+  beta: 10 #10
+  ohem_ratio: 3
+
+Optimizer:
+  name: Adam #Momentum
+  #momentum: 0.9
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.6
+  max_candidates: 1000
+  unclip_ratio: 1.5
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - Resize:
+          # size: [640, 640]
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1
+    num_workers: 0
+    use_shared_memory: False
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736, 1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 0
+    use_shared_memory: False
--- a/tests/configs/det_r50_vd_db.yml
+++ b/tests/configs/det_r50_vd_db.yml
@ -0,0 +1,124 @@
+Global:
+  use_gpu: false
+  epoch_num: 5
+  log_smooth_window: 20
+  print_batch_step: 1
+  save_model_dir: ./output/db_mv3/
+  save_epoch_step: 1200
+  # evaluation is run every 400 iterations, starting from iteration 0
+  eval_batch_step: [0, 400]
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Architecture:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: ResNet  #MobileNetV3
+    layers: 50
+  Neck:
+    name: DBFPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
+
+Loss:
+  name: DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5 #5
+  beta: 10 #10
+  ohem_ratio: 3
+
+Optimizer:
+  name: Adam #Momentum
+  #momentum: 0.9
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.6
+  max_candidates: 1000
+  unclip_ratio: 1.5
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - Resize:
+          # size: [640, 640]
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1
+    num_workers: 0
+    use_shared_memory: False
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736, 1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 0
+    use_shared_memory: False
--- a/tests/ocr_det_params.txt
+++ b/tests/ocr_det_params.txt
@ -13,23 +13,23 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train|pact_train
-norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
-pact_train:deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
 distill_train:null
 null:null
 null:null
 ##
 ===========================eval_params=========================== 
-eval:tools/eval.py -c configs/det/det_mv3_db.yml -o
+eval:tools/eval.py -c tests/configs/det_mv3_db.yml -o
 null:null
 ##
 ===========================infer_params===========================
 Global.save_inference_dir:./output/
 Global.pretrained_model:
-norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o 
-quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o 
-fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/det_mv3_db.yml -o 
+norm_export:tools/export_model.py -c tests/configs/det_mv3_db.yml -o 
+quant_export:deploy/slim/quantization/export_model.py -c tests/configs/det_mv3_db.yml -o 
+fpgm_export:deploy/slim/prune/export_prune_model.py -c tests/configs/det_mv3_db.yml -o 
 distill_export:null
 export1:null
 export2:null
--- a/tests/ocr_det_server_params.txt
+++ b/tests/ocr_det_server_params.txt
@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train|pact_train
-norm_train:tools/train.py -c configs/det/det_r50_vd_db.yml -o Global.pretrained_model=""
+norm_train:tools/train.py -c tests/configs/det_r50_vd_db.yml -o Global.pretrained_model=""
 pact_train:null
 fpgm_train:null
 distill_train:null
@ -21,13 +21,13 @@ null:null
 null:null
 ##
 ===========================eval_params=========================== 
-eval:tools/eval.py -c configs/det/det_mv3_db.yml -o
+eval:tools/eval.py -c tests/configs/det_r50_vd_db.yml -o
 null:null
 ##
 ===========================infer_params===========================
 Global.save_inference_dir:./output/
 Global.pretrained_model:
-norm_export:tools/export_model.py -c configs/det/det_r50_vd_db.yml -o 
+norm_export:tools/export_model.py -c tests/configs/det_r50_vd_db.yml -o 
 quant_export:null 
 fpgm_export:null
 distill_export:null