revert dict and add space in character

2020-07-07 14:13:13 +08:00 · 2020-07-07 14:13:13 +08:00 · db3d78d8ea
parent ed3ed30ecc
commit db3d78d8ea
15 changed files with 80 additions and 10 deletions
--- a/README.md
+++ b/README.md
@ -5,6 +5,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力

 **近期更新**
 - 2020.7.9 添加支持空格的识别模型，[识别效果](#支持空格的中文OCR效果展示)
+- 2020.7.9 添加数据增强、学习率衰减策略,具体参考[配置文件](./doc/doc_ch/config.md)
 - 2020.6.8 添加[数据集](./doc/doc_ch/datasets.md)，并保持持续更新
 - 2020.6.5 支持 `attention` 模型导出 `inference_model`
 - 2020.6.5 支持单独预测识别时，输出结果得分
@ -51,6 +52,8 @@ mkdir inference && cd inference
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
 # 下载超轻量级中文OCR模型的识别模型并解压
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
+# 下载支持空格的超轻量级中文OCR模型的识别模型并解压
+wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar && tar xf ch_rec_mv3_crnn_enhance_infer.tar
 cd ..
 ```
 #### (2)通用中文OCR模型下载
@ -60,6 +63,8 @@ mkdir inference && cd inference
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar && tar xf ch_det_r50_vd_db_infer.tar
 # 下载通用中文OCR模型的识别模型并解压
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar && tar xf ch_rec_r34_vd_crnn_infer.tar
+# 下载支持空格的通用中文OCR模型的识别模型并解压
+wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar && tar xf ch_rec_r34_vd_crnn_enhance_infer.tar
 cd ..
 ```

@ -85,6 +90,13 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode
 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/"  --rec_model_dir="./inference/ch_rec_r34_vd_crnn/"
 ```

+带空格的通用中文OCR模型的体验可以按照上述步骤下载相应的模型，并且更新相关的参数，示例如下：
+
+```
+# 预测image_dir指定的单张图像
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_12.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/"  --rec_model_dir="./inference/ch_rec_r34_vd_crnn_enhance/"
+```
+
 更多的文本检测、识别串联推理使用方式请参考文档教程中[基于预测引擎推理](./doc/doc_ch/inference.md)。

 ## 文档教程
--- a/README_en.md
+++ b/README_en.md
@ -5,6 +5,7 @@ PaddleOCR aims to create a rich, leading, and practical OCR tools that help user

 **Recent updates**
 - 2020.7.9 Add recognition model to support space, [recognition result](#space Chinese OCR results)
+- 2020.7.9 Add data augmentation and learning rate decay strategies, please read [config](./doc/doc_en/config_en.md)
 - 2020.6.8 Add [dataset](./doc/doc_en/datasets_en.md) and keep updating
 - 2020.6.5 Support exporting `attention` model to `inference_model`
 - 2020.6.5 Support separate prediction and recognition, output result score
@ -52,6 +53,9 @@ mkdir inference && cd inference
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
 # Download the recognition part of the lightweight Chinese OCR and decompress it
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
+# Download the recognition model with space support for the lightweight Chinese OCR and decompress it
+wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar && tar xf ch_rec_mv3_crnn_enhance_infer.tar
+
 cd ..
 ```
 #### (2) Download General Chinese OCR models
@ -61,6 +65,8 @@ mkdir inference && cd inference
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar && tar xf ch_det_r50_vd_db_infer.tar
 # Download the recognition part of the generic Chinese OCR model and decompress it
 wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar && tar xf ch_rec_r34_vd_crnn_infer.tar
+# Download the recognition model with space support for the generic Chinese OCR and decompress it
+wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar && tar xf ch_rec_r34_vd_crnn_enhance_infer.tar
 cd ..
 ```

@ -86,6 +92,13 @@ To run inference of the Generic Chinese OCR model, follow these steps above to d
 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/"  --rec_model_dir="./inference/ch_rec_r34_vd_crnn/"
 ```

+To run inference of the generic Chinese OCR model with space support, follow the steps above to download the corresponding models and update the relevant parameters. An example is as follows:
+
+```
+# Prediction on a single image by specifying image path to image_dir
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_12.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/"  --rec_model_dir="./inference/ch_rec_r34_vd_crnn_enhance/"
+```
+
 For more text detection and recognition models, please refer to the document [Inference](./doc/doc_en/inference_en.md)

 ## DOCUMENTATION
--- a/configs/rec/rec_chinese_common_train.yml
+++ b/configs/rec/rec_chinese_common_train.yml
@ -14,6 +14,8 @@ Global:
  character_type: ch
  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
  loss_type: ctc
+  distort: false
+  add_space: false
  reader_yml: ./configs/rec/rec_chinese_reader.yml
  pretrain_weights:
  checkpoints:
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
@ -15,6 +15,7 @@ Global:
  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
  loss_type: ctc
  distort: false
+  add_space: false
  reader_yml: ./configs/rec/rec_chinese_reader.yml
  pretrain_weights:
  checkpoints:
--- a/doc/doc_ch/config.md
+++ b/doc/doc_ch/config.md
@ -30,6 +30,8 @@
 |      character_type      |    设置字符类型            |       ch          |    en/ch, en时将使用默认dict，ch时使用自定义dict|
 |      character_dict_path |    设置字典路径            |  ./ppocr/utils/ic15_dict.txt  |    \                 |
 |      loss_type           |    设置 loss 类型              |       ctc         |    支持两种loss： ctc / attention |
+|       distort            |    设置是否使用数据增强          |       false       |  设置为true时，将在训练时随机进行扰动，支持的扰动操作可阅读[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)                 |
+|       add_space          |    设置是否识别空格             |        false      |          仅在"ch"模式下支持空格                 |
 |      reader_yml          |    设置reader配置文件          |  ./configs/rec/rec_icdar15_reader.yml  |  \          |
 |      pretrain_weights    |    加载预训练模型路径      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
 |      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@ -94,7 +94,7 @@ word_dict.txt 每行有一个单字，将字符与数字索引映射在一起，
 `ppocr/utils/ic15_dict.txt` 是一个包含36个字符的英文字典，
 您可以按需使用。

-如需自定义dic文件，请修改 `configs/rec/rec_icdar15_train.yml` 中的 `character_dict_path` 字段, 并将 `character_type` 设置为 `ch`。
+如需自定义dic文件，请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 并将 `character_type` 设置为 `ch`。

 ### 启动训练

@ -157,12 +157,26 @@ Global:
  character_type: ch
  # 添加自定义字典，如修改字典请将路径指向新字典
  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
+  # 训练时添加数据增强
+  distort: true
+  # 识别空格
+  add_space: true
  ...
  # 修改reader类型
  reader_yml: ./configs/rec/rec_chinese_reader.yml
  ...

 ...
+
+Optimizer:
+  ...
+  # 添加学习率衰减策略
+  decay:
+    function: cosine_decay
+    # 每个 epoch 包含 iter 数
+    step_each_epoch: 20
+    # 总共训练epoch数
+    total_epoch: 1000
 ```
 **注意，预测/评估时的配置文件请务必与训练一致。**

--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
@ -30,6 +30,8 @@ Take `rec_chinese_lite_train.yml` as an example
 |      character_type      |    Set character type            |       ch          |    en/ch, the default dict will be used for en, and the custom dict will be used for ch|
 |      character_dict_path |    Set dictionary path            |  ./ppocr/utils/ic15_dict.txt  |    \                 |
 |      loss_type           |    Set loss type              |       ctc         |    Supports two types of loss: ctc / attention |
+|       distort            |    Whether to use data augmentation          |       false       |  When set to true, random distortions are applied during training; the supported operations are listed in [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)                 |
+|       add_space          |    Whether to recognize spaces             |        false      |         Only supported in "ch" mode                 |
 |      reader_yml          |    Set the reader configuration file          |  ./configs/rec/rec_icdar15_reader.yml  |  \          |
 |      pretrain_weights    |    Load pre-trained model path      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
 |      checkpoints         |    Load saved model path            |       None        |    Used to load saved parameters to continue training after interruption |
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@ -158,9 +158,23 @@ Global:
  ...
  # Modify reader type
  reader_yml: ./configs/rec/rec_chinese_reader.yml
+  # Whether to use data augmentation
+  distort: true
+  # Whether to recognize spaces
+  add_space: true
  ...

 ...
+
+Optimizer:
+  ...
+  # Add learning rate decay strategy
+  decay:
+    function: cosine_decay
+    # Number of iterations in each epoch
+    step_each_epoch: 20
+    # Total number of epochs
+    total_epoch: 1000
 ```
 **Note that the configuration file for prediction/evaluation must be consistent with the training.**

--- a/doc/imgs_en/img_12.jpg
+++ b/doc/imgs_en/img_12.jpg
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@ -58,6 +58,7 @@ class LMDBReader(object):
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
+            self.use_distort = False
        self.infer_img = params['infer_img']

    def load_hierarchical_lmdb_dataset(self):
@ -206,6 +207,7 @@ class SimpleReader(object):
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
+            self.use_distort = False

    def __call__(self, process_id):
        if self.mode != 'train':
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@ -136,6 +136,9 @@ def jitter(img):


 def add_gasuss_noise(image, mean=0, var=0.1):
+    """
+    Add Gaussian noise to the image.
+    """

    noise = np.random.normal(mean, var**0.5, image.shape)
    out = image + 0.5 * noise
@ -152,9 +155,8 @@ def get_crop(image):
    top_min = 1
    top_max = 8
    top_crop = int(random.randint(top_min, top_max))
-
+    top_crop = min(top_crop, h - 1)
    crop_img = image.copy()
-
    ratio = random.randint(0, 1)
    if ratio:
        crop_img = crop_img[top_crop:h, :, :]
@ -249,13 +251,13 @@ def get_warpR(config):
    dst2 = r.dot(p2)
    dst3 = r.dot(p3)
    dst4 = r.dot(p4)
-    list_dst = [dst1, dst2, dst3, dst4]
+    list_dst = np.array([dst1, dst2, dst3, dst4])
    org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
    dst = np.zeros((4, 2), np.float32)
    # Project onto the image plane
-    for i in range(4):
-        dst[i, 0] = list_dst[i][0] * z / (z - list_dst[i][2]) + pcenter[0]
-        dst[i, 1] = list_dst[i][1] * z / (z - list_dst[i][2]) + pcenter[1]
+    dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
+    dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]
+
    warpR = cv2.getPerspectiveTransform(org, dst)

    dst1, dst2, dst3, dst4 = dst
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@ -30,12 +30,17 @@ class CharacterOps(object):
            dict_character = list(self.character_str)
        elif self.character_type == "ch":
            character_dict_path = config['character_dict_path']
+            add_space = False
+            if 'add_space' in config:
+                add_space = config['add_space']
            self.character_str = ""
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
                    self.character_str += line
+            if add_space:
+                self.character_str += " "
            dict_character = list(self.character_str)
        elif self.character_type == "en_sensitive":
            # same with ASTER setting (use 94 char).
--- a/ppocr/utils/ppocr_keys_v1.txt
+++ b/ppocr/utils/ppocr_keys_v1.txt
@ -6621,4 +6621,3 @@ j
 續
 紹
 懮
- 
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@ -39,7 +39,8 @@ class TextRecognizer(object):
        self.rec_algorithm = args.rec_algorithm
        char_ops_params = {
            "character_type": args.rec_char_type,
-            "character_dict_path": args.rec_char_dict_path
+            "character_dict_path": args.rec_char_dict_path,
+            "add_space": args.rec_add_space
        }
        if self.rec_algorithm != "RARE":
            char_ops_params['loss_type'] = 'ctc'
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@ -63,6 +63,7 @@ def parse_args():
        "--rec_char_dict_path",
        type=str,
        default="./ppocr/utils/ppocr_keys_v1.txt")
+    parser.add_argument("--rec_add_space", type=bool, default=True)
    return parser.parse_args()