From edeb12b1e00e090102116cb0737b5db194c219d6 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Tue, 26 Jan 2021 15:53:49 +0800 Subject: [PATCH] rename en_sensitive EN_symbol --- .../multi_language/rec_en_number_lite_train.yml | 16 ++++++++-------- doc/doc_ch/recognition.md | 2 +- doc/doc_en/recognition_en.md | 2 +- ppocr/data/imaug/label_ops.py | 8 ++++---- ppocr/postprocess/rec_postprocess.py | 8 ++++---- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml index 8217082d..c2295af9 100644 --- a/configs/rec/multi_language/rec_en_number_lite_train.yml +++ b/configs/rec/multi_language/rec_en_number_lite_train.yml @@ -1,5 +1,5 @@ Global: - use_gpu: True + use_gpu: False epoch_num: 500 log_smooth_window: 20 print_batch_step: 10 @@ -16,7 +16,7 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict/en_dict.txt - character_type: En + character_type: EN max_text_length: 25 infer_mode: False use_space_char: False @@ -63,8 +63,8 @@ Metric: Train: dataset: name: SimpleDataSet - data_dir: ./train_data/ - label_file_list: ["./train_data/train_list.txt"] + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] transforms: - DecodeImage: # load image img_mode: BGR @@ -77,15 +77,15 @@ Train: keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order loader: shuffle: True - batch_size_per_card: 256 + batch_size_per_card: 1 drop_last: True - num_workers: 8 + num_workers: 1 Eval: dataset: name: SimpleDataSet - data_dir: ./train_data/ - label_file_list: ["./train_data/eval_list.txt"] + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] transforms: - DecodeImage: # load image img_mode: BGR diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 76b631fc..c5f459bd 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -348,7 +348,7 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi | 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | En | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 9e697819..22f89cde 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are: | Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index f7e21786..14c1cc9c 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -18,6 +18,7 @@ from __future__ import print_function from __future__ import unicode_literals import numpy as np +import string class ClsLabelEncode(object): @@ -92,8 +93,8 @@ class BaseRecLabelEncode(object): character_type='ch', use_space_char=False): support_character_type = [ - 'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', - 'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', + 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', + 'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'ne' ] @@ -104,9 +105,8 @@ class BaseRecLabelEncode(object): if character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type == "en_sensitive": + elif character_type == "EN_symbol": # same with ASTER setting (use 94 char). - import string self.character_str = string.printable[:-6] dict_character = list(self.character_str) elif character_type in support_character_type: diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 47bdd03f..65ed4671 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import numpy as np +import string import paddle from paddle.nn import functional as F @@ -24,10 +25,10 @@ class BaseRecLabelDecode(object): character_type='ch', use_space_char=False): support_character_type = [ - 'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', + 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', - 'ne', 'En' + 'ne', 'EN' ] assert character_type in support_character_type, "Only {} are supported now but get {}".format( support_character_type, character_type) @@ -35,9 +36,8 @@ class BaseRecLabelDecode(object): if character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type == "en_sensitive": + elif character_type == "EN_symbol": # same with ASTER setting (use 94 char). - import string self.character_str = string.printable[:-6] dict_character = list(self.character_str) elif character_type in support_character_type: