rename en_sensitive EN_symbol

This commit is contained in:
tink2123 2021-01-26 15:53:49 +08:00
parent d9ae86f422
commit edeb12b1e0
5 changed files with 18 additions and 18 deletions

View File

@ -1,5 +1,5 @@
Global:
use_gpu: True
use_gpu: False
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
@ -16,7 +16,7 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/en_dict.txt
character_type: En
character_type: EN
max_text_length: 25
infer_mode: False
use_space_char: False
@ -63,8 +63,8 @@ Metric:
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/train_list.txt"]
data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
@ -77,15 +77,15 @@ Train:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
batch_size_per_card: 1
drop_last: True
num_workers: 8
num_workers: 1
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"]
data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR

View File

@ -348,7 +348,7 @@ PaddleOCR目前已支持26种除中文外语种识别`configs/rec/multi
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | En |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |

View File

@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |

View File

@ -18,6 +18,7 @@ from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import string
class ClsLabelEncode(object):
@ -92,8 +93,8 @@ class BaseRecLabelEncode(object):
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne'
]
@ -104,9 +105,8 @@ class BaseRecLabelEncode(object):
if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif character_type == "en_sensitive":
elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char).
import string
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import string
import paddle
from paddle.nn import functional as F
@ -24,10 +25,10 @@ class BaseRecLabelDecode(object):
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
'ne', 'En'
'ne', 'EN'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
@ -35,9 +36,8 @@ class BaseRecLabelDecode(object):
if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif character_type == "en_sensitive":
elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char).
import string
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type: