rename en_sensitive EN_symbol
This commit is contained in:
parent
d9ae86f422
commit
edeb12b1e0
|
@ -1,5 +1,5 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
use_gpu: False
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
|
@ -16,7 +16,7 @@ Global:
|
|||
infer_img:
|
||||
# for data or label process
|
||||
character_dict_path: ppocr/utils/dict/en_dict.txt
|
||||
character_type: En
|
||||
character_type: EN
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
|
@ -63,8 +63,8 @@ Metric:
|
|||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list: ["./train_data/train_list.txt"]
|
||||
data_dir: ./train_data/ic15_data/
|
||||
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
|
@ -77,15 +77,15 @@ Train:
|
|||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
batch_size_per_card: 256
|
||||
batch_size_per_card: 1
|
||||
drop_last: True
|
||||
num_workers: 8
|
||||
num_workers: 1
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list: ["./train_data/eval_list.txt"]
|
||||
data_dir: ./train_data/ic15_data/
|
||||
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
|
|
|
@ -348,7 +348,7 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi
|
|||
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | En |
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
|
||||
|
|
|
@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
|
|||
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
|
||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En |
|
||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
|
||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
|
||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
|
||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
|
||||
|
|
|
@ -18,6 +18,7 @@ from __future__ import print_function
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import numpy as np
|
||||
import string
|
||||
|
||||
|
||||
class ClsLabelEncode(object):
|
||||
|
@ -92,8 +93,8 @@ class BaseRecLabelEncode(object):
|
|||
character_type='ch',
|
||||
use_space_char=False):
|
||||
support_character_type = [
|
||||
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
|
||||
'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
||||
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
||||
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
|
||||
'mr', 'ne'
|
||||
]
|
||||
|
@ -104,9 +105,8 @@ class BaseRecLabelEncode(object):
|
|||
if character_type == "en":
|
||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type == "en_sensitive":
|
||||
elif character_type == "EN_symbol":
|
||||
# same with ASTER setting (use 94 char).
|
||||
import string
|
||||
self.character_str = string.printable[:-6]
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type in support_character_type:
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import numpy as np
|
||||
import string
|
||||
import paddle
|
||||
from paddle.nn import functional as F
|
||||
|
||||
|
@ -24,10 +25,10 @@ class BaseRecLabelDecode(object):
|
|||
character_type='ch',
|
||||
use_space_char=False):
|
||||
support_character_type = [
|
||||
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
|
||||
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
|
||||
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
|
||||
'ne', 'En'
|
||||
'ne', 'EN'
|
||||
]
|
||||
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
||||
support_character_type, character_type)
|
||||
|
@ -35,9 +36,8 @@ class BaseRecLabelDecode(object):
|
|||
if character_type == "en":
|
||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type == "en_sensitive":
|
||||
elif character_type == "EN_symbol":
|
||||
# same with ASTER setting (use 94 char).
|
||||
import string
|
||||
self.character_str = string.printable[:-6]
|
||||
dict_character = list(self.character_str)
|
||||
elif character_type in support_character_type:
|
||||
|
|
Loading…
Reference in New Issue