Rename en_sensitive to EN_symbol
This commit is contained in:
parent
d9ae86f422
commit
edeb12b1e0
|
@ -1,5 +1,5 @@
|
||||||
Global:
|
Global:
|
||||||
use_gpu: True
|
use_gpu: False
|
||||||
epoch_num: 500
|
epoch_num: 500
|
||||||
log_smooth_window: 20
|
log_smooth_window: 20
|
||||||
print_batch_step: 10
|
print_batch_step: 10
|
||||||
|
@ -16,7 +16,7 @@ Global:
|
||||||
infer_img:
|
infer_img:
|
||||||
# for data or label process
|
# for data or label process
|
||||||
character_dict_path: ppocr/utils/dict/en_dict.txt
|
character_dict_path: ppocr/utils/dict/en_dict.txt
|
||||||
character_type: En
|
character_type: EN
|
||||||
max_text_length: 25
|
max_text_length: 25
|
||||||
infer_mode: False
|
infer_mode: False
|
||||||
use_space_char: False
|
use_space_char: False
|
||||||
|
@ -63,8 +63,8 @@ Metric:
|
||||||
Train:
|
Train:
|
||||||
dataset:
|
dataset:
|
||||||
name: SimpleDataSet
|
name: SimpleDataSet
|
||||||
data_dir: ./train_data/
|
data_dir: ./train_data/ic15_data/
|
||||||
label_file_list: ["./train_data/train_list.txt"]
|
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
|
||||||
transforms:
|
transforms:
|
||||||
- DecodeImage: # load image
|
- DecodeImage: # load image
|
||||||
img_mode: BGR
|
img_mode: BGR
|
||||||
|
@ -77,15 +77,15 @@ Train:
|
||||||
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
||||||
loader:
|
loader:
|
||||||
shuffle: True
|
shuffle: True
|
||||||
batch_size_per_card: 256
|
batch_size_per_card: 1
|
||||||
drop_last: True
|
drop_last: True
|
||||||
num_workers: 8
|
num_workers: 1
|
||||||
|
|
||||||
Eval:
|
Eval:
|
||||||
dataset:
|
dataset:
|
||||||
name: SimpleDataSet
|
name: SimpleDataSet
|
||||||
data_dir: ./train_data/
|
data_dir: ./train_data/ic15_data/
|
||||||
label_file_list: ["./train_data/eval_list.txt"]
|
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
|
||||||
transforms:
|
transforms:
|
||||||
- DecodeImage: # load image
|
- DecodeImage: # load image
|
||||||
img_mode: BGR
|
img_mode: BGR
|
||||||
|
|
|
@ -348,7 +348,7 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi
|
||||||
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
|
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
|
||||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
|
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
|
||||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | En |
|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
|
||||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
|
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
|
||||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
|
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
|
||||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
|
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
|
||||||
|
|
|
@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
|
||||||
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
|
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
|
||||||
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
|
||||||
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
|
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
|
||||||
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En |
|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English (case sensitive) | EN |
|
||||||
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
|
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
|
||||||
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
|
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
|
||||||
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
|
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
|
||||||
|
|
|
@ -18,6 +18,7 @@ from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import string
|
||||||
|
|
||||||
|
|
||||||
class ClsLabelEncode(object):
|
class ClsLabelEncode(object):
|
||||||
|
@ -92,8 +93,8 @@ class BaseRecLabelEncode(object):
|
||||||
character_type='ch',
|
character_type='ch',
|
||||||
use_space_char=False):
|
use_space_char=False):
|
||||||
support_character_type = [
|
support_character_type = [
|
||||||
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
|
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||||
'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
||||||
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
|
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
|
||||||
'mr', 'ne'
|
'mr', 'ne'
|
||||||
]
|
]
|
||||||
|
@ -104,9 +105,8 @@ class BaseRecLabelEncode(object):
|
||||||
if character_type == "en":
|
if character_type == "en":
|
||||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
elif character_type == "en_sensitive":
|
elif character_type == "EN_symbol":
|
||||||
# same with ASTER setting (use 94 char).
|
# Same as the ASTER setting (uses 94 characters).
|
||||||
import string
|
|
||||||
self.character_str = string.printable[:-6]
|
self.character_str = string.printable[:-6]
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
elif character_type in support_character_type:
|
elif character_type in support_character_type:
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import string
|
||||||
import paddle
|
import paddle
|
||||||
from paddle.nn import functional as F
|
from paddle.nn import functional as F
|
||||||
|
|
||||||
|
@ -24,10 +25,10 @@ class BaseRecLabelDecode(object):
|
||||||
character_type='ch',
|
character_type='ch',
|
||||||
use_space_char=False):
|
use_space_char=False):
|
||||||
support_character_type = [
|
support_character_type = [
|
||||||
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
|
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||||
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
|
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
|
||||||
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
|
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
|
||||||
'ne', 'En'
|
'ne', 'EN'
|
||||||
]
|
]
|
||||||
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
||||||
support_character_type, character_type)
|
support_character_type, character_type)
|
||||||
|
@ -35,9 +36,8 @@ class BaseRecLabelDecode(object):
|
||||||
if character_type == "en":
|
if character_type == "en":
|
||||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
elif character_type == "en_sensitive":
|
elif character_type == "EN_symbol":
|
||||||
# same with ASTER setting (use 94 char).
|
# Same as the ASTER setting (uses 94 characters).
|
||||||
import string
|
|
||||||
self.character_str = string.printable[:-6]
|
self.character_str = string.printable[:-6]
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
elif character_type in support_character_type:
|
elif character_type in support_character_type:
|
||||||
|
|
Loading…
Reference in New Issue