polish code

parent 5e9fb50db5
commit 8f52a73718
@@ -16,7 +16,7 @@ Global:
   infer_img:
   # for data or label process
   character_dict_path: ppocr/utils/dict/en_dict.txt
-  character_type: ch
+  character_type: En
   max_text_length: 25
   infer_mode: False
   use_space_char: False
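The hunk above switches the English lite recognition config from the generic `ch` character type to the `En` type that this commit adds to the supported-type lists, so the alphabet is taken from `ppocr/utils/dict/en_dict.txt` rather than the built-in lowercase set. Below is a minimal sketch of how one might peek at such a config, assuming PyYAML is installed; the config path is illustrative, not taken from this diff.

```python
# Hedged sketch, not part of PaddleOCR: print the Global keys that the
# recognition label encoder/decoder consume. Assumes PyYAML is installed.
import yaml

CONFIG = "configs/rec/multi_language/rec_en_lite_train.yml"  # assumed path

with open(CONFIG, encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

g = cfg["Global"]
# character_type must appear in support_character_type (see the label_ops.py
# and rec_postprocess.py hunks below, which add the uppercase English entry).
print("character_type:     ", g["character_type"])
print("character_dict_path:", g["character_dict_path"])
print("use_space_char:     ", g.get("use_space_char", False))
```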
@@ -303,7 +303,7 @@ PaddleOCR already supports recognition for 26 languages (other than Chinese); `configs/rec/multi
 
 ```bash
 # The -l or --language option is required
-# --train to modify the training set, --val to modify the validation set, --data_dir to modify the dataset directory, -o to modify the corresponding default parameters
+# --train to modify the training set, --val to modify the validation set, --data_dir to modify the dataset directory, --dict to modify the dictionary path, -o to modify the corresponding default parameters
 cd PaddleOCR/configs/rec/multi_language/
 python3 generate_multi_language_configs.py -l it \  # language
 --train {path/of/train_label.txt} \  # path to the training label file
@@ -348,7 +348,7 @@ PaddleOCR already supports recognition for 26 languages (other than Chinese); `configs/rec/multi
 | Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
 | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
 | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Traditional Chinese | chinese_cht|
-| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | ch |
+| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En |
 | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
 | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
 | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
@@ -315,7 +315,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
 <a name="MULTILINGUAL_MODEL_INFERENCE"></a>
 ### 4. MULTILINGUAL MODEL INFERENCE
 If you need to predict with other language models, specify the dictionary path via `--rec_char_dict_path` when running inference. To get correct visualization results,
-you also need to specify the visualization font path via `--vis_font_path`. Fonts for the supported languages are provided by default under the `doc/` path. For example, for Korean recognition:
+you also need to specify the visualization font path via `--vis_font_path`. Fonts for the supported languages are provided by default under the `doc/fonts` path. For example, for Korean recognition:
 
 ```
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
@@ -304,6 +304,7 @@ There are two ways to create the required configuration file:
 # --train to modify the training set
 # --val to modify the validation set
 # --data_dir to modify the data set directory
+# --dict to modify the dict path
 # -o to modify the corresponding default parameters
 cd PaddleOCR/configs/rec/multi_language/
 python3 generate_multi_language_configs.py -l it \  # language
@@ -346,10 +347,10 @@ There are two ways to create the required configuration file:
 
 Currently, the multi-language algorithms supported by PaddleOCR are:
 
-| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
+| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
 | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
 | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
-| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | ch |
+| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En |
 | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
 | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
 | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
Binary file not shown.
Binary file not shown.
@@ -92,7 +92,10 @@ class BaseRecLabelEncode(object):
                  character_type='ch',
                  use_space_char=False):
         support_character_type = [
-            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
+            'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
+            'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
+            'mr', 'ne'
         ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, character_type)
@@ -101,9 +104,15 @@ class BaseRecLabelEncode(object):
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type in ["ch", "french", "german", "japan", "korean"]:
+        elif character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            import string
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        elif character_type in support_character_type:
             self.character_str = ""
-            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
+                character_type)
             with open(character_dict_path, "rb") as fin:
                 lines = fin.readlines()
                 for line in lines:
@@ -112,11 +121,6 @@ class BaseRecLabelEncode(object):
                 if use_space_char:
                     self.character_str += " "
                 dict_character = list(self.character_str)
-        elif character_type == "en_sensitive":
-            # same with ASTER setting (use 94 char).
-            import string
-            self.character_str = string.printable[:-6]
-            dict_character = list(self.character_str)
         self.character_type = character_type
         dict_character = self.add_special_char(dict_character)
         self.dict = {}
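Taken together, the three label_ops.py hunks above restructure BaseRecLabelEncode so that `en` and `en_sensitive` keep their hard-coded alphabets while every other supported character_type loads its alphabet from `character_dict_path`. The following is a minimal standalone sketch of that branch structure; it is a hypothetical helper for illustration, not PaddleOCR's actual API, and the dict-file parsing simply follows the pattern visible in the hunk.

```python
# Hedged sketch of the character-set selection logic introduced above.
import string


def build_character_list(character_type, character_dict_path=None,
                         use_space_char=False):
    if character_type == "en":
        # built-in lowercase alphabet plus digits
        character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
    elif character_type == "en_sensitive":
        # ASTER-style 94-character set: printable minus the 6 whitespace chars
        character_str = string.printable[:-6]
    else:
        # every other supported type now reads its alphabet from a dict file
        # (the real class first asserts membership in support_character_type)
        assert character_dict_path is not None, (
            "character_dict_path should not be None when character_type "
            "is {}".format(character_type))
        character_str = ""
        with open(character_dict_path, "rb") as fin:
            for line in fin.readlines():
                character_str += line.decode("utf-8").strip("\n").strip("\r\n")
        if use_space_char:
            character_str += " "
    return list(character_str)


# Example: the English type introduced by this commit pairs with en_dict.txt.
# chars = build_character_list("En", "ppocr/utils/dict/en_dict.txt")
```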
@@ -24,9 +24,10 @@ class BaseRecLabelDecode(object):
                  character_type='ch',
                  use_space_char=False):
         support_character_type = [
-            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'it',
-            'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'rsc', 'bg',
-            'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'ne'
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
+            'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
+            'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
+            'ne', 'En'
         ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, character_type)
@@ -34,9 +35,15 @@ class BaseRecLabelDecode(object):
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type in ["ch", "french", "german", "japan", "korean"]:
+        elif character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            import string
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        elif character_type in support_character_type:
             self.character_str = ""
-            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
+                character_type)
             with open(character_dict_path, "rb") as fin:
                 lines = fin.readlines()
                 for line in lines:
@@ -45,11 +52,7 @@ class BaseRecLabelDecode(object):
                 if use_space_char:
                     self.character_str += " "
                 dict_character = list(self.character_str)
-        elif character_type == "en_sensitive":
-            # same with ASTER setting (use 94 char).
-            import string
-            self.character_str = string.printable[:-6]
-            dict_character = list(self.character_str)
+
         else:
             raise NotImplementedError
         self.character_type = character_type
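The rec_postprocess.py hunks mirror the encoder changes: BaseRecLabelDecode now accepts the same expanded character_type list and builds its character list the same way. Keeping the two lists identical is what makes decoding meaningful, since a CTC label index only maps back to the right symbol relative to the list it was encoded with. A tiny self-contained illustration of that round trip, in plain Python rather than PaddleOCR's classes:

```python
# Hedged illustration: encode and decode only round-trip when both sides
# share one character list, which is why this commit keeps them in sync.
chars = list("0123456789abcdefghijklmnopqrstuvwxyz")  # the built-in "en" set
char_to_idx = {c: i for i, c in enumerate(chars)}

label = [char_to_idx[c] for c in "paddleocr"]    # encoder side (label_ops.py)
decoded = "".join(chars[i] for i in label)       # decoder side (rec_postprocess.py)
assert decoded == "paddleocr"
print(label, "->", decoded)
```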
@@ -70,7 +70,7 @@ def parse_args():
         default="./ppocr/utils/ppocr_keys_v1.txt")
     parser.add_argument("--use_space_char", type=str2bool, default=True)
     parser.add_argument(
-        "--vis_font_path", type=str, default="./doc/simfang.ttf")
+        "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf")
     parser.add_argument("--drop_score", type=float, default=0.5)
 
     # params for text classifier
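The last hunk updates the default `--vis_font_path` in tools/infer/utility.py to match the fonts' new location under `doc/fonts/`. A rough sketch of how such a font path is typically consumed when rendering recognition results, assuming Pillow is installed; the drawing code here is illustrative and not PaddleOCR's own visualization routine:

```python
# Hedged sketch: load the TTF from the new default location and render a
# sample label onto a blank canvas. Assumes Pillow and a PaddleOCR checkout.
from PIL import Image, ImageDraw, ImageFont

font = ImageFont.truetype("./doc/fonts/simfang.ttf", 24)  # new default path
canvas = Image.new("RGB", (480, 60), "white")
ImageDraw.Draw(canvas).text((10, 15), "recognition result  0.98",
                            font=font, fill="black")
canvas.save("vis_font_demo.jpg")
```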