Rename language abbreviations: Spanish 'xi' → 'es', Portuguese 'pu' → 'pt'

This commit is contained in:
tink2123 2021-03-15 17:51:32 +08:00
parent c7be8856e9
commit 68dd6afaa4
4 changed files with 91 additions and 43 deletions

View File

@ -19,21 +19,38 @@ import logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
support_list = { support_list = {
'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic', 'it': 'italian',
'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin', 'es': 'spanish',
'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian', 'pt': 'portuguese',
'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi', 'ru': 'russian',
'ne':'nepali', 'ar': 'arabic',
'ta': 'tamil',
'ug': 'uyghur',
'fa': 'persian',
'ur': 'urdu',
'rs': 'serbian latin',
'oc': 'occitan',
'rsc': 'serbian cyrillic',
'bg': 'bulgarian',
'uk': 'ukranian',
'be': 'belarusian',
'te': 'telugu',
'ka': 'kannada',
'chinese_cht': 'chinese tradition',
'hi': 'hindi',
'mr': 'marathi',
'ne': 'nepali',
} }
assert( assert (os.path.isfile("./rec_multi_language_lite_train.yml")
os.path.isfile("./rec_multi_language_lite_train.yml") ), "Loss basic configuration file rec_multi_language_lite_train.yml.\
),"Loss basic configuration file rec_multi_language_lite_train.yml.\
You can download it from \ You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/" https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader) global_config = yaml.load(
open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../")) project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
class ArgsParser(ArgumentParser): class ArgsParser(ArgumentParser):
def __init__(self): def __init__(self):
super(ArgsParser, self).__init__( super(ArgsParser, self).__init__(
@ -41,15 +58,30 @@ class ArgsParser(ArgumentParser):
self.add_argument( self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options") "-o", "--opt", nargs='+', help="set configuration options")
self.add_argument( self.add_argument(
"-l", "--language", nargs='+', help="set language type, support {}".format(support_list)) "-l",
"--language",
nargs='+',
help="set language type, support {}".format(support_list))
self.add_argument( self.add_argument(
"--train",type=str,help="you can use this command to change the train dataset default path") "--train",
type=str,
help="you can use this command to change the train dataset default path"
)
self.add_argument( self.add_argument(
"--val",type=str,help="you can use this command to change the eval dataset default path") "--val",
type=str,
help="you can use this command to change the eval dataset default path"
)
self.add_argument( self.add_argument(
"--dict",type=str,help="you can use this command to change the dictionary default path") "--dict",
type=str,
help="you can use this command to change the dictionary default path"
)
self.add_argument( self.add_argument(
"--data_dir",type=str,help="you can use this command to change the dataset default root path") "--data_dir",
type=str,
help="you can use this command to change the dataset default root path"
)
def parse_args(self, argv=None): def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv) args = super(ArgsParser, self).parse_args(argv)
@ -68,20 +100,28 @@ class ArgsParser(ArgumentParser):
return config return config
def _set_language(self, type): def _set_language(self, type):
assert(type),"please use -l or --language to choose language type" assert (type), "please use -l or --language to choose language type"
assert( assert(
type[0] in support_list.keys() type[0] in support_list.keys()
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \ ),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
"please check your running command".format(support_list, type) "please check your running command".format(support_list, type)
global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0]) global_config['Global'][
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0]) 'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])] 0])
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])] global_config['Global'][
'save_model_dir'] = './output/rec_{}_lite'.format(type[0])
global_config['Train']['dataset'][
'label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
global_config['Eval']['dataset'][
'label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
global_config['Global']['character_type'] = type[0] global_config['Global']['character_type'] = type[0]
assert( assert (
os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path'])) os.path.isfile(
),"Loss default dictionary file {}_dict.txt.You can download it from \ os.path.join(project_path, global_config['Global'][
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0]) 'character_dict_path']))
), "Loss default dictionary file {}_dict.txt.You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(
type[0])
return type[0] return type[0]
@ -111,10 +151,12 @@ def merge_config(config):
else: else:
cur = cur[sub_key] cur = cur[sub_key]
def loss_file(path): def loss_file(path):
assert( assert (
os.path.exists(path) os.path.exists(path)
),"There is no such file:{},Please do not forget to put in the specified file".format(path) ), "There is no such file:{},Please do not forget to put in the specified file".format(
path)
if __name__ == '__main__': if __name__ == '__main__':
@ -126,27 +168,33 @@ if __name__ == '__main__':
if FLAGS.train: if FLAGS.train:
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train] global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
train_label_path = os.path.join(project_path,FLAGS.train) train_label_path = os.path.join(project_path, FLAGS.train)
loss_file(train_label_path) loss_file(train_label_path)
if FLAGS.val: if FLAGS.val:
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val] global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
eval_label_path = os.path.join(project_path,FLAGS.val) eval_label_path = os.path.join(project_path, FLAGS.val)
loss_file(eval_label_path) loss_file(eval_label_path)
if FLAGS.dict: if FLAGS.dict:
global_config['Global']['character_dict_path'] = FLAGS.dict global_config['Global']['character_dict_path'] = FLAGS.dict
dict_path = os.path.join(project_path,FLAGS.dict) dict_path = os.path.join(project_path, FLAGS.dict)
loss_file(dict_path) loss_file(dict_path)
if FLAGS.data_dir: if FLAGS.data_dir:
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
data_dir = os.path.join(project_path,FLAGS.data_dir) data_dir = os.path.join(project_path, FLAGS.data_dir)
loss_file(data_dir) loss_file(data_dir)
with open(save_file_path, 'w') as f: with open(save_file_path, 'w') as f:
yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False) yaml.dump(
dict(global_config), f, default_flow_style=False, sort_keys=False)
logging.info("Project path is :{}".format(project_path)) logging.info("Project path is :{}".format(project_path))
logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0])) logging.info("Train list path set to :{}".format(global_config['Train'][
logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0])) 'dataset']['label_file_list'][0]))
logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir'])) logging.info("Eval list path set to :{}".format(global_config['Eval'][
logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path'])) 'dataset']['label_file_list'][0]))
logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path)) logging.info("Dataset root path set to :{}".format(global_config['Eval'][
'dataset']['data_dir']))
logging.info("Dict path set to :{}".format(global_config['Global'][
'character_dict_path']))
logging.info("Config file set to :configs/rec/multi_language/{}".
format(save_file_path))

View File

@ -94,7 +94,7 @@ class BaseRecLabelEncode(object):
use_space_char=False): use_space_char=False):
support_character_type = [ support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'EN', 'it', 'es', 'pt', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne' 'mr', 'ne'
] ]