Merge pull request #3915 from WenmuZhou/whl
add 2.1 models to paddleocr whl
This commit is contained in:
commit
4016805f6c
159
paddleocr.py
159
paddleocr.py
|
@ -33,15 +33,47 @@ from tools.infer.utility import draw_ocr, str2bool
|
|||
from ppstructure.utility import init_args, draw_structure_result
|
||||
from ppstructure.predict_system import OCRSystem, save_structure_res
|
||||
|
||||
__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']
|
||||
__all__ = [
|
||||
'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
|
||||
'save_structure_res', 'download_with_progressbar'
|
||||
]
|
||||
|
||||
model_urls = {
|
||||
SUPPORT_DET_MODEL = ['DB']
|
||||
VERSION = '2.2.1'
|
||||
SUPPORT_REC_MODEL = ['CRNN']
|
||||
BASE_DIR = os.path.expanduser("~/.paddleocr/")
|
||||
|
||||
DEFAULT_MODEL_VERSION = '2.0'
|
||||
MODEL_URLS = {
|
||||
'2.1': {
|
||||
'det': {
|
||||
'ch':
|
||||
'ch': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_det_infer.tar',
|
||||
},
|
||||
},
|
||||
'rec': {
|
||||
'ch': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.1/chinese/ch_ppocr_mobile_v2.1_rec_infer.tar',
|
||||
'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
|
||||
}
|
||||
}
|
||||
},
|
||||
'2.0': {
|
||||
'det': {
|
||||
'ch': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
|
||||
'en':
|
||||
},
|
||||
'en': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
|
||||
'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
|
||||
},
|
||||
'structure': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
|
||||
}
|
||||
},
|
||||
'rec': {
|
||||
'ch': {
|
||||
|
@ -115,22 +147,27 @@ model_urls = {
|
|||
'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
|
||||
},
|
||||
'structure': {
|
||||
'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
|
||||
'dict_path': 'ppocr/utils/dict/table_dict.txt'
|
||||
}
|
||||
},
|
||||
'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
|
||||
'cls': {
|
||||
'ch': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
|
||||
}
|
||||
},
|
||||
'table': {
|
||||
'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
|
||||
'en': {
|
||||
'url':
|
||||
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
|
||||
'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SUPPORT_DET_MODEL = ['DB']
|
||||
VERSION = '2.2.0.1'
|
||||
SUPPORT_REC_MODEL = ['CRNN']
|
||||
BASE_DIR = os.path.expanduser("~/.paddleocr/")
|
||||
|
||||
|
||||
def parse_args(mMain=True):
|
||||
import argparse
|
||||
|
@ -140,6 +177,7 @@ def parse_args(mMain=True):
|
|||
parser.add_argument("--det", type=str2bool, default=True)
|
||||
parser.add_argument("--rec", type=str2bool, default=True)
|
||||
parser.add_argument("--type", type=str, default='ocr')
|
||||
parser.add_argument("--version", type=str, default='2.1')
|
||||
|
||||
for action in parser._actions:
|
||||
if action.dest in ['rec_char_dict_path', 'table_char_dict_path']:
|
||||
|
@ -155,19 +193,19 @@ def parse_args(mMain=True):
|
|||
|
||||
def parse_lang(lang):
|
||||
latin_lang = [
|
||||
'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
|
||||
'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
|
||||
'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk',
|
||||
'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi'
|
||||
'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
|
||||
'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
|
||||
'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
|
||||
'sw', 'tl', 'tr', 'uz', 'vi'
|
||||
]
|
||||
arabic_lang = ['ar', 'fa', 'ug', 'ur']
|
||||
cyrillic_lang = [
|
||||
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd',
|
||||
'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
|
||||
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
|
||||
'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
|
||||
]
|
||||
devanagari_lang = [
|
||||
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new',
|
||||
'gom', 'sa', 'bgc'
|
||||
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
|
||||
'sa', 'bgc'
|
||||
]
|
||||
if lang in latin_lang:
|
||||
lang = "latin"
|
||||
|
@ -177,9 +215,9 @@ def parse_lang(lang):
|
|||
lang = "cyrillic"
|
||||
elif lang in devanagari_lang:
|
||||
lang = "devanagari"
|
||||
assert lang in model_urls[
|
||||
assert lang in MODEL_URLS[DEFAULT_MODEL_VERSION][
|
||||
'rec'], 'param lang must in {}, but got {}'.format(
|
||||
model_urls['rec'].keys(), lang)
|
||||
MODEL_URLS[DEFAULT_MODEL_VERSION]['rec'].keys(), lang)
|
||||
if lang == "ch":
|
||||
det_lang = "ch"
|
||||
elif lang == 'structure':
|
||||
|
@ -189,6 +227,35 @@ def parse_lang(lang):
|
|||
return lang, det_lang
|
||||
|
||||
|
||||
def get_model_config(version, model_type, lang):
    """Return the MODEL_URLS entry for a version, model type and language.

    The lookup degrades step by step to DEFAULT_MODEL_VERSION:

    * an unknown ``version`` is replaced by the default version;
    * a ``model_type`` missing from the chosen version is taken from the
      default version when it exists there;
    * a ``lang`` missing from the chosen version/model type is taken from
      the default version when it exists there.

    Each fallback is reported with ``logger.warning``. When neither the
    requested nor the default version can satisfy the request, the problem
    is reported with ``logger.error`` and ``sys.exit(-1)`` is called.

    Args:
        version: key into MODEL_URLS (e.g. '2.0' or '2.1').
        model_type: key of the model family inside a version
            (e.g. 'det', 'rec', 'cls', 'table').
        lang: key of the language inside a model family.

    Returns:
        The value stored at ``MODEL_URLS[version][model_type][lang]`` after
        any fallback has been applied.
    """
    fallback = DEFAULT_MODEL_VERSION

    # Step 1: an unknown version silently becomes the default one.
    if version not in MODEL_URLS:
        logger.warning('version {} not in {}, use version {} instead'.format(
            version, MODEL_URLS.keys(), fallback))
        version = fallback

    # Step 2: the model family must exist in the chosen or default version.
    if model_type not in MODEL_URLS[version]:
        if model_type not in MODEL_URLS[fallback]:
            logger.error('{} models is not support, we only support {}'.format(
                model_type, MODEL_URLS[fallback].keys()))
            sys.exit(-1)
        logger.warning(
            'version {} not support {} models, use version {} instead'.format(
                version, model_type, fallback))
        version = fallback

    # Step 3: the language must exist in the chosen or default version.
    if lang not in MODEL_URLS[version][model_type]:
        default_langs = MODEL_URLS[fallback][model_type]
        if lang not in default_langs:
            logger.error(
                'lang {} is not support, we only support {} for {} models'.format(
                    lang, default_langs.keys(), model_type))
            sys.exit(-1)
        logger.warning('lang {} is not support in {}, use {} instead'.format(
            lang, version, fallback))
        version = fallback

    return MODEL_URLS[version][model_type][lang]
|
||||
|
||||
|
||||
class PaddleOCR(predict_system.TextSystem):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
|
@ -204,15 +271,21 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
lang, det_lang = parse_lang(params.lang)
|
||||
|
||||
# init model dir
|
||||
params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
|
||||
det_model_config = get_model_config(params.version, 'det', det_lang)
|
||||
params.det_model_dir, det_url = confirm_model_dir_url(
|
||||
params.det_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
|
||||
model_urls['det'][det_lang])
|
||||
params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
|
||||
det_model_config['url'])
|
||||
rec_model_config = get_model_config(params.version, 'rec', lang)
|
||||
params.rec_model_dir, rec_url = confirm_model_dir_url(
|
||||
params.rec_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
|
||||
model_urls['rec'][lang]['url'])
|
||||
params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir,
|
||||
rec_model_config['url'])
|
||||
cls_model_config = get_model_config(params.version, 'cls', 'ch')
|
||||
params.cls_model_dir, cls_url = confirm_model_dir_url(
|
||||
params.cls_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
|
||||
model_urls['cls'])
|
||||
cls_model_config['url'])
|
||||
# download model
|
||||
maybe_download(params.det_model_dir, det_url)
|
||||
maybe_download(params.rec_model_dir, rec_url)
|
||||
|
@ -226,7 +299,8 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
sys.exit(0)
|
||||
|
||||
if params.rec_char_dict_path is None:
|
||||
params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path'])
|
||||
params.rec_char_dict_path = str(
|
||||
Path(__file__).parent / rec_model_config['dict_path'])
|
||||
|
||||
print(params)
|
||||
# init det_model and rec_model
|
||||
|
@ -293,24 +367,32 @@ class PPStructure(OCRSystem):
|
|||
lang, det_lang = parse_lang(params.lang)
|
||||
|
||||
# init model dir
|
||||
params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
|
||||
det_model_config = get_model_config(params.version, 'det', det_lang)
|
||||
params.det_model_dir, det_url = confirm_model_dir_url(
|
||||
params.det_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
|
||||
model_urls['det'][det_lang])
|
||||
params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
|
||||
det_model_config['url'])
|
||||
rec_model_config = get_model_config(params.version, 'rec', lang)
|
||||
params.rec_model_dir, rec_url = confirm_model_dir_url(
|
||||
params.rec_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
|
||||
model_urls['rec'][lang]['url'])
|
||||
params.table_model_dir, table_url = confirm_model_dir_url(params.table_model_dir,
|
||||
rec_model_config['url'])
|
||||
table_model_config = get_model_config(params.version, 'table', 'en')
|
||||
params.table_model_dir, table_url = confirm_model_dir_url(
|
||||
params.table_model_dir,
|
||||
os.path.join(BASE_DIR, VERSION, 'ocr', 'table'),
|
||||
model_urls['table']['url'])
|
||||
table_model_config['url'])
|
||||
# download model
|
||||
maybe_download(params.det_model_dir, det_url)
|
||||
maybe_download(params.rec_model_dir, rec_url)
|
||||
maybe_download(params.table_model_dir, table_url)
|
||||
|
||||
if params.rec_char_dict_path is None:
|
||||
params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path'])
|
||||
params.rec_char_dict_path = str(
|
||||
Path(__file__).parent / rec_model_config['dict_path'])
|
||||
if params.table_char_dict_path is None:
|
||||
params.table_char_dict_path = str(Path(__file__).parent / model_urls['table']['dict_path'])
|
||||
params.table_char_dict_path = str(
|
||||
Path(__file__).parent / table_model_config['dict_path'])
|
||||
|
||||
print(params)
|
||||
super().__init__(params)
|
||||
|
@ -374,4 +456,3 @@ def main():
|
|||
for item in result:
|
||||
item.pop('img')
|
||||
logger.info(item)
|
||||
|
||||
|
|
Loading…
Reference in New Issue