Merge pull request #2487 from tink2123/new_multi_lang_for_dygraph
replace path.join for windows
This commit is contained in:
commit
9cde47d63c
|
@ -32,7 +32,8 @@ PaddleOCR supports both dynamic graph and static graph programming paradigm
|
|||
|
||||
<div align="center">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/test_add_91.jpg" width="800">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/00018069.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_01.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_02.jpg" width="800">
|
||||
</div>
|
||||
|
||||
The above pictures are visualizations of the general ppocr_server model. For more result images, please see [More visualizations](./doc/doc_en/visualization_en.md).
|
||||
|
|
|
@ -118,7 +118,6 @@ class ArgsParser(ArgumentParser):
|
|||
return config
|
||||
|
||||
def _set_language(self, type):
|
||||
print("type:", type)
|
||||
lang = type[0]
|
||||
assert (type), "please use -l or --language to choose language type"
|
||||
assert(
|
||||
|
|
|
@ -113,7 +113,7 @@ python3 generate_multi_language_configs.py -l it \
|
|||
| cyrillic_mobile_v2.0_rec | 斯拉夫字母 | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | 梵文字母 | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
|
||||
更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
|
||||
更多支持语种请参考: [多语言模型](./multi_languages.md)
|
||||
|
||||
|
||||
<a name="文本方向分类模型"></a>
|
||||
|
|
|
@ -134,7 +134,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
|
|||
<a name="python_脚本运行"></a>
|
||||
### 2.2 python 脚本运行
|
||||
|
||||
ppocr 也支持在python脚本中运行,便于嵌入到您自己的代码中:
|
||||
ppocr 也支持在python脚本中运行,便于嵌入到您自己的代码中 :
|
||||
|
||||
* 整图预测(检测+识别)
|
||||
|
||||
|
@ -155,7 +155,7 @@ image = Image.open(img_path).convert('RGB')
|
|||
boxes = [line[0] for line in result]
|
||||
txts = [line[1][0] for line in result]
|
||||
scores = [line[1][1] for line in result]
|
||||
im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/korean.ttf')
|
||||
im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/fonts/korean.ttf')
|
||||
im_show = Image.fromarray(im_show)
|
||||
im_show.save('result.jpg')
|
||||
```
|
||||
|
@ -240,7 +240,7 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别
|
|||
|德文|german|german|
|
||||
|日文|japan|japan|
|
||||
|韩文|korean|korean|
|
||||
|中文繁体|chinese traditional |ch_tra|
|
||||
|中文繁体|chinese traditional |chinese_cht|
|
||||
|意大利文| Italian |it|
|
||||
|西班牙文|Spanish |es|
|
||||
|葡萄牙文| Portuguese|pt|
|
||||
|
@ -259,10 +259,9 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别
|
|||
|乌克兰文|Ukrainian|uk|
|
||||
|白俄罗斯文|Belarusian|be|
|
||||
|泰卢固文|Telugu |te|
|
||||
|卡纳达文|Kannada |kn|
|
||||
|泰米尔文|Tamil |ta|
|
||||
|南非荷兰文 |Afrikaans |af|
|
||||
|阿塞拜疆文 |Azerbaijani |az|
|
||||
|阿塞拜疆文 |Azerbaijani |az|
|
||||
|波斯尼亚文|Bosnian|bs|
|
||||
|捷克文|Czech|cs|
|
||||
|威尔士文 |Welsh |cy|
|
||||
|
|
|
@ -111,7 +111,7 @@ python3 generate_multi_language_configs.py -l it \
|
|||
| cyrillic_mobile_v2.0_rec | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
|
||||
For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
|
||||
For more supported languages, please refer to: [Multi-language model](./multi_languages_en.md)
|
||||
|
||||
|
||||
<a name="Angle"></a>
|
||||
|
|
|
@ -153,7 +153,7 @@ image = Image.open(img_path).convert('RGB')
|
|||
boxes = [line[0] for line in result]
|
||||
txts = [line[1][0] for line in result]
|
||||
scores = [line[1][1] for line in result]
|
||||
im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/korean.ttf')
|
||||
im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/fonts/korean.ttf')
|
||||
im_show = Image.fromarray(im_show)
|
||||
im_show.save('result.jpg')
|
||||
```
|
||||
|
@ -232,7 +232,7 @@ For functions such as data annotation, you can read the complete [Document Tutor
|
|||
|german|german|
|
||||
|japan|japan|
|
||||
|korean|korean|
|
||||
|chinese traditional |ch_tra|
|
||||
|chinese traditional |chinese_cht|
|
||||
| Italian |it|
|
||||
|Spanish |es|
|
||||
| Portuguese|pt|
|
||||
|
@ -251,7 +251,6 @@ For functions such as data annotation, you can read the complete [Document Tutor
|
|||
|Ukrainian|uk|
|
||||
|Belarusian|be|
|
||||
|Telugu |te|
|
||||
|Kannada |kn|
|
||||
|Tamil |ta|
|
||||
|Afrikaans |af|
|
||||
|Azerbaijani |az|
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 107 KiB |
Binary file not shown.
After Width: | Height: | Size: 231 KiB |
14
paddleocr.py
14
paddleocr.py
|
@ -30,6 +30,7 @@ from ppocr.utils.logging import get_logger
|
|||
|
||||
logger = get_logger()
|
||||
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
|
||||
from tools.infer.utility import draw_ocr
|
||||
|
||||
__all__ = ['PaddleOCR']
|
||||
|
||||
|
@ -117,7 +118,7 @@ model_urls = {
|
|||
}
|
||||
|
||||
SUPPORT_DET_MODEL = ['DB']
|
||||
VERSION = 2.1
|
||||
VERSION = '2.1'
|
||||
SUPPORT_REC_MODEL = ['CRNN']
|
||||
BASE_DIR = os.path.expanduser("~/.paddleocr/")
|
||||
|
||||
|
@ -315,14 +316,13 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
|
||||
# init model dir
|
||||
if postprocess_params.det_model_dir is None:
|
||||
postprocess_params.det_model_dir = os.path.join(
|
||||
BASE_DIR, '{}/det/{}'.format(VERSION, det_lang))
|
||||
postprocess_params.det_model_dir = os.path.join(BASE_DIR, VERSION,
|
||||
'det', det_lang)
|
||||
if postprocess_params.rec_model_dir is None:
|
||||
postprocess_params.rec_model_dir = os.path.join(
|
||||
BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
|
||||
postprocess_params.rec_model_dir = os.path.join(BASE_DIR, VERSION,
|
||||
'rec', lang)
|
||||
if postprocess_params.cls_model_dir is None:
|
||||
postprocess_params.cls_model_dir = os.path.join(
|
||||
BASE_DIR, '{}/cls'.format(VERSION))
|
||||
postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
|
||||
print(postprocess_params)
|
||||
# download model
|
||||
maybe_download(postprocess_params.det_model_dir,
|
||||
|
|
|
@ -96,7 +96,7 @@ class BaseRecLabelEncode(object):
|
|||
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
|
||||
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
|
||||
'mr', 'ne'
|
||||
'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
|
||||
]
|
||||
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
||||
support_character_type, character_type)
|
||||
|
|
|
@ -28,7 +28,7 @@ class BaseRecLabelDecode(object):
|
|||
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
|
||||
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
|
||||
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
|
||||
'ne', 'EN'
|
||||
'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
|
||||
]
|
||||
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
|
||||
support_character_type, character_type)
|
||||
|
|
Loading…
Reference in New Issue