From 24dfd8b2b2d688cfe6b44c9e4893a34a4840109a Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Sat, 19 Sep 2020 12:06:23 +0000 Subject: [PATCH] fix whl doc and add multi-language --- doc/doc_ch/whl.md | 4 +++- doc/doc_en/whl_en.md | 4 ++++ paddleocr.py | 25 +++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index 657f9837..46796ce6 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -19,7 +19,9 @@ pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本 * 检测+分类+识别全流程 ```python from paddleocr import PaddleOCR, draw_ocr -ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory +# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换 +# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。 +ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' result = ocr.ocr(img_path, cls=True) for line in result: diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index b62e5454..4049d9dc 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -17,12 +17,16 @@ pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x is the version of padd * detection classification and recognition ```python from paddleocr import PaddleOCR,draw_ocr +# Paddleocr supports Chinese, English, French, German, Korean and Japanese. +# You can set the parameter `lang` to `ch`, `en`, `french`, `german`, `korean`, `japan` +# respectively, to switch the language model. 
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' result = ocr.ocr(img_path, cls=True) for line in result: print(line) + # draw result from PIL import Image image = Image.open(img_path).convert('RGB') diff --git a/paddleocr.py b/paddleocr.py index 55ca87ac..7e9b2402 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -46,6 +46,26 @@ model_urls = { 'url': 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar', 'dict_path': './ppocr/utils/ic15_dict.txt' + }, + 'french': { + 'url': + 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar', + 'dict_path': './ppocr/utils/french_dict.txt' + }, + 'german': { + 'url': + 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar', + 'dict_path': './ppocr/utils/german_dict.txt' + }, + 'korean': { + 'url': + 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar', + 'dict_path': './ppocr/utils/korean_dict.txt' + }, + 'japan': { + 'url': + 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar', + 'dict_path': './ppocr/utils/japan_dict.txt' } }, 'cls': @@ -165,8 +185,9 @@ class PaddleOCR(predict_system.TextSystem): postprocess_params.__dict__.update(**kwargs) self.use_angle_cls = postprocess_params.use_angle_cls lang = postprocess_params.lang - assert lang in model_urls['rec'], 'param lang must in {}'.format( - model_urls['rec'].keys()) + assert lang in model_urls[ + 'rec'], 'param lang must in {}, but got {}'.format( + model_urls['rec'].keys(), lang) if postprocess_params.rec_char_dict_path is None: postprocess_params.rec_char_dict_path = model_urls['rec'][lang][ 'dict_path']