ppocr支持多语言切换
This commit is contained in:
parent
79180223dc
commit
06430c9359
|
@ -236,7 +236,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls tru
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR, draw_ocr
|
from paddleocr import PaddleOCR, draw_ocr
|
||||||
# 模型路径下必须含有model和params文件
|
# 模型路径下必须含有model和params文件
|
||||||
ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
|
ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
|
||||||
img_path = 'PaddleOCR/doc/imgs/11.jpg'
|
img_path = 'PaddleOCR/doc/imgs/11.jpg'
|
||||||
result = ocr.ocr(img_path, cls=True)
|
result = ocr.ocr(img_path, cls=True)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -256,7 +256,7 @@ im_show.save('result.jpg')
|
||||||
### 通过命令行使用
|
### 通过命令行使用
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
||||||
```
|
```
|
||||||
|
|
||||||
## 参数说明
|
## 参数说明
|
||||||
|
@ -290,6 +290,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
|
||||||
| cls_batch_num | 进行分类时,同时前向的图片数 |30 |
|
| cls_batch_num | 进行分类时,同时前向的图片数 |30 |
|
||||||
| enable_mkldnn | 是否启用mkldnn | FALSE |
|
| enable_mkldnn | 是否启用mkldnn | FALSE |
|
||||||
| use_zero_copy_run | 是否通过zero_copy_run的方式进行前向 | FALSE |
|
| use_zero_copy_run | 是否通过zero_copy_run的方式进行前向 | FALSE |
|
||||||
|
| lang | 模型语言类型,目前支持 中文(ch)和英文(en) | ch |
|
||||||
| det | 前向时是否启动检测 | TRUE |
|
| det | 前向时是否启动检测 | TRUE |
|
||||||
| rec | 前向时是否启动识别 | TRUE |
|
| rec | 前向时是否启动识别 | TRUE |
|
||||||
| cls | 前向时是否启动分类 | FALSE |
|
| cls | 前向时是否启动分类 | FALSE |
|
||||||
|
|
|
@ -17,7 +17,7 @@ pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x is the version of padd
|
||||||
* detection classification and recognition
|
* detection classification and recognition
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR,draw_ocr
|
from paddleocr import PaddleOCR,draw_ocr
|
||||||
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
|
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
|
||||||
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
||||||
result = ocr.ocr(img_path, cls=True)
|
result = ocr.ocr(img_path, cls=True)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -51,7 +51,7 @@ Visualization of results
|
||||||
* detection and recognition
|
* detection and recognition
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR,draw_ocr
|
from paddleocr import PaddleOCR,draw_ocr
|
||||||
ocr = PaddleOCR() # need to run only once to download and load model into memory
|
ocr = PaddleOCR(lang='en') # need to run only once to download and load model into memory
|
||||||
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
||||||
result = ocr.ocr(img_path)
|
result = ocr.ocr(img_path)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -85,7 +85,7 @@ Visualization of results
|
||||||
* classification and recognition
|
* classification and recognition
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to load model into memory
|
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to load model into memory
|
||||||
img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
|
img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
|
||||||
result = ocr.ocr(img_path, det=False, cls=True)
|
result = ocr.ocr(img_path, det=False, cls=True)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -132,7 +132,7 @@ Visualization of results
|
||||||
* only recognition
|
* only recognition
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
ocr = PaddleOCR() # need to run only once to load model into memory
|
ocr = PaddleOCR(lang='en') # need to run only once to load model into memory
|
||||||
img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
|
img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
|
||||||
result = ocr.ocr(img_path, det=False, cls=False)
|
result = ocr.ocr(img_path, det=False, cls=False)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -168,7 +168,7 @@ paddleocr -h
|
||||||
|
|
||||||
* detection classification and recognition
|
* detection classification and recognition
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true -cls true
|
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --cls true --lang en
|
||||||
```
|
```
|
||||||
|
|
||||||
Output will be a list, each item contains bounding box, text and recognition confidence
|
Output will be a list, each item contains bounding box, text and recognition confidence
|
||||||
|
@ -181,7 +181,7 @@ Output will be a list, each item contains bounding box, text and recognition con
|
||||||
|
|
||||||
* detection and recognition
|
* detection and recognition
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg
|
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en
|
||||||
```
|
```
|
||||||
|
|
||||||
Output will be a list, each item contains bounding box, text and recognition confidence
|
Output will be a list, each item contains bounding box, text and recognition confidence
|
||||||
|
@ -194,7 +194,7 @@ Output will be a list, each item contains bounding box, text and recognition con
|
||||||
|
|
||||||
* classification and recognition
|
* classification and recognition
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true -cls true --det false
|
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --cls true --det false --lang en
|
||||||
```
|
```
|
||||||
|
|
||||||
Output will be a list, each item contains text and recognition confidence
|
Output will be a list, each item contains text and recognition confidence
|
||||||
|
@ -217,7 +217,7 @@ Output will be a list, each item only contains bounding box
|
||||||
|
|
||||||
* only recognition
|
* only recognition
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false
|
paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false --lang en
|
||||||
```
|
```
|
||||||
|
|
||||||
Output will be a list, each item contains text and recognition confidence
|
Output will be a list, each item contains text and recognition confidence
|
||||||
|
@ -244,7 +244,7 @@ First, refer to the first section of [inference_en.md](./inference_en.md) to con
|
||||||
```python
|
```python
|
||||||
from paddleocr import PaddleOCR,draw_ocr
|
from paddleocr import PaddleOCR,draw_ocr
|
||||||
# The path of detection and recognition model must contain model and params files
|
# The path of detection and recognition model must contain model and params files
|
||||||
ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
|
ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
|
||||||
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
||||||
result = ocr.ocr(img_path, cls=True)
|
result = ocr.ocr(img_path, cls=True)
|
||||||
for line in result:
|
for line in result:
|
||||||
|
@ -264,7 +264,7 @@ im_show.save('result.jpg')
|
||||||
### Use by command line
|
### Use by command line
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
|
||||||
```
|
```
|
||||||
|
|
||||||
## Parameter Description
|
## Parameter Description
|
||||||
|
@ -298,6 +298,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
|
||||||
| cls_batch_num | When performing classification, the batchsize of forward images | 30 |
|
| cls_batch_num | When performing classification, the batchsize of forward images | 30 |
|
||||||
| enable_mkldnn | Whether to enable mkldnn | FALSE |
|
| enable_mkldnn | Whether to enable mkldnn | FALSE |
|
||||||
| use_zero_copy_run | Whether to forward by zero_copy_run | FALSE |
|
| use_zero_copy_run | Whether to forward by zero_copy_run | FALSE |
|
||||||
|
| lang | Model language; currently only Chinese (ch) and English (en) are supported | ch |
|
||||||
| det | Enable detection when `ppocr.ocr` func exec | TRUE |
|
| det | Enable detection when `ppocr.ocr` func exec | TRUE |
|
||||||
| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
|
| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
|
||||||
| cls | Enable classification when `ppocr.ocr` func exec | FALSE |
|
| cls | Enable classification when `ppocr.ocr` func exec | FALSE |
|
||||||
|
|
40
paddleocr.py
40
paddleocr.py
|
@ -33,11 +33,21 @@ from ppocr.utils.utility import check_and_read_gif, get_image_file_list
|
||||||
|
|
||||||
__all__ = ['PaddleOCR']
|
__all__ = ['PaddleOCR']
|
||||||
|
|
||||||
model_params = {
|
model_urls = {
|
||||||
'det':
|
'det':
|
||||||
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar',
|
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar',
|
||||||
'rec':
|
'rec': {
|
||||||
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar',
|
'ch': {
|
||||||
|
'url':
|
||||||
|
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar',
|
||||||
|
'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
|
||||||
|
},
|
||||||
|
'en': {
|
||||||
|
'url':
|
||||||
|
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar',
|
||||||
|
'dict_path': './ppocr/utils/ic15_dict.txt'
|
||||||
|
}
|
||||||
|
},
|
||||||
'cls':
|
'cls':
|
||||||
'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar'
|
'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar'
|
||||||
}
|
}
|
||||||
|
@ -123,10 +133,7 @@ def parse_args():
|
||||||
parser.add_argument("--rec_char_type", type=str, default='ch')
|
parser.add_argument("--rec_char_type", type=str, default='ch')
|
||||||
parser.add_argument("--rec_batch_num", type=int, default=30)
|
parser.add_argument("--rec_batch_num", type=int, default=30)
|
||||||
parser.add_argument("--max_text_length", type=int, default=25)
|
parser.add_argument("--max_text_length", type=int, default=25)
|
||||||
parser.add_argument(
|
parser.add_argument("--rec_char_dict_path", type=str, default=None)
|
||||||
"--rec_char_dict_path",
|
|
||||||
type=str,
|
|
||||||
default="./ppocr/utils/ppocr_keys_v1.txt")
|
|
||||||
parser.add_argument("--use_space_char", type=bool, default=True)
|
parser.add_argument("--use_space_char", type=bool, default=True)
|
||||||
|
|
||||||
# params for text classifier
|
# params for text classifier
|
||||||
|
@ -135,10 +142,12 @@ def parse_args():
|
||||||
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
|
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
|
||||||
parser.add_argument("--label_list", type=list, default=['0', '180'])
|
parser.add_argument("--label_list", type=list, default=['0', '180'])
|
||||||
parser.add_argument("--cls_batch_num", type=int, default=30)
|
parser.add_argument("--cls_batch_num", type=int, default=30)
|
||||||
|
parser.add_argument("--cls_thresh", type=float, default=0.9)
|
||||||
|
|
||||||
parser.add_argument("--enable_mkldnn", type=bool, default=False)
|
parser.add_argument("--enable_mkldnn", type=bool, default=False)
|
||||||
parser.add_argument("--use_zero_copy_run", type=bool, default=False)
|
parser.add_argument("--use_zero_copy_run", type=bool, default=False)
|
||||||
|
|
||||||
|
parser.add_argument("--lang", type=str, default='ch')
|
||||||
parser.add_argument("--det", type=str2bool, default=True)
|
parser.add_argument("--det", type=str2bool, default=True)
|
||||||
parser.add_argument("--rec", type=str2bool, default=True)
|
parser.add_argument("--rec", type=str2bool, default=True)
|
||||||
parser.add_argument("--cls", type=str2bool, default=False)
|
parser.add_argument("--cls", type=str2bool, default=False)
|
||||||
|
@ -155,21 +164,28 @@ class PaddleOCR(predict_system.TextSystem):
|
||||||
postprocess_params = parse_args()
|
postprocess_params = parse_args()
|
||||||
postprocess_params.__dict__.update(**kwargs)
|
postprocess_params.__dict__.update(**kwargs)
|
||||||
self.use_angle_cls = postprocess_params.use_angle_cls
|
self.use_angle_cls = postprocess_params.use_angle_cls
|
||||||
|
lang = postprocess_params.lang
|
||||||
|
assert lang in model_urls['rec'], 'param lang must in {}'.format(
|
||||||
|
model_urls['rec'].keys())
|
||||||
|
if postprocess_params.rec_char_dict_path is None:
|
||||||
|
postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
|
||||||
|
'dict_path']
|
||||||
|
|
||||||
# init model dir
|
# init model dir
|
||||||
if postprocess_params.det_model_dir is None:
|
if postprocess_params.det_model_dir is None:
|
||||||
postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
|
postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
|
||||||
if postprocess_params.rec_model_dir is None:
|
if postprocess_params.rec_model_dir is None:
|
||||||
postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec')
|
postprocess_params.rec_model_dir = os.path.join(
|
||||||
|
BASE_DIR, 'rec/{}'.format(lang))
|
||||||
if postprocess_params.cls_model_dir is None:
|
if postprocess_params.cls_model_dir is None:
|
||||||
postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
|
postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
|
||||||
print(postprocess_params)
|
print(postprocess_params)
|
||||||
# download model
|
# download model
|
||||||
maybe_download(postprocess_params.det_model_dir, model_params['det'])
|
maybe_download(postprocess_params.det_model_dir, model_urls['det'])
|
||||||
maybe_download(postprocess_params.rec_model_dir, model_params['rec'])
|
maybe_download(postprocess_params.rec_model_dir,
|
||||||
|
model_urls['rec'][lang]['url'])
|
||||||
if self.use_angle_cls:
|
if self.use_angle_cls:
|
||||||
maybe_download(postprocess_params.cls_model_dir,
|
maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
|
||||||
model_params['cls'])
|
|
||||||
|
|
||||||
if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
|
if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
|
||||||
logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
|
logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
|
||||||
|
|
Loading…
Reference in New Issue