From bf60cd827ba5007b82deaad5805bc276f4803c44 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Wed, 16 Sep 2020 20:00:34 +0800 Subject: [PATCH] =?UTF-8?q?whl=E5=8C=85=E6=B7=BB=E5=8A=A0=E5=88=86?= =?UTF-8?q?=E7=B1=BB=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/doc_ch/whl.md | 123 +++++++++++++++++++++++++++++++++++++++---- doc/doc_en/whl_en.md | 120 ++++++++++++++++++++++++++++++++++++++--- paddleocr.py | 36 +++++++++++-- setup.py | 2 +- 4 files changed, 257 insertions(+), 24 deletions(-) diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index 280cc2f6..61ad26a9 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -12,11 +12,44 @@ pip install paddleocr 本地构建并安装 ```bash python setup.py bdist_wheel -pip install dist/paddleocr-0.0.3-py3-none-any.whl +pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本号 ``` ### 1. 代码使用 -* 检测+识别全流程 +* 检测+分类+识别全流程 +```python +from paddleocr import PaddleOCR, draw_ocr +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs/11.jpg' +result = ocr.ocr(img_path, cls=True) +for line in result: + print(line) + +# 显示结果 +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +...... +``` +结果可视化 + +
+ +
+ + +* 检测+识别 ```python from paddleocr import PaddleOCR, draw_ocr ocr = PaddleOCR() # need to run only once to download and load model into memory @@ -48,12 +81,27 @@ im_show.save('result.jpg') + +* 分类+识别 +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' +result = ocr.ocr(img_path, det=False, cls=True) +for line in result: + print(line) +``` +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + * 单独执行检测 ```python from paddleocr import PaddleOCR, draw_ocr ocr = PaddleOCR() # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs/11.jpg' -result = ocr.ocr(img_path,rec=False) +result = ocr.ocr(img_path, rec=False) for line in result: print(line) @@ -84,7 +132,7 @@ im_show.save('result.jpg') from paddleocr import PaddleOCR ocr = PaddleOCR() # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' -result = ocr.ocr(img_path,det=False) +result = ocr.ocr(img_path, det=False) for line in result: print(line) ``` @@ -93,6 +141,20 @@ for line in result: ['韩国小馆', 0.9907421] ``` +* 单独执行分类 +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg' +result = ocr.ocr(img_path, det=False, rec=False, cls=True) +for line in result: + print(line) +``` +结果是一个list,每个item只包含分类结果和分类置信度 +```bash +['0', 0.9999924] +``` + ### 通过命令行使用 查看帮助信息 @@ -100,7 +162,19 @@ for line in result: paddleocr -h ``` -* 检测+识别全流程 +* 检测+分类+识别全流程 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --cls true +``` +结果是一个list,每个item包含了文本框,文字和识别置信度 +```bash +[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]] +[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], 
['产品信息/参数', 0.98069626]] +[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]] +...... +``` + +* 检测+识别 ```bash paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg ``` @@ -112,6 +186,16 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg ...... ``` +* 分类+识别 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false +``` + +结果是一个list,每个item只包含识别结果和识别置信度 +```bash +['韩国小馆', 0.9907421] +``` + * 单独执行检测 ```bash paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false @@ -134,17 +218,27 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false ['韩国小馆', 0.9907421] ``` +* 单独执行分类 +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false --rec false +``` + +结果是一个list,每个item只包含分类结果和分类置信度 +```bash +['0', 0.9999924] +``` + ## 自定义模型 当内置模型无法满足需求时,需要使用到自己训练的模型。 -首先,参照[inference.md](./inference.md) 第一节转换将检测和识别模型转换为inference模型,然后按照如下方式使用 +首先,参照[inference.md](./inference.md) 第一节转换将检测、分类和识别模型转换为inference模型,然后按照如下方式使用 ### 代码使用 ```python from paddleocr import PaddleOCR, draw_ocr -# 检测模型和识别模型路径下必须含有model和params文件 -ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}') +# 模型路径下必须含有model和params文件 +ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True) img_path = 'PaddleOCR/doc/imgs/11.jpg' -result = ocr.ocr(img_path) +result = ocr.ocr(img_path, cls=True) for line in result: print(line) @@ -162,7 +256,7 @@ im_show.save('result.jpg') ### 通过命令行使用 ```bash -paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true ``` ## 参数说明 @@ -182,13 +276,20 
@@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_ | det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 | | det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 | | rec_algorithm | 使用的识别算法类型 | CRNN | -| rec_model_dir | 识别模型所在文件夹。传承那方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | +| rec_model_dir | 识别模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | | rec_image_shape | 识别算法的输入图片尺寸 | "3,32,320" | | rec_char_type | 识别算法的字符类型,中文(ch)或英文(en) | ch | | rec_batch_num | 进行识别时,同时前向的图片数 | 30 | | max_text_length | 识别算法能识别的最大文字长度 | 25 | | rec_char_dict_path | 识别模型字典路径,当rec_model_dir使用方式2传参时需要修改为自己的字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | | use_space_char | 是否识别空格 | TRUE | +| use_angle_cls | 是否加载分类模型 | FALSE | +| cls_model_dir | 分类模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/cls`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | +| cls_image_shape | 分类算法的输入图片尺寸 | "3, 48, 192" | +| label_list | 分类算法的标签列表 | ['0', '180'] | +| cls_batch_num | 进行分类时,同时前向的图片数 |30 | | enable_mkldnn | 是否启用mkldnn | FALSE | +| use_zero_copy_run | 是否通过zero_copy_run的方式进行前向 | FALSE | | det | 前向时使用启动检测 | TRUE | | rec | 前向时是否启动识别 | TRUE | +| cls | 前向时是否启动分类 | FALSE | diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 73ab78c1..49a97653 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -10,10 +10,44 @@ pip install paddleocr build own whl package and install ```bash python setup.py bdist_wheel -pip install dist/paddleocr-0.0.3-py3-none-any.whl +pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x is the version of paddleocr ``` ### 1. 
Use by code +* detection, classification and recognition +```python +from paddleocr import PaddleOCR,draw_ocr +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory +img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' +result = ocr.ocr(img_path, cls=True) +for line in result: + print(line) + +# draw result +from PIL import Image +image = Image.open(img_path).convert('RGB') +boxes = [line[0] for line in result] +txts = [line[1][0] for line in result] +scores = [line[1][1] for line in result] +im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf') +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] +[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +...... +``` + +Visualization of results + +
+ +
+ * detection and recognition ```python from paddleocr import PaddleOCR,draw_ocr @@ -48,6 +82,21 @@ Visualization of results +* classification and recognition +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png' +result = ocr.ocr(img_path, det=False, cls=True) +for line in result: + print(line) +``` + +Output will be a list, each item contains recognition text and confidence +```bash +['PAIN', 0.990372] +``` + * only detection ```python from paddleocr import PaddleOCR,draw_ocr @@ -85,16 +134,31 @@ Visualization of results from paddleocr import PaddleOCR ocr = PaddleOCR() # need to run only once to load model into memory img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png' -result = ocr.ocr(img_path,det=False) +result = ocr.ocr(img_path, det=False, cls=False) for line in result: print(line) ``` -Output will be a list, each item contains text and recognition confidence +Output will be a list, each item contains recognition text and confidence ```bash ['PAIN', 0.990372] ``` +* only classification +```python +from paddleocr import PaddleOCR +ocr = PaddleOCR(use_angle_cls=True) # need to run only once to load model into memory +img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png' +result = ocr.ocr(img_path, det=False, rec=False, cls=True) +for line in result: + print(line) +``` + +Output will be a list, each item contains classification result and confidence +```bash +['0', 0.99999964] +``` + ### Use by command line show help information @@ -102,6 +166,19 @@ show help information paddleocr -h ``` +* detection, classification and recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --cls true +``` + +Output will be a list, each item contains bounding box, text and recognition confidence +```bash +[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]] 
+[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]] +[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]] +...... +``` + * detection and recognition ```bash paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg @@ -115,6 +192,16 @@ Output will be a list, each item contains bounding box, text and recognition con ...... ``` +* classification and recognition +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --cls true --det false +``` + +Output will be a list, each item contains text and recognition confidence +```bash +['PAIN', 0.990372] +``` + * only detection ```bash paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false @@ -130,7 +217,7 @@ Output will be a list, each item only contains bounding box * only recognition ```bash -paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false ``` Output will be a list, each item contains text and recognition confidence @@ -138,6 +225,16 @@ Output will be a list, each item contains text and recognition confidence ['PAIN', 0.990372] ``` +* only classification +```bash +paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --cls true --det false --rec false +``` + +Output will be a list, each item contains classification result and confidence +```bash +['0', 0.99999964] +``` + ## Use custom model When the built-in model cannot meet the needs, you need to use your own trained model. 
First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec model to inference model, and then use it as follows @@ -147,9 +244,9 @@ First, refer to the first section of [inference_en.md](./inference_en.md) to con ```python from paddleocr import PaddleOCR,draw_ocr # The path of detection and recognition model must contain model and params files -ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}å') +ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True) img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' -result = ocr.ocr(img_path) +result = ocr.ocr(img_path, cls=True) for line in result: print(line) @@ -167,7 +264,7 @@ im_show.save('result.jpg') ### Use by command line ```bash -paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} +paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true ``` ## Parameter Description @@ -194,6 +291,13 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_ | max_text_length | The maximum text length that the recognition algorithm can recognize | 25 | | rec_char_dict_path | the alphabet path which needs to be modified to your own path when `rec_model_Name` use mode 2 | ./ppocr/utils/ppocr_keys_v1.txt | | use_space_char | Whether to recognize spaces | TRUE | +| use_angle_cls | Whether to load classification model | FALSE | +| cls_model_dir | the classification inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/cls`; 2. 
The path of the inference model converted by yourself, the model and params files must be included in the model path | None | +| cls_image_shape | image shape of classification algorithm | "3,48,192" | +| label_list | label list of classification algorithm | ['0','180'] | +| cls_batch_num | When performing classification, the batchsize of forward images | 30 | | enable_mkldnn | Whether to enable mkldnn | FALSE | +| use_zero_copy_run | Whether to forward by zero_copy_run | FALSE | | det | Enable detction when `ppocr.ocr` func exec | TRUE | -| rec | Enable detction when `ppocr.ocr` func exec | TRUE | +| rec | Enable recognition when `ppocr.ocr` func exec | TRUE | +| cls | Enable classification when `ppocr.ocr` func exec | FALSE | diff --git a/paddleocr.py b/paddleocr.py index d3d73cb1..381723a1 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -37,6 +37,8 @@ model_params = { 'det': 'https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar', 'rec': 'https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar', + 'cls': + 'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile-v1.1.cls_infer.tar' } SUPPORT_DET_MODEL = ['DB'] @@ -125,11 +127,20 @@ def parse_args(): type=str, default="./ppocr/utils/ppocr_keys_v1.txt") parser.add_argument("--use_space_char", type=bool, default=True) + + # params for text classifier + parser.add_argument("--use_angle_cls", type=str2bool, default=False) + parser.add_argument("--cls_model_dir", type=str, default=None) + parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") + parser.add_argument("--label_list", type=list, default=['0', '180']) + parser.add_argument("--cls_batch_num", type=int, default=30) + parser.add_argument("--enable_mkldnn", type=bool, default=False) + parser.add_argument("--use_zero_copy_run", type=bool, default=False) parser.add_argument("--det", type=str2bool, default=True) parser.add_argument("--rec", type=str2bool, default=True) - 
parser.add_argument("--use_zero_copy_run", type=bool, default=False) + parser.add_argument("--cls", type=str2bool, default=False) return parser.parse_args() @@ -142,16 +153,22 @@ class PaddleOCR(predict_system.TextSystem): """ postprocess_params = parse_args() postprocess_params.__dict__.update(**kwargs) + self.use_angle_cls = postprocess_params.use_angle_cls # init model dir if postprocess_params.det_model_dir is None: postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det') if postprocess_params.rec_model_dir is None: postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec') + if postprocess_params.cls_model_dir is None: + postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls') print(postprocess_params) # download model maybe_download(postprocess_params.det_model_dir, model_params['det']) maybe_download(postprocess_params.rec_model_dir, model_params['rec']) + if self.use_angle_cls: + maybe_download(postprocess_params.cls_model_dir, + model_params['cls']) if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL: logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL)) @@ -166,7 +183,7 @@ class PaddleOCR(predict_system.TextSystem): # init det_model and rec_model super().__init__(postprocess_params) - def ocr(self, img, det=True, rec=True): + def ocr(self, img, det=True, rec=True, cls=False): """ ocr with paddleocr args: @@ -175,6 +192,10 @@ class PaddleOCR(predict_system.TextSystem): rec: use text recognition or not, if false, only det will be exec. 
default is True """ assert isinstance(img, (np.ndarray, list, str)) + if cls and not self.use_angle_cls: + print('cls should be false when use_angle_cls is false') + exit(-1) + self.use_angle_cls = cls if isinstance(img, str): image_file = img img, flag = check_and_read_gif(image_file) @@ -194,6 +215,10 @@ class PaddleOCR(predict_system.TextSystem): else: if not isinstance(img, list): img = [img] + if self.use_angle_cls: + img, cls_res, elapse = self.text_classifier(img) + if not rec: + return cls_res rec_res, elapse = self.text_recognizer(img) return rec_res @@ -208,6 +233,9 @@ def main(): ocr_engine = PaddleOCR() for img_path in image_file_list: print(img_path) - result = ocr_engine.ocr(img_path, det=args.det, rec=args.rec) + result = ocr_engine.ocr(img_path, + det=args.det, + rec=args.rec, + cls=args.cls) for line in result: - print(line) \ No newline at end of file + print(line) diff --git a/setup.py b/setup.py index 7141f170..2cea853d 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ setup( package_dir={'paddleocr': ''}, include_package_data=True, entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, - version='0.0.3', + version='1.0.0', install_requires=requirements, license='Apache License 2.0', description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',