From fc85051d6472469d1a301b43aafcb767da7258f4 Mon Sep 17 00:00:00 2001
From: littletomatodonkey <2120160898@bit.edu.cn>
Date: Thu, 17 Dec 2020 15:28:19 +0800
Subject: [PATCH 01/60] add support for cpu infer (#1480)
* add support for cpu infer
* fix readme
---
StyleText/README.md | 11 ++++++-----
StyleText/README_ch.md | 5 +++--
StyleText/engine/predictors.py | 1 +
StyleText/tools/synth_dataset.py | 8 ++++++++
StyleText/tools/synth_image.py | 6 +++---
5 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/StyleText/README.md b/StyleText/README.md
index 648b1267..632f7b6d 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -69,12 +69,13 @@ fusion_generator:
1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
-* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
+* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
+* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
For example, enter the following image and corpus `PaddleOCR`.
@@ -122,7 +123,7 @@ In actual application scenarios, it is often necessary to synthesize pictures in
* `corpus_file`: Filepath of the corpus. Corpus file should be a text file which will be split by line-endings('\n'). Corpus generator samples one line each time.
-Example of corpus file:
+Example of corpus file:
```
PaddleOCR
飞桨文字识别
@@ -139,9 +140,9 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
2. You can run the following command to start synthesis task:
``` bash
- python -m tools.synth_dataset.py -c configs/dataset_config.yml
+ python3 tools/synth_dataset.py -c configs/dataset_config.yml
```
-We also provide example corpus and images in `examples` folder.
+We also provide example corpus and images in `examples` folder.

diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md
index 0dd5822b..5b8a3ee0 100644
--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -61,11 +61,12 @@ fusion_generator:
输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下:
```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
* 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
如果输入图像尺寸相差过多,效果可能不佳。
+* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
例如,输入如下图片和语料"PaddleOCR":
@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
2. 运行`tools/synth_dataset`合成数据:
``` bash
- python -m tools.synth_dataset -c configs/dataset_config.yml
+ python tools/synth_dataset.py -c configs/dataset_config.yml
```
我们在examples目录下提供了样例图片和语料。
diff --git a/StyleText/engine/predictors.py b/StyleText/engine/predictors.py
index d9f4afe4..a1ba21f1 100644
--- a/StyleText/engine/predictors.py
+++ b/StyleText/engine/predictors.py
@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
], "Generator {} not supported.".format(algorithm)
use_gpu = config["Global"]['use_gpu']
check_gpu(use_gpu)
+ paddle.set_device('gpu' if use_gpu else 'cpu')
self.logger = get_logger()
self.generator = getattr(style_text_rec, algorithm)(config)
self.height = config["Global"]["image_height"]
diff --git a/StyleText/tools/synth_dataset.py b/StyleText/tools/synth_dataset.py
index 4a0e6d5e..a75f7f39 100644
--- a/StyleText/tools/synth_dataset.py
+++ b/StyleText/tools/synth_dataset.py
@@ -11,6 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
from engine.synthesisers import DatasetSynthesiser
diff --git a/StyleText/tools/synth_image.py b/StyleText/tools/synth_image.py
index 7b4827b8..cbc31186 100644
--- a/StyleText/tools/synth_image.py
+++ b/StyleText/tools/synth_image.py
@@ -16,13 +16,13 @@ import cv2
import sys
import glob
-from utils.config import ArgsParser
-from engine.synthesisers import ImageSynthesiser
-
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+from utils.config import ArgsParser
+from engine.synthesisers import ImageSynthesiser
+
def synth_image():
args = ArgsParser().parse_args()
From fd92294bfa1056bd53339f3ccd6bf2be3c9e936d Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Thu, 17 Dec 2020 20:27:43 +0800
Subject: [PATCH 02/60] fluid 2 paddle
---
deploy/cpp_infer/readme_en.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md
index 30a6912e..8bd76c04 100644
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -107,10 +107,10 @@ make inference_lib_dist
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
-* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`.
+* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
```
-build/fluid_inference_install_dir/
+build/paddle_inference_install_dir/
|-- CMakeCache.txt
|-- paddle
|-- third_party
From 65b4cc0cf336efb1e6d1ed9ad2f91bd3fbbd1866 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Thu, 17 Dec 2020 20:58:23 +0800
Subject: [PATCH 03/60] adjust rec_batch_num as 1
---
tools/infer/utility.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 4b06b60b..1b5e6b52 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -62,7 +62,7 @@ def parse_args():
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
- parser.add_argument("--rec_batch_num", type=int, default=6)
+ parser.add_argument("--rec_batch_num", type=int, default=1)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument(
"--rec_char_dict_path",
From 30e2bd4f34dd4b566ac4ad3831e33b3c1fcecd53 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 10:04:50 +0800
Subject: [PATCH 04/60] =?UTF-8?q?=E9=80=82=E9=85=8Dpy3.5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
paddleocr.py | 4 ++--
setup.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/paddleocr.py b/paddleocr.py
index 1d8cd254..3c3c47ab 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
sys.exit(0)
- postprocess_params.rec_char_dict_path = Path(
- __file__).parent / postprocess_params.rec_char_dict_path
+ postprocess_params.rec_char_dict_path = str(
+ Path(__file__).parent / postprocess_params.rec_char_dict_path)
# init det_model and rec_model
super().__init__(postprocess_params)
diff --git a/setup.py b/setup.py
index f92074be..58f6de48 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''},
include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
- version='2.0.1',
+ version='2.0.2',
install_requires=requirements,
license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
From a6fd8f80664c458b11e8acea3cd120d30c8834fc Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Fri, 18 Dec 2020 11:11:02 +0800
Subject: [PATCH 05/60] adjust cls_batch_num to 6 for speed-up
---
tools/infer/utility.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 1b5e6b52..162f6cb7 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -78,7 +78,7 @@ def parse_args():
parser.add_argument("--cls_model_dir", type=str)
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
parser.add_argument("--label_list", type=list, default=['0', '180'])
- parser.add_argument("--cls_batch_num", type=int, default=30)
+ parser.add_argument("--cls_batch_num", type=int, default=6)
parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
From d489d3c27dbd592a7ad761da6b7e7ee868061499 Mon Sep 17 00:00:00 2001
From: MissPenguin
Date: Fri, 18 Dec 2020 03:40:22 +0000
Subject: [PATCH 06/60] fix sast inference bug
---
tools/infer/predict_det.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index ba0adaee..820d3413 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -70,6 +70,9 @@ class TextDetector(object):
postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST":
+ pre_process_list[0] = {
+ 'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+ }
postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
From ec37732512e238598c128eef11c9f2f012ce5915 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Fri, 18 Dec 2020 14:45:06 +0800
Subject: [PATCH 07/60] fix db process
---
deploy/cpp_infer/src/preprocess_op.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp
index 494b774a..cdb20c31 100644
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32;
else
- resize_h = (resize_h / 32 - 1) * 32;
+ resize_h = (resize_h / 32) * 32;
if (resize_w % 32 == 0)
resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32;
else
- resize_w = (resize_w / 32 - 1) * 32;
+ resize_w = (resize_w / 32) * 32;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
From 9039cca26d3fcf35fa432e39b299da50df6342b3 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Fri, 18 Dec 2020 15:27:44 +0800
Subject: [PATCH 08/60] add tensorrt args
---
tools/infer/predict_det.py | 1 +
tools/infer/utility.py | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index ba0adaee..1720369d 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -35,6 +35,7 @@ logger = get_logger()
class TextDetector(object):
def __init__(self, args):
+ self.args = args
self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 162f6cb7..b793254d 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -33,6 +33,8 @@ def parse_args():
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+ parser.add_argument("--use_fp16", type=str2bool, default=False)
+ parser.add_argument("--max_batch_size", type=int, default=10)
parser.add_argument("--gpu_mem", type=int, default=8000)
# params for text detector
@@ -46,7 +48,7 @@ def parse_args():
parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
-
+ parser.add_argument("--max_batch_size", type=int, default=10)
# EAST parmas
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
@@ -113,6 +115,11 @@ def create_predictor(args, mode, logger):
if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0)
+ if args.use_tensorrt:
+ config.enable_tensorrt_engine(
+ precision_mode=AnalysisConfig.Precision.Half
+ if args.use_fp16 else AnalysisConfig.Precision.Float32,
+ max_batch_size=args.max_batch_size)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(6)
From e3a2f818fa95563df7b81baf8eacafb5f2da0460 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 17:46:48 +0800
Subject: [PATCH 09/60] fix typo error
---
deploy/cpp_infer/tools/config.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 95d7989b..f1ab0b11 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -11,7 +11,7 @@ max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
-det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
+det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config
use_angle_cls 0
From 7f76986c288d7a340c21012a32faf72f80053e96 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 18:29:13 +0800
Subject: [PATCH 10/60] Remove duplicate definitions
---
tools/infer/utility.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index b793254d..c3d294e6 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -34,7 +34,6 @@ def parse_args():
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False)
- parser.add_argument("--max_batch_size", type=int, default=10)
parser.add_argument("--gpu_mem", type=int, default=8000)
# params for text detector
From ae124590150341ca11a833f5f74f175c7fb7799a Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 18:51:19 +0800
Subject: [PATCH 11/60] Save configuration files and logs only during training
---
tools/program.py | 22 ++++++++++++----------
tools/train.py | 2 +-
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/tools/program.py b/tools/program.py
index 787a59d4..4331f9d4 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
return metirc
-def preprocess():
+def preprocess(is_train=False):
FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
@@ -350,15 +350,17 @@ def preprocess():
device = paddle.set_device(device)
config['Global']['distributed'] = dist.get_world_size() != 1
-
- # save_config
- save_model_dir = config['Global']['save_model_dir']
- os.makedirs(save_model_dir, exist_ok=True)
- with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
- yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
-
- logger = get_logger(
- name='root', log_file='{}/train.log'.format(save_model_dir))
+ if is_train:
+ # save_config
+ save_model_dir = config['Global']['save_model_dir']
+ os.makedirs(save_model_dir, exist_ok=True)
+ with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
+ yaml.dump(
+ dict(config), f, default_flow_style=False, sort_keys=False)
+ log_file = '{}/train.log'.format(save_model_dir)
+ else:
+ log_file = None
+ logger = get_logger(name='root', log_file=log_file)
if config['Global']['use_visualdl']:
from visualdl import LogWriter
vdl_writer_path = '{}/vdl/'.format(save_model_dir)
diff --git a/tools/train.py b/tools/train.py
index 6e44c598..383f8d83 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -110,6 +110,6 @@ def test_reader(config, device, logger):
if __name__ == '__main__':
- config, device, logger, vdl_writer = program.preprocess()
+ config, device, logger, vdl_writer = program.preprocess(is_train=True)
main(config, device, logger, vdl_writer)
# test_reader(config, device, logger)
From 73d4b41ab03171c0b2782edc3a1e0bf029e45e4a Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 22:36:04 +0800
Subject: [PATCH 12/60] update inference doc
---
doc/doc_ch/inference.md | 11 ++++++++---
doc/doc_en/inference_en.md | 16 ++++++++++++----
2 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
index 09303e93..c69c127a 100755
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
超轻量中文识别模型推理,可以执行如下命令:
```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
+# 下载超轻量中文识别模型:
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
```

@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
```
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
-
```
CRNN 文本识别模型推理,可以执行如下命令:
@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
方向分类模型推理,可以执行如下命令:
```
-python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+# 下载超轻量中文方向分类器模型:
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```

diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md
index 3fcd36c0..8742b7ce 100755
--- a/doc/doc_en/inference_en.md
+++ b/doc/doc_en/inference_en.md
@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
For lightweight Chinese recognition model inference, you can execute the following commands:
```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
+# download CRNN text recognition inference model
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
```
-
+
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
```bash
-Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
+Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
```
@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
```
-
+```
+# download text angle class inference model:
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
+```

After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
From b615f6704d9b7e7e58c90fc246ec002d7485adac Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 18 Dec 2020 23:00:16 +0800
Subject: [PATCH 13/60] load model by pretrained_model
---
doc/doc_ch/angle_class.md | 2 +-
doc/doc_ch/detection.md | 6 +++---
doc/doc_ch/recognition.md | 5 ++---
doc/doc_en/angle_class_en.md | 2 +-
doc/doc_en/detection_en.md | 6 +++---
doc/doc_en/recognition_en.md | 8 ++++----
6 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/doc/doc_ch/angle_class.md b/doc/doc_ch/angle_class.md
index 3f2027b9..846be15f 100644
--- a/doc/doc_ch/angle_class.md
+++ b/doc/doc_ch/angle_class.md
@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
```
# 预测分类结果
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:
diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md
index 08b94a9c..8f0f6979 100644
--- a/doc/doc_ch/detection.md
+++ b/doc/doc_ch/detection.md
@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
测试单张图像的检测效果
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
测试DB模型时,调整后处理阈值,
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
```
测试文件夹下所有图像的检测效果
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md
index dc06365c..b473f3ac 100644
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -324,7 +324,6 @@ Eval:
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
-*注意* 评估时必须确保配置文件中 infer_img 字段为空
```
# GPU 评估, Global.checkpoints 为待测权重
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
```
# 预测英文结果
-python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
```
预测图片:
@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
```
# 预测中文结果
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:
diff --git a/doc/doc_en/angle_class_en.md b/doc/doc_en/angle_class_en.md
index 4c479e7b..e6157d16 100644
--- a/doc/doc_en/angle_class_en.md
+++ b/doc/doc_en/angle_class_en.md
@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
```
# Predict English results
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
```
Input image:
diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md
index 7638315a..5c4a63e2 100644
--- a/doc/doc_en/detection_en.md
+++ b/doc/doc_en/detection_en.md
@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
Test the detection result on a single image:
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
When testing the DB model, adjust the post-processing threshold:
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
```
Test the detection result on all images in the folder:
```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md
index bc8faa0f..7723d20b 100644
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -317,11 +317,11 @@ Eval:
### EVALUATION
-The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader.
+The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
```
# GPU evaluation, Global.checkpoints is the weight to be tested
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
```
@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
```
# Predict English results
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
```
Input image:
@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
```
# Predict Chinese results
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
Input image:
From fd3ddebbbfd4c6c8557bf4e9c1fd4d3b23a61aad Mon Sep 17 00:00:00 2001
From: dyning
Date: Sat, 19 Dec 2020 20:02:39 +0800
Subject: [PATCH 14/60] Update README_ch.md
---
StyleText/README_ch.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md
index 5b8a3ee0..68dc5a21 100644
--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -21,7 +21,7 @@
-Style-Text数据合成工具是基于百度自研的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
+Style-Text数据合成工具是基于文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
不同于常用的基于GAN的数据合成工具,Style-Text主要框架包括:1.文本前景风格迁移模块 2.背景抽取模块 3.融合模块。经过这样三步,就可以迅速实现图像文本风格迁移。下图是一些该数据合成工具效果图。
From fa1d09905510e67bd15799ea225519610faedfba Mon Sep 17 00:00:00 2001
From: dyning
Date: Sat, 19 Dec 2020 20:02:58 +0800
Subject: [PATCH 15/60] Update README.md
---
StyleText/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/StyleText/README.md b/StyleText/README.md
index 60a9ee99..d23cf5db 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -22,7 +22,7 @@ English | [简体中文](README_ch.md)
-The Style-Text data synthesis tool is a tool based on Baidu's self-developed text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
+The Style-Text data synthesis tool is a tool based on text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
Different from the commonly used GAN-based data synthesis tools, the main framework of Style-Text includes:
* (1) Text foreground style transfer module.
From 20a7688d23bf4c32f7c0303e56adef5f31b99370 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sat, 19 Dec 2020 20:07:06 +0800
Subject: [PATCH 16/60] Update README_ch.md
---
StyleText/README_ch.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md
index 68dc5a21..ca7f7122 100644
--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -21,7 +21,7 @@
-Style-Text数据合成工具是基于文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
+Style-Text数据合成工具是基于百度和华科合作研发的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
不同于常用的基于GAN的数据合成工具,Style-Text主要框架包括:1.文本前景风格迁移模块 2.背景抽取模块 3.融合模块。经过这样三步,就可以迅速实现图像文本风格迁移。下图是一些该数据合成工具效果图。
From 0bc91f49470b0dcae363b140955f119ea47abc47 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sat, 19 Dec 2020 20:10:54 +0800
Subject: [PATCH 17/60] Update README.md
---
StyleText/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/StyleText/README.md b/StyleText/README.md
index d23cf5db..a015bafb 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -22,7 +22,7 @@ English | [简体中文](README_ch.md)
-The Style-Text data synthesis tool is a tool based on text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
+The Style-Text data synthesis tool is a tool based on Baidu and HUST cooperation research work, text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
Different from the commonly used GAN-based data synthesis tools, the main framework of Style-Text includes:
* (1) Text foreground style transfer module.
From 9d1bc3ebb070282d691d1730ed01a122aedd34d1 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sat, 19 Dec 2020 20:11:21 +0800
Subject: [PATCH 18/60] Update README.md
---
StyleText/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/StyleText/README.md b/StyleText/README.md
index a015bafb..697f439c 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -22,7 +22,7 @@ English | [简体中文](README_ch.md)
-The Style-Text data synthesis tool is a tool based on Baidu and HUST cooperation research work, text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
+The Style-Text data synthesis tool is based on "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047), a text editing algorithm developed through joint research by Baidu and HUST.
Different from the commonly used GAN-based data synthesis tools, the main framework of Style-Text includes:
* (1) Text foreground style transfer module.
From 87eca8ff9c77804ac40dd2c7354edbbba51163d1 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:20:31 +0800
Subject: [PATCH 19/60] Update README_ch.md
---
README_ch.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README_ch.md b/README_ch.md
index afdd0536..b1cb778b 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -115,7 +115,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
-PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身,最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器和PACT量化的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
+PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测[2]、检测框矫正和CRNN文本识别三部分组成[7]。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身,最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器[8]和PACT量化[9]的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
## 效果展示 [more](./doc/doc_ch/visualization.md)
From 75a281f4dbe98a325a2c60b96a8305a5362f60d7 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:20:45 +0800
Subject: [PATCH 20/60] Update README.md
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 3f6737f8..9bb055cd 100644
--- a/README.md
+++ b/README.md
@@ -122,8 +122,7 @@ For a new language request, please refer to [Guideline for new language_requests
-PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection, detection frame correction and CRNN text recognition. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, The implementation of the FPGM Pruner and PACT quantization is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
-
+PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection[2], detection frame correction and CRNN text recognition[7]. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, The implementation of the FPGM Pruner [8] and PACT quantization [9] is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
## Visualization [more](./doc/doc_en/visualization_en.md)
From 623843881bad6534edd57cf64b40b9064baca420 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:25:58 +0800
Subject: [PATCH 22/60] Update algorithm_overview_en.md
---
doc/doc_en/algorithm_overview_en.md | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 7f1afd02..b0ef5022 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -11,9 +11,9 @@ This tutorial lists the text detection algorithms and text recognition algorithm
### 1. Text Detection Algorithm
PaddleOCR open source text detection algorithms list:
-- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
-- [x] DB([paper](https://arxiv.org/abs/1911.08947))
-- [x] SAST([paper](https://arxiv.org/abs/1908.05498) )(Baidu Self-Research)
+- [x] EAST[2]([paper](https://arxiv.org/abs/1704.03155))
+- [x] DB[1]([paper](https://arxiv.org/abs/1911.08947))
+- [x] SAST[4]([paper](https://arxiv.org/abs/1908.05498) )(Baidu Self-Research)
On the ICDAR2015 dataset, the text detection result is as follows:
@@ -39,11 +39,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
### 2. Text Recognition Algorithm
PaddleOCR open-source text recognition algorithms list:
-- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))
-- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
-- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
-- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
-- [ ] SRN([paper](https://arxiv.org/abs/2003.12294) )(Baidu Self-Research) coming soon
+- [x] CRNN[7]([paper](https://arxiv.org/abs/1507.05717))
+- [x] Rosetta[10]([paper](https://arxiv.org/abs/1910.05085))
+- [ ] STAR-Net[11]([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
+- [ ] RARE[12]([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
+- [ ] SRN[5]([paper](https://arxiv.org/abs/2003.12294) )(Baidu Self-Research) coming soon
Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:
From fc84375dc62e3b7ec68b4e605ee7367861a021a0 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:26:02 +0800
Subject: [PATCH 23/60] Update algorithm_overview.md
---
doc/doc_ch/algorithm_overview.md | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index a23bfcb1..a7d73042 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -9,9 +9,9 @@
### 1.文本检测算法
PaddleOCR开源的文本检测算法列表:
-- [x] DB([paper]( https://arxiv.org/abs/1911.08947) )(ppocr推荐)
-- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
-- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
+- [x] DB[2]([paper]( https://arxiv.org/abs/1911.08947) )(ppocr推荐)
+- [x] EAST[1]([paper](https://arxiv.org/abs/1704.03155))
+- [x] SAST[4]([paper](https://arxiv.org/abs/1908.05498))
在ICDAR2015文本检测公开数据集上,算法效果如下:
@@ -38,13 +38,13 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
### 2.文本识别算法
PaddleOCR基于动态图开源的文本识别算法列表:
-- [x] CRNN([paper](https://arxiv.org/abs/1507.05717) )(ppocr推荐)
-- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
-- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
-- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
-- [ ] SRN([paper](https://arxiv.org/abs/2003.12294)) coming soon
+- [x] CRNN[7]([paper](https://arxiv.org/abs/1507.05717) )(ppocr推荐)
+- [x] Rosetta[10]([paper](https://arxiv.org/abs/1910.05085))
+- [ ] STAR-Net[11]([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
+- [ ] RARE[12]([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
+- [ ] SRN[5]([paper](https://arxiv.org/abs/2003.12294)) coming soon
-参考[DTRB](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
+参考[DTRB](https://arxiv.org/abs/1904.01906)[3]文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|-|-|-|-|-|
From 65af573fb0787b1abada32a5baf71020e2ac77aa Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:26:44 +0800
Subject: [PATCH 24/60] Update algorithm_overview.md
---
doc/doc_ch/algorithm_overview.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index a7d73042..6ea90d2c 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -9,9 +9,9 @@
### 1.文本检测算法
PaddleOCR开源的文本检测算法列表:
-- [x] DB[2]([paper]( https://arxiv.org/abs/1911.08947) )(ppocr推荐)
-- [x] EAST[1]([paper](https://arxiv.org/abs/1704.03155))
-- [x] SAST[4]([paper](https://arxiv.org/abs/1908.05498))
+- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2](ppocr推荐)
+- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
+- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
在ICDAR2015文本检测公开数据集上,算法效果如下:
From e26a060b994ba80e8ddaf9531c3705d0a5c20c4f Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:27:20 +0800
Subject: [PATCH 25/60] Update algorithm_overview.md
---
doc/doc_ch/algorithm_overview.md | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 6ea90d2c..361e251b 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -38,11 +38,11 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
### 2.文本识别算法
PaddleOCR基于动态图开源的文本识别算法列表:
-- [x] CRNN[7]([paper](https://arxiv.org/abs/1507.05717) )(ppocr推荐)
-- [x] Rosetta[10]([paper](https://arxiv.org/abs/1910.05085))
-- [ ] STAR-Net[11]([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
-- [ ] RARE[12]([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
-- [ ] SRN[5]([paper](https://arxiv.org/abs/2003.12294)) coming soon
+- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐)
+- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
+- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
+- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
+- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
From e75c3c1033033e7079f646a21a4bbf307882b423 Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:28:52 +0800
Subject: [PATCH 26/60] Update algorithm_overview_en.md
---
doc/doc_en/algorithm_overview_en.md | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index b0ef5022..427b3de6 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -11,9 +11,9 @@ This tutorial lists the text detection algorithms and text recognition algorithm
### 1. Text Detection Algorithm
PaddleOCR open source text detection algorithms list:
-- [x] EAST[2]([paper](https://arxiv.org/abs/1704.03155))
-- [x] DB[1]([paper](https://arxiv.org/abs/1911.08947))
-- [x] SAST[4]([paper](https://arxiv.org/abs/1908.05498) )(Baidu Self-Research)
+- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
+- [x] DB([paper](https://arxiv.org/abs/1911.08947))[2]
+- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
On the ICDAR2015 dataset, the text detection result is as follows:
@@ -39,11 +39,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
### 2. Text Recognition Algorithm
PaddleOCR open-source text recognition algorithms list:
-- [x] CRNN[7]([paper](https://arxiv.org/abs/1507.05717))
-- [x] Rosetta[10]([paper](https://arxiv.org/abs/1910.05085))
-- [ ] STAR-Net[11]([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
-- [ ] RARE[12]([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
-- [ ] SRN[5]([paper](https://arxiv.org/abs/2003.12294) )(Baidu Self-Research) coming soon
+- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
+- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
+- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
+- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
+- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:
From 42fc648c8736cf6a820d5e1ee662811318b57cfc Mon Sep 17 00:00:00 2001
From: dyning
Date: Sun, 20 Dec 2020 08:29:12 +0800
Subject: [PATCH 27/60] Update reference.md
---
doc/doc_ch/reference.md | 78 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 69 insertions(+), 9 deletions(-)
diff --git a/doc/doc_ch/reference.md b/doc/doc_ch/reference.md
index 9d9a6785..f1337ded 100644
--- a/doc/doc_ch/reference.md
+++ b/doc/doc_ch/reference.md
@@ -11,11 +11,12 @@
}
2. DB:
-@article{liao2019real,
- title={Real-time Scene Text Detection with Differentiable Binarization},
+@inproceedings{liao2020real,
+ title={Real-Time Scene Text Detection with Differentiable Binarization.},
author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
- journal={arXiv preprint arXiv:1911.08947},
- year={2019}
+ booktitle={AAAI},
+ pages={11474--11481},
+ year={2020}
}
3. DTRB:
@@ -37,10 +38,11 @@
}
5. SRN:
-@article{yu2020towards,
- title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks},
- author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui},
- journal={arXiv preprint arXiv:2003.12294},
+@inproceedings{yu2020towards,
+ title={Towards accurate scene text recognition with semantic reasoning networks},
+ author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Liu, Tao and Han, Junyu and Liu, Jingtuo and Ding, Errui},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={12113--12122},
year={2020}
}
@@ -52,4 +54,62 @@
pages={9086--9095},
year={2019}
}
-```
\ No newline at end of file
+
+7. CRNN:
+@article{shi2016end,
+ title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition},
+ author={Shi, Baoguang and Bai, Xiang and Yao, Cong},
+ journal={IEEE transactions on pattern analysis and machine intelligence},
+ volume={39},
+ number={11},
+ pages={2298--2304},
+ year={2016},
+ publisher={IEEE}
+}
+
+8. FPGM:
+@inproceedings{he2019filter,
+ title={Filter pruning via geometric median for deep convolutional neural networks acceleration},
+ author={He, Yang and Liu, Ping and Wang, Ziwei and Hu, Zhilan and Yang, Yi},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={4340--4349},
+ year={2019}
+}
+
+9. PACT:
+@article{choi2018pact,
+ title={Pact: Parameterized clipping activation for quantized neural networks},
+ author={Choi, Jungwook and Wang, Zhuo and Venkataramani, Swagath and Chuang, Pierce I-Jen and Srinivasan, Vijayalakshmi and Gopalakrishnan, Kailash},
+ journal={arXiv preprint arXiv:1805.06085},
+ year={2018}
+}
+
+10. Rosetta:
+@inproceedings{borisyuk2018rosetta,
+ title={Rosetta: Large scale system for text detection and recognition in images},
+ author={Borisyuk, Fedor and Gordo, Albert and Sivakumar, Viswanath},
+ booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
+ pages={71--79},
+ year={2018}
+}
+
+11. STAR-Net:
+@inproceedings{liu2016star,
+ title={STAR-Net: A SpaTial Attention Residue Network for Scene Text Recognition.},
+ author={Liu, Wei and Chen, Chaofeng and Wong, Kwan-Yee K and Su, Zhizhong and Han, Junyu},
+ booktitle={BMVC},
+ volume={2},
+ pages={7},
+ year={2016}
+}
+
+12. RARE:
+@inproceedings{shi2016robust,
+ title={Robust scene text recognition with automatic rectification},
+ author={Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4168--4176},
+ year={2016}
+}
+
+```
From e6315ddd76cc73dea641b66b8cec8fbf06418911 Mon Sep 17 00:00:00 2001
From: gnoixnil'il
Date: Sun, 20 Dec 2020 13:46:40 +0800
Subject: [PATCH 28/60] Update det_basic_loss.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
删除重复赋值[self.main_loss_type]
---
ppocr/losses/det_basic_loss.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/ppocr/losses/det_basic_loss.py b/ppocr/losses/det_basic_loss.py
index ef656e8c..57b3667d 100644
--- a/ppocr/losses/det_basic_loss.py
+++ b/ppocr/losses/det_basic_loss.py
@@ -45,7 +45,6 @@ class BalanceLoss(nn.Layer):
self.balance_loss = balance_loss
self.main_loss_type = main_loss_type
self.negative_ratio = negative_ratio
- self.main_loss_type = main_loss_type
self.return_origin = return_origin
self.eps = eps
From f103ed2791d1c4aacabec6ef61658d8dc754067f Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:22:39 +0800
Subject: [PATCH 29/60] add FAQ
---
doc/doc_ch/FAQ.md | 104 ++++++++++++++++++++++++++++++++--------------
1 file changed, 73 insertions(+), 31 deletions(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 27b3126c..1e8ebf9b 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -14,37 +14,48 @@
* [【理论篇】OCR通用30个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
- * [模型训练调优7题](#模型训练调优2)
- * [预测部署9题](#预测部署2)
+ * [模型训练调优18题](#模型训练调优2)
* [【实战篇】PaddleOCR实战87个问题](#PaddleOCR实战问题)
- * [使用咨询21题](#使用咨询)
+ * [使用咨询23题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优25题](#模型训练调优3)
- * [预测部署24题](#预测部署3)
+ * [预测部署25题](#预测部署3)
## 近期更新(2020.12.14)
-#### Q3.1.21:PaddleOCR支持动态图吗?
+#### Q2.3.17: StyleText 合成数据效果不好?
+**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32*N,因此当前版本模型主要适用高度为32的数据。建议要合成的数据尺寸设置为32*N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
-**A**:动态图版本正在紧锣密鼓开发中,将于2020年12月16日发布,敬请关注。
+#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
+**A**:目前PaddleOCR有四个分支,分别是:
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能。
+- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
+- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
+- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
-#### Q3.3.23:检测模型训练或预测时出现elementwise_add报错
+如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
-**A**:设置的输入尺寸必须是32的倍数,否则在网络多次下采样和上采样后,feature map会产生1个像素的diff,从而导致elementwise_add时报shape不匹配的错误。
+**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
-#### Q3.3.24: DB检测训练输入尺寸640,可以改大一些吗?
+#### Q3.1.22: ModuleNotFoundError: No module named 'paddle.nn'
+**A**: paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为
+```
+python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
+```
-**A**: 不建议改大。检测模型训练输入尺寸是预处理中random crop后的尺寸,并非直接将原图进行resize,多数场景下这个尺寸并不小了,改大后可能反而并不合适,而且训练会变慢。另外,代码里可能有的地方参数按照预设输入尺寸适配的,改大后可能有隐藏风险。
+#### Q3.1.23: ImportError: /usr/lib/x86_64_linux-gnu/libstdc++.so.6:version `CXXABI_1.3.11` not found (required by /usr/lib/python3.6/site-package/paddle/fluid/core+avx.so)
+**A**:这个问题是glibc版本不足导致的,Paddle2.0rc1版本对gcc版本和glibc版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。
+如果您的环境不满足这个要求,或者使用的docker镜像为:
+`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`
+`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`,安装Paddle2.0rc版本可能会出现上述错误,
+2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。
+或者访问[dockerhub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获得与您机器适配的镜像。
-#### Q3.3.25: 识别模型训练时,loss能正常下降,但acc一直为0
+#### Q3.4.25 : PaddleOCR模型Python端预测和C++预测结果不一致?
+**A**:正常来说,python端预测和C++预测文本是一致的,如果预测结果差异较大,建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。如果更新代码后问题仍未解决,建议在PaddleOCR群里或者issue中抛出您的问题。
-**A**: 识别模型训练初期acc为0是正常的,多训一段时间指标就上来了。
-
-#### Q3.4.24:DB模型能正确推理预测,但换成EAST或SAST模型时报错或结果不正确
-
-**A**:使用EAST或SAST模型进行推理预测时,需要在命令中指定参数--det_algorithm="EAST" 或 --det_algorithm="SAST",使用DB时不用指定是因为该参数默认值是"DB":https://github.com/PaddlePaddle/PaddleOCR/blob/e7a708e9fdaf413ed7a14da8e4a7b4ac0b211e42/tools/infer/utility.py#L43
## 【精选】OCR精选10个问题
@@ -238,18 +249,15 @@
(2)调大系统的[l2 dcay值](https://github.com/PaddlePaddle/PaddleOCR/blob/a501603d54ff5513fc4fc760319472e59da25424/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml#L47)
-
-### 预测部署
-
-#### Q2.4.1:请问对于图片中的密集文字,有什么好的处理办法吗?
+#### Q2.3.8:请问对于图片中的密集文字,有什么好的处理办法吗?
**A**:可以先试用预训练模型测试一下,例如DB+CRNN,判断下密集文字图片中是检测还是识别的问题,然后针对性的改善。还有一种是如果图象中密集文字较小,可以尝试增大图像分辨率,对图像进行一定范围内的拉伸,将文字稀疏化,提高识别效果。
-#### Q2.4.2:对于一些在识别时稍微模糊的文本,有没有一些图像增强的方式?
+#### Q2.3.9:对于一些在识别时稍微模糊的文本,有没有一些图像增强的方式?
**A**:在人类肉眼可以识别的前提下,可以考虑图像处理中的均值滤波、中值滤波或者高斯滤波等模糊算子尝试。也可以尝试从数据扩增扰动来强化模型鲁棒性,另外新的思路有对抗性训练和超分SR思路,可以尝试借鉴。但目前业界尚无普遍认可的最优方案,建议优先在数据采集阶段增加一些限制提升图片质量。
-#### Q2.4.3:对于特定文字检测,例如身份证只检测姓名,检测指定区域文字更好,还是检测全部区域再筛选更好?
+#### Q2.3.10:对于特定文字检测,例如身份证只检测姓名,检测指定区域文字更好,还是检测全部区域再筛选更好?
**A**:两个角度来说明一般检测全部区域再筛选更好。
@@ -257,11 +265,11 @@
(2)产品的需求可能是变化的,不排除后续对于模型需求变化的可能性(比如又需要增加一个字段),相比于训练模型,后处理的逻辑会更容易调整。
-#### Q2.4.4:对于小白如何快速入门中文OCR项目实践?
+#### Q2.3.11:对于小白如何快速入门中文OCR项目实践?
**A**:建议可以先了解OCR方向的基础知识,大概了解基础的检测和识别模型算法。然后在Github上可以查看OCR方向相关的repo。目前来看,从内容的完备性来看,PaddleOCR的中英文双语教程文档是有明显优势的,在数据集、模型训练、预测部署文档详实,可以快速入手。而且还有微信用户群答疑,非常适合学习实践。项目地址:[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
-#### Q2.4.5:如何识别带空格的英文行文本图像?
+#### Q2.1.12:如何识别带空格的英文行文本图像?
**A**:空格识别可以考虑以下两种方案:
@@ -269,22 +277,36 @@
(2)优化文本识别算法。在识别字典里面引入空格字符,然后在识别的训练数据中,如果用空行,进行标注。此外,合成数据时,通过拼接训练数据,生成含有空格的文本。
-#### Q2.4.6:中英文一起识别时也可以加空格字符来训练吗
+#### Q2.3.13:中英文一起识别时也可以加空格字符来训练吗
**A**:中文识别可以加空格当做分隔符训练,具体的效果如何没法给出直接评判,根据实际业务数据训练来判断。
-#### Q2.4.7:低像素文字或者字号比较小的文字有什么超分辨率方法吗
+#### Q2.3.14:低像素文字或者字号比较小的文字有什么超分辨率方法吗
**A**:超分辨率方法分为传统方法和基于深度学习的方法。基于深度学习的方法中,比较经典的有SRCNN,另外CVPR2020也有一篇超分辨率的工作可以参考文章:Unpaired Image Super-Resolution using Pseudo-Supervision,但是没有充分的实践验证过,需要看实际场景下的效果。
-#### Q2.4.8:表格识别有什么好的模型 或者论文推荐么
+#### Q2.3.15:表格识别有什么好的模型 或者论文推荐么
**A**:表格目前学术界比较成熟的解决方案不多 ,可以尝试下分割的论文方案。
-#### Q2.4.9:弯曲文本有试过opencv的TPS进行弯曲校正吗?
+#### Q2.3.16:弯曲文本有试过opencv的TPS进行弯曲校正吗?
**A**:opencv的tps需要标出上下边界对应的点,这个点很难通过传统方法或者深度学习方法获取。PaddleOCR里StarNet网络中的tps模块实现了自动学点,自动校正,可以直接尝试这个。
+#### Q2.3.17: StyleText 合成数据效果不好?
+**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32*N,因此当前版本模型主要适用高度为32的数据。建议要合成的数据尺寸设置为32*N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
+
+#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
+**A** 目前PaddleOCR有四个分支,分别是:
+
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能。
+- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
+- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
+- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
+
+如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
+
+**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
@@ -392,6 +414,20 @@
**A**:动态图版本正在紧锣密鼓开发中,将于2020年12月16日发布,敬请关注。
+#### Q3.1.22:ModuleNotFoundError: No module named 'paddle.nn'
+**A**:paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为
+```
+python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
+```
+
+#### Q3.1.23: ImportError: /usr/lib/x86_64_linux-gnu/libstdc++.so.6:version `CXXABI_1.3.11` not found (required by /usr/lib/python3.6/site-package/paddle/fluid/core+avx.so)
+**A**:这个问题是glibc版本不足导致的,Paddle2.0rc1版本对gcc版本和glibc版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。
+如果您的环境不满足这个要求,或者使用的docker镜像为:
+`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`
+`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`,安装Paddle2.0rc版本可能会出现上述错误,2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。
+或者访问[dockerhub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获得与您机器适配的镜像。
+
+
### 数据集
@@ -594,11 +630,11 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
#### Q3.3.20: 文字检测时怎么模糊的数据增强?
-**A**: 模糊的数据增强需要修改代码进行添加,以DB为例,参考[Normalize](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppocr/data/imaug/operators.py#L60) ,添加模糊的增强就行
+**A**: 模糊的数据增强需要修改代码进行添加,以DB为例,参考[Normalize](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppocr/data/imaug/operators.py#L60) ,添加模糊的增强就行
#### Q3.3.21: 文字检测时怎么更改图片旋转的角度,实现360度任意旋转?
-**A**: 将[这里](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppocr/data/imaug/iaa_augment.py#L64) 的(-10,10) 改为(-180,180)即可
+**A**: 将[这里](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppocr/data/imaug/iaa_augment.py#L64) 的(-10,10) 改为(-180,180)即可
#### Q3.3.22: 训练数据的长宽比过大怎么修改shape
@@ -728,4 +764,10 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
#### Q3.4.24:DB模型能正确推理预测,但换成EAST或SAST模型时报错或结果不正确
-**A**:使用EAST或SAST模型进行推理预测时,需要在命令中指定参数--det_algorithm="EAST" 或 --det_algorithm="SAST",使用DB时不用指定是因为该参数默认值是"DB":https://github.com/PaddlePaddle/PaddleOCR/blob/e7a708e9fdaf413ed7a14da8e4a7b4ac0b211e42/tools/infer/utility.py#L43
\ No newline at end of file
+**A**:使用EAST或SAST模型进行推理预测时,需要在命令中指定参数--det_algorithm="EAST" 或 --det_algorithm="SAST",使用DB时不用指定是因为该参数默认值是"DB":https://github.com/PaddlePaddle/PaddleOCR/blob/e7a708e9fdaf413ed7a14da8e4a7b4ac0b211e42/tools/infer/utility.py#L43
+
+#### Q3.4.25 : PaddleOCR模型Python端预测和C++预测结果不一致?
+**A**:正常来说,python端预测和C++预测文本是一致的,如果预测结果差异较大,
+建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。
+其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。
+如果更新代码后问题仍未解决,建议在PaddleOCR微信群里或者issue中抛出您的问题。
From a8927059c99f07c6c561bb630d7276b7c203a4e5 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:24:27 +0800
Subject: [PATCH 30/60] * to x
---
doc/doc_ch/FAQ.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 1e8ebf9b..a893677f 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -26,7 +26,8 @@
## 近期更新(2020.12.14)
#### Q2.3.17: StyleText 合成数据效果不好?
-**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32*N,因此当前版本模型主要适用高度为32的数据。建议要合成的数据尺寸设置为32*N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
+**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
+建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
**A**:目前PaddleOCR有四个分支,分别是:
From 520670a4e4d1f8500a68a24370831a1109b6be43 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:26:05 +0800
Subject: [PATCH 31/60] * to x
---
doc/doc_ch/FAQ.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index a893677f..182abc8c 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -295,7 +295,8 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
**A**:opencv的tps需要标出上下边界对应的点,这个点很难通过传统方法或者深度学习方法获取。PaddleOCR里StarNet网络中的tps模块实现了自动学点,自动校正,可以直接尝试这个。
#### Q2.3.17: StyleText 合成数据效果不好?
-**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32*N,因此当前版本模型主要适用高度为32的数据。建议要合成的数据尺寸设置为32*N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
+**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
+建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
**A** 目前PaddleOCR有四个分支,分别是:
From 037bd74017b35e0aed8d77a6cede1f064b532835 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:35:46 +0800
Subject: [PATCH 32/60] fix comments
---
README_ch.md | 2 +-
doc/doc_ch/FAQ.md | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index afdd0536..c7ddf0fe 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -8,10 +8,10 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支
**近期更新**
+- 2020.12.21 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数132个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.12.14 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数127个,每周一都会更新,欢迎大家持续关注。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
-- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
- [More](./doc/doc_ch/update.md)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 182abc8c..5740bd0e 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -9,13 +9,13 @@
## PaddleOCR常见问题汇总(持续更新)
-* [近期更新(2020.12.14)](#近期更新)
+* [近期更新(2020.12.21)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
-* [【理论篇】OCR通用30个问题](#OCR通用问题)
+* [【理论篇】OCR通用32个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
* [模型训练调优18题](#模型训练调优2)
-* [【实战篇】PaddleOCR实战87个问题](#PaddleOCR实战问题)
+* [【实战篇】PaddleOCR实战90个问题](#PaddleOCR实战问题)
* [使用咨询23题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优25题](#模型训练调优3)
@@ -23,7 +23,7 @@
-## 近期更新(2020.12.14)
+## 近期更新(2020.12.21)
#### Q2.3.17: StyleText 合成数据效果不好?
**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
@@ -33,7 +33,7 @@
**A**:目前PaddleOCR有四个分支,分别是:
- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能。
- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
-- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
+- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
@@ -270,7 +270,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
**A**:建议可以先了解OCR方向的基础知识,大概了解基础的检测和识别模型算法。然后在Github上可以查看OCR方向相关的repo。目前来看,从内容的完备性来看,PaddleOCR的中英文双语教程文档是有明显优势的,在数据集、模型训练、预测部署文档详实,可以快速入手。而且还有微信用户群答疑,非常适合学习实践。项目地址:[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
-#### Q2.1.12:如何识别带空格的英文行文本图像?
+#### Q2.3.12:如何识别带空格的英文行文本图像?
**A**:空格识别可以考虑以下两种方案:
From b2768e4a552f65c059ab59a755aedf5864e4160d Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:42:15 +0800
Subject: [PATCH 33/60] fix comments
---
doc/doc_ch/FAQ.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 5740bd0e..275d9ac3 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -31,7 +31,7 @@
#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
**A**:目前PaddleOCR有四个分支,分别是:
-- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能。
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
@@ -301,7 +301,7 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
**A** 目前PaddleOCR有四个分支,分别是:
-- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能。
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
From f0b9aae35333afa96ecd2a332c6d674ce164020d Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 11:49:20 +0800
Subject: [PATCH 34/60] fix comment
---
README_ch.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index c7ddf0fe..20407d3b 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -10,8 +10,8 @@ PaddleOCR同时支持动态图与静态图两种编程范式
**近期更新**
- 2020.12.21 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数132个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
-- 2020.12.14 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数127个,每周一都会更新,欢迎大家持续关注。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
+- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
- [More](./doc/doc_ch/update.md)
@@ -101,8 +101,8 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- - [【理论篇】OCR通用30个问题](./doc/doc_ch/FAQ.md)
- - [【实战篇】PaddleOCR实战84个问题](./doc/doc_ch/FAQ.md)
+ - [【理论篇】OCR通用32个问题](./doc/doc_ch/FAQ.md)
+ - [【实战篇】PaddleOCR实战90个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
From e1d11ae0e3744f907b0cba4bb3733dc82d7efcb7 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 15:14:52 +0800
Subject: [PATCH 35/60] 2.3.18 to 3.1.24
---
README_ch.md | 4 ++--
doc/doc_ch/FAQ.md | 57 +++++++++++++++++++++++++----------------------
2 files changed, 32 insertions(+), 29 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index 20407d3b..774b074d 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -101,8 +101,8 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- - [【理论篇】OCR通用32个问题](./doc/doc_ch/FAQ.md)
- - [【实战篇】PaddleOCR实战90个问题](./doc/doc_ch/FAQ.md)
+ - [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
+ - [【实战篇】PaddleOCR实战91个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 275d9ac3..288a5492 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -11,12 +11,12 @@
* [近期更新(2020.12.21)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
-* [【理论篇】OCR通用32个问题](#OCR通用问题)
+* [【理论篇】OCR通用31个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
- * [模型训练调优18题](#模型训练调优2)
-* [【实战篇】PaddleOCR实战90个问题](#PaddleOCR实战问题)
- * [使用咨询23题](#使用咨询)
+ * [模型训练调优17题](#模型训练调优2)
+* [【实战篇】PaddleOCR实战91个问题](#PaddleOCR实战问题)
+ * [使用咨询24题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优25题](#模型训练调优3)
* [预测部署25题](#预测部署3)
@@ -29,17 +29,6 @@
**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
-#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
-**A**:目前PaddleOCR有四个分支,分别是:
-- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
-- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
-- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
-- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
-
-如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
-
-**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
-
#### Q3.1.22: ModuleNotFoundError: No module named 'paddle.nn',
**A**: paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为
```
@@ -54,6 +43,18 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。
或者访问[dockerhub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获得与您机器适配的镜像。
+#### Q3.1.24: PaddleOCR develop分支和dygraph分支的区别?
+**A**:目前PaddleOCR有四个分支,分别是:
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
+- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
+- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
+- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支持模型训练、预测,暂不支持移动端部署。
+
+如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
+
+**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
+
+
#### Q3.4.25 : PaddleOCR模型Python端预测和C++预测结果不一致?
**A**:正常来说,python端预测和C++预测文本是一致的,如果预测结果差异较大,建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。如果更新代码或者更新代码都没能解决,建议在PaddleOCR群里或者issue中抛出您的问题。
@@ -298,18 +299,6 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
-#### Q2.3.18: PaddleOCR develop分支和dygraph分支的区别?
-**A** 目前PaddleOCR有四个分支,分别是:
-
-- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
-- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
-- dygraph:基于Paddle动态图开发的分支,目前仍在开发中,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
-- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
-
-如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
-
-**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
-
## 【实战篇】PaddleOCR实战问题
@@ -430,6 +419,20 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
或者访问[dockerhub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获得与您机器适配的镜像。
+#### Q3.1.24: PaddleOCR develop分支和dygraph分支的区别?
+**A**:目前PaddleOCR有四个分支,分别是:
+
+- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
+- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
+- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
+- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支持模型训练、预测,暂不支持移动端部署。
+
+如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
+
+**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
+
+
+
### 数据集
From 390f240b822aac4f1e2b25a5eb5659ff11c9259a Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 16:30:04 +0800
Subject: [PATCH 36/60] python to python3
---
StyleText/README.md | 4 ++--
StyleText/README_ch.md | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/StyleText/README.md b/StyleText/README.md
index f978f438..5262b505 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -119,7 +119,7 @@ In actual application scenarios, it is often necessary to synthesize pictures in
* `corpus_file`: Filepath of the corpus. Corpus file should be a text file which will be split by line-endings('\n'). Corpus generator samples one line each time.
-Example of corpus file:
+Example of corpus file:
```
PaddleOCR
飞桨文字识别
@@ -136,7 +136,7 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
2. You can run the following command to start synthesis task:
``` bash
- python -m tools.synth_dataset.py -c configs/dataset_config.yml
+ python3 -m tools.synth_dataset -c configs/dataset_config.yml
```
diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md
index a8ab933b..cce136c0 100644
--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -124,7 +124,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
2. 运行`tools/synth_dataset`合成数据:
``` bash
- python -m tools.synth_dataset -c configs/dataset_config.yml
+ python3 -m tools.synth_dataset -c configs/dataset_config.yml
```
From 59af7359befb7ba50109e3f694f740a064685a95 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Mon, 21 Dec 2020 17:10:00 +0800
Subject: [PATCH 37/60] inference adaptation 2.0
---
tools/infer/predict_cls.py | 14 +++++---------
tools/infer/predict_det.py | 13 +++++--------
tools/infer/predict_rec.py | 13 ++++---------
tools/infer/utility.py | 25 ++++++++++---------------
4 files changed, 24 insertions(+), 41 deletions(-)
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
index 420213ee..3bda7d7d 100755
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -39,7 +39,6 @@ class TextClassifier(object):
self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
self.cls_batch_num = args.cls_batch_num
self.cls_thresh = args.cls_thresh
- self.use_zero_copy_run = args.use_zero_copy_run
postprocess_params = {
'name': 'ClsPostProcess',
"label_list": args.label_list,
@@ -99,12 +98,8 @@ class TextClassifier(object):
norm_img_batch = norm_img_batch.copy()
starttime = time.time()
- if self.use_zero_copy_run:
- self.input_tensor.copy_from_cpu(norm_img_batch)
- self.predictor.zero_copy_run()
- else:
- norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
- self.predictor.run([norm_img_batch])
+ self.input_tensor.copy_from_cpu(norm_img_batch)
+ self.predictor.run()
prob_out = self.output_tensors[0].copy_to_cpu()
cls_result = self.postprocess_op(prob_out)
elapse += time.time() - starttime
@@ -143,10 +138,11 @@ def main(args):
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit()
for ino in range(len(img_list)):
- logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], cls_res[
- ino]))
+ logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+ cls_res[ino]))
logger.info("Total predict time for {} images, cost: {:.3f}".format(
len(img_list), predict_time))
+
if __name__ == "__main__":
main(utility.parse_args())
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index fe772991..ad9cbf57 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -37,7 +37,6 @@ class TextDetector(object):
def __init__(self, args):
self.args = args
self.det_algorithm = args.det_algorithm
- self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{
'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len,
@@ -72,7 +71,9 @@ class TextDetector(object):
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST":
pre_process_list[0] = {
- 'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+ 'DetResizeForTest': {
+ 'resize_long': args.det_limit_side_len
+ }
}
postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh
@@ -161,12 +162,8 @@ class TextDetector(object):
img = img.copy()
starttime = time.time()
- if self.use_zero_copy_run:
- self.input_tensor.copy_from_cpu(img)
- self.predictor.zero_copy_run()
- else:
- im = paddle.fluid.core.PaddleTensor(img)
- self.predictor.run([im])
+ self.input_tensor.copy_from_cpu(img)
+ self.predictor.run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index c615fa0d..54082a50 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -39,7 +39,6 @@ class TextRecognizer(object):
self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm
- self.use_zero_copy_run = args.use_zero_copy_run
postprocess_params = {
'name': 'CTCLabelDecode',
"character_type": args.rec_char_type,
@@ -101,12 +100,8 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
starttime = time.time()
- if self.use_zero_copy_run:
- self.input_tensor.copy_from_cpu(norm_img_batch)
- self.predictor.zero_copy_run()
- else:
- norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
- self.predictor.run([norm_img_batch])
+ self.input_tensor.copy_from_cpu(norm_img_batch)
+ self.predictor.run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
@@ -145,8 +140,8 @@ def main(args):
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit()
for ino in range(len(img_list)):
- logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], rec_res[
- ino]))
+ logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+ rec_res[ino]))
logger.info("Total predict time for {} images, cost: {:.3f}".format(
len(img_list), predict_time))
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index c3d294e6..39c045de 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -20,8 +20,7 @@ import numpy as np
import json
from PIL import Image, ImageDraw, ImageFont
import math
-from paddle.fluid.core import AnalysisConfig
-from paddle.fluid.core import create_paddle_predictor
+from paddle import inference
def parse_args():
@@ -83,8 +82,6 @@ def parse_args():
parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
- parser.add_argument("--use_zero_copy_run", type=str2bool, default=False)
-
parser.add_argument("--use_pdserving", type=str2bool, default=False)
return parser.parse_args()
@@ -110,14 +107,14 @@ def create_predictor(args, mode, logger):
logger.info("not find params file path {}".format(params_file_path))
sys.exit(0)
- config = AnalysisConfig(model_file_path, params_file_path)
+ config = inference.Config(model_file_path, params_file_path)
if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt:
config.enable_tensorrt_engine(
- precision_mode=AnalysisConfig.Precision.Half
- if args.use_fp16 else AnalysisConfig.Precision.Float32,
+ precision_mode=inference.PrecisionType.Half
+ if args.use_fp16 else inference.PrecisionType.Float32,
max_batch_size=args.max_batch_size)
else:
config.disable_gpu()
@@ -130,20 +127,18 @@ def create_predictor(args, mode, logger):
# config.enable_memory_optim()
config.disable_glog_info()
- if args.use_zero_copy_run:
- config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
- config.switch_use_feed_fetch_ops(False)
- else:
- config.switch_use_feed_fetch_ops(True)
+ config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+ config.switch_use_feed_fetch_ops(False)
- predictor = create_paddle_predictor(config)
+ # create predictor
+ predictor = inference.create_predictor(config)
input_names = predictor.get_input_names()
for name in input_names:
- input_tensor = predictor.get_input_tensor(name)
+ input_tensor = predictor.get_input_handle(name)
output_names = predictor.get_output_names()
output_tensors = []
for output_name in output_names:
- output_tensor = predictor.get_output_tensor(output_name)
+ output_tensor = predictor.get_output_handle(output_name)
output_tensors.append(output_tensor)
return predictor, input_tensor, output_tensors
From 3a0090dc004a1411c4d5be30892c62c1a255b204 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Mon, 21 Dec 2020 17:13:32 +0800
Subject: [PATCH 38/60] delete fluid
---
ppocr/losses/det_sast_loss.py | 44 +++++++++++++++++------------------
tools/infer/predict_cls.py | 1 -
tools/infer/predict_det.py | 1 -
tools/infer/predict_rec.py | 1 -
tools/program.py | 2 +-
5 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/ppocr/losses/det_sast_loss.py b/ppocr/losses/det_sast_loss.py
index a07af6a4..2e0c756b 100644
--- a/ppocr/losses/det_sast_loss.py
+++ b/ppocr/losses/det_sast_loss.py
@@ -19,7 +19,6 @@ from __future__ import print_function
import paddle
from paddle import nn
from .det_basic_loss import DiceLoss
-import paddle.fluid as fluid
import numpy as np
@@ -27,9 +26,7 @@ class SASTLoss(nn.Layer):
"""
"""
- def __init__(self,
- eps=1e-6,
- **kwargs):
+ def __init__(self, eps=1e-6, **kwargs):
super(SASTLoss, self).__init__()
self.dice_loss = DiceLoss(eps=eps)
@@ -39,7 +36,7 @@ class SASTLoss(nn.Layer):
tcl_mask: N x 128 x 1
tcl_label: N x X list or LoDTensor
"""
-
+
f_score = predicts['f_score']
f_border = predicts['f_border']
f_tvo = predicts['f_tvo']
@@ -53,15 +50,17 @@ class SASTLoss(nn.Layer):
score_loss = 1.0 - 2 * intersection / (union + 1e-5)
#border loss
- l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
+ l_border_split, l_border_norm = paddle.split(
+ l_border, num_or_sections=[4, 1], axis=1)
f_border_split = f_border
border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
- l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape)
- l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
- l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)
+ l_border_norm_split = paddle.expand(
+ x=l_border_norm, shape=border_ex_shape)
+ l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
+ l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)
border_diff = l_border_split - f_border_split
- abs_border_diff = paddle.abs(border_diff)
+ abs_border_diff = paddle.abs(border_diff)
border_sign = abs_border_diff < 1.0
border_sign = paddle.cast(border_sign, dtype='float32')
border_sign.stop_gradient = True
@@ -72,15 +71,16 @@ class SASTLoss(nn.Layer):
(paddle.sum(l_border_score * l_border_mask) + 1e-5)
#tvo_loss
- l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1)
+ l_tvo_split, l_tvo_norm = paddle.split(
+ l_tvo, num_or_sections=[8, 1], axis=1)
f_tvo_split = f_tvo
tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
- l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
- l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
+ l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
+ l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
#
tvo_geo_diff = l_tvo_split - f_tvo_split
- abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
+ abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
tvo_sign = abs_tvo_geo_diff < 1.0
tvo_sign = paddle.cast(tvo_sign, dtype='float32')
tvo_sign.stop_gradient = True
@@ -91,15 +91,16 @@ class SASTLoss(nn.Layer):
(paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)
#tco_loss
- l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1)
+ l_tco_split, l_tco_norm = paddle.split(
+ l_tco, num_or_sections=[2, 1], axis=1)
f_tco_split = f_tco
tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
- l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
- l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)
-
+ l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
+ l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)
+
tco_geo_diff = l_tco_split - f_tco_split
- abs_tco_geo_diff = paddle.abs(tco_geo_diff)
+ abs_tco_geo_diff = paddle.abs(tco_geo_diff)
tco_sign = abs_tco_geo_diff < 1.0
tco_sign = paddle.cast(tco_sign, dtype='float32')
tco_sign.stop_gradient = True
@@ -109,13 +110,12 @@ class SASTLoss(nn.Layer):
tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
(paddle.sum(l_tco_score * l_tco_mask) + 1e-5)
-
# total loss
tvo_lw, tco_lw = 1.5, 1.5
score_lw, border_lw = 1.0, 1.0
total_loss = score_loss * score_lw + border_loss * border_lw + \
tvo_loss * tvo_lw + tco_loss * tco_lw
-
+
losses = {'loss':total_loss, "score_loss":score_loss,\
"border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
- return losses
\ No newline at end of file
+ return losses
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
index 3bda7d7d..6604d3cc 100755
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -24,7 +24,6 @@ import numpy as np
import math
import time
import traceback
-import paddle.fluid as fluid
import tools.infer.utility as utility
from ppocr.postprocess import build_post_process
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index ad9cbf57..e677eb24 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -22,7 +22,6 @@ import cv2
import numpy as np
import time
import sys
-import paddle
import tools.infer.utility as utility
from ppocr.utils.logging import get_logger
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 54082a50..89d04cff 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -23,7 +23,6 @@ import numpy as np
import math
import time
import traceback
-import paddle.fluid as fluid
import tools.infer.utility as utility
from ppocr.postprocess import build_post_process
diff --git a/tools/program.py b/tools/program.py
index 4331f9d4..c712fe14 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -131,7 +131,7 @@ def check_gpu(use_gpu):
"model on CPU"
try:
- if use_gpu and not paddle.fluid.is_compiled_with_cuda():
+ if use_gpu and not paddle.is_compiled_with_cuda():
print(err)
sys.exit(1)
except Exception as e:
From 551377a013bfbce830d16cf445248a1114c63e32 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 13:41:33 +0000
Subject: [PATCH 39/60] update cpp_infer to 2.0
---
deploy/cpp_infer/include/config.h | 8 ++----
deploy/cpp_infer/include/ocr_cls.h | 9 +++----
deploy/cpp_infer/include/ocr_det.h | 10 ++++----
deploy/cpp_infer/include/ocr_rec.h | 9 +++----
deploy/cpp_infer/src/config.cpp | 8 +++---
deploy/cpp_infer/src/main.cpp | 18 ++++++--------
deploy/cpp_infer/src/ocr_cls.cpp | 28 +++++++--------------
deploy/cpp_infer/src/ocr_det.cpp | 39 ++++++++++++------------------
deploy/cpp_infer/src/ocr_rec.cpp | 33 +++++++++----------------
deploy/cpp_infer/tools/config.txt | 7 +++---
10 files changed, 66 insertions(+), 103 deletions(-)
diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h
index 27539ea7..3faeede1 100644
--- a/deploy/cpp_infer/include/config.h
+++ b/deploy/cpp_infer/include/config.h
@@ -25,9 +25,9 @@
namespace PaddleOCR {
-class Config {
+class OCRConfig {
public:
- explicit Config(const std::string &config_file) {
+ explicit OCRConfig(const std::string &config_file) {
config_map_ = LoadConfig(config_file);
this->use_gpu = bool(stoi(config_map_["use_gpu"]));
@@ -41,8 +41,6 @@ public:
this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"]));
- this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"]));
-
this->max_side_len = stoi(config_map_["max_side_len"]);
this->det_db_thresh = stod(config_map_["det_db_thresh"]);
@@ -76,8 +74,6 @@ public:
bool use_mkldnn = false;
- bool use_zero_copy_run = false;
-
int max_side_len = 960;
double det_db_thresh = 0.3;
diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h
index 38a37cff..87772cc1 100644
--- a/deploy/cpp_infer/include/ocr_cls.h
+++ b/deploy/cpp_infer/include/ocr_cls.h
@@ -30,6 +30,8 @@
#include
#include
+using namespace paddle_infer;
+
namespace PaddleOCR {
class Classifier {
@@ -37,14 +39,12 @@ public:
explicit Classifier(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
- const bool &use_mkldnn, const bool &use_zero_copy_run,
- const double &cls_thresh) {
+ const bool &use_mkldnn, const double &cls_thresh) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
- this->use_zero_copy_run_ = use_zero_copy_run;
this->cls_thresh = cls_thresh;
@@ -57,14 +57,13 @@ public:
cv::Mat Run(cv::Mat &img);
private:
- std::shared_ptr predictor_;
+ std::shared_ptr predictor_;
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
- bool use_zero_copy_run_ = false;
double cls_thresh = 0.5;
std::vector mean_ = {0.5f, 0.5f, 0.5f};
diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h
index 0308d07f..d50fd70a 100644
--- a/deploy/cpp_infer/include/ocr_det.h
+++ b/deploy/cpp_infer/include/ocr_det.h
@@ -32,6 +32,8 @@
#include
#include
+using namespace paddle_infer;
+
namespace PaddleOCR {
class DBDetector {
@@ -39,8 +41,8 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
- const bool &use_mkldnn, const bool &use_zero_copy_run,
- const int &max_side_len, const double &det_db_thresh,
+ const bool &use_mkldnn, const int &max_side_len,
+ const double &det_db_thresh,
const double &det_db_box_thresh,
const double &det_db_unclip_ratio,
const bool &visualize) {
@@ -49,7 +51,6 @@ public:
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
- this->use_zero_copy_run_ = use_zero_copy_run;
this->max_side_len_ = max_side_len;
@@ -69,14 +70,13 @@ public:
void Run(cv::Mat &img, std::vector>> &boxes);
private:
- std::shared_ptr predictor_;
+ std::shared_ptr predictor_;
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
- bool use_zero_copy_run_ = false;
int max_side_len_ = 960;
diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h
index 89bcd82c..14b77b08 100644
--- a/deploy/cpp_infer/include/ocr_rec.h
+++ b/deploy/cpp_infer/include/ocr_rec.h
@@ -32,6 +32,8 @@
#include
#include
+using namespace paddle_infer;
+
namespace PaddleOCR {
class CRNNRecognizer {
@@ -39,14 +41,12 @@ public:
explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
- const bool &use_mkldnn, const bool &use_zero_copy_run,
- const string &label_path) {
+ const bool &use_mkldnn, const string &label_path) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
- this->use_zero_copy_run_ = use_zero_copy_run;
this->label_list_ = Utility::ReadDict(label_path);
this->label_list_.insert(this->label_list_.begin(),
@@ -63,14 +63,13 @@ public:
Classifier *cls);
private:
- std::shared_ptr predictor_;
+ std::shared_ptr predictor_;
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
- bool use_zero_copy_run_ = false;
std::vector label_list_;
diff --git a/deploy/cpp_infer/src/config.cpp b/deploy/cpp_infer/src/config.cpp
index 52dfa209..303c3c12 100644
--- a/deploy/cpp_infer/src/config.cpp
+++ b/deploy/cpp_infer/src/config.cpp
@@ -16,8 +16,8 @@
namespace PaddleOCR {
-std::vector Config::split(const std::string &str,
- const std::string &delim) {
+std::vector OCRConfig::split(const std::string &str,
+ const std::string &delim) {
std::vector res;
if ("" == str)
return res;
@@ -38,7 +38,7 @@ std::vector Config::split(const std::string &str,
}
std::map
-Config::LoadConfig(const std::string &config_path) {
+OCRConfig::LoadConfig(const std::string &config_path) {
auto config = Utility::ReadDict(config_path);
std::map dict;
@@ -53,7 +53,7 @@ Config::LoadConfig(const std::string &config_path) {
return dict;
}
-void Config::PrintConfigInfo() {
+void OCRConfig::PrintConfigInfo() {
std::cout << "=======Paddle OCR inference config======" << std::endl;
for (auto iter = config_map_.begin(); iter != config_map_.end(); iter++) {
std::cout << iter->first << " : " << iter->second << std::endl;
diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp
index 63da62c7..187b70dc 100644
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -42,7 +42,7 @@ int main(int argc, char **argv) {
exit(1);
}
- Config config(argv[1]);
+ OCRConfig config(argv[1]);
config.PrintConfigInfo();
@@ -50,24 +50,22 @@ int main(int argc, char **argv) {
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
- DBDetector det(
- config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem,
- config.cpu_math_library_num_threads, config.use_mkldnn,
- config.use_zero_copy_run, config.max_side_len, config.det_db_thresh,
- config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize);
+ DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
+ config.gpu_mem, config.cpu_math_library_num_threads,
+ config.use_mkldnn, config.max_side_len, config.det_db_thresh,
+ config.det_db_box_thresh, config.det_db_unclip_ratio,
+ config.visualize);
Classifier *cls = nullptr;
if (config.use_angle_cls == true) {
cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
- config.use_mkldnn, config.use_zero_copy_run,
- config.cls_thresh);
+ config.use_mkldnn, config.cls_thresh);
}
CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
- config.use_mkldnn, config.use_zero_copy_run,
- config.char_list_file);
+ config.use_mkldnn, config.char_list_file);
#ifdef USE_MKL
#pragma omp parallel
diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp
index fed2023f..9757b482 100644
--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -35,26 +35,16 @@ cv::Mat Classifier::Run(cv::Mat &img) {
this->permute_op_.Run(&resize_img, input.data());
// Inference.
- if (this->use_zero_copy_run_) {
- auto input_names = this->predictor_->GetInputNames();
- auto input_t = this->predictor_->GetInputTensor(input_names[0]);
- input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
- input_t->copy_from_cpu(input.data());
- this->predictor_->ZeroCopyRun();
- } else {
- paddle::PaddleTensor input_t;
- input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
- input_t.data =
- paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
- input_t.dtype = PaddleDType::FLOAT32;
- std::vector outputs;
- this->predictor_->Run({input_t}, &outputs, 1);
- }
+ auto input_names = this->predictor_->GetInputNames();
+ auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+ input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+ input_t->CopyFromCpu(input.data());
+ this->predictor_->Run();
std::vector softmax_out;
std::vector label_out;
auto output_names = this->predictor_->GetOutputNames();
- auto softmax_out_t = this->predictor_->GetOutputTensor(output_names[0]);
+ auto softmax_out_t = this->predictor_->GetOutputHandle(output_names[0]);
auto softmax_shape_out = softmax_out_t->shape();
int softmax_out_num =
@@ -63,7 +53,7 @@ cv::Mat Classifier::Run(cv::Mat &img) {
softmax_out.resize(softmax_out_num);
- softmax_out_t->copy_to_cpu(softmax_out.data());
+ softmax_out_t->CopyToCpu(softmax_out.data());
float score = 0;
int label = 0;
@@ -95,7 +85,7 @@ void Classifier::LoadModel(const std::string &model_dir) {
}
// false for zero copy tensor
- config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
+ config.SwitchUseFeedFetchOps(false);
// true for multiple input
config.SwitchSpecifyInputNames(true);
@@ -104,6 +94,6 @@ void Classifier::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim();
config.DisableGlogInfo();
- this->predictor_ = CreatePaddlePredictor(config);
+ this->predictor_ = CreatePredictor(config);
}
} // namespace PaddleOCR
diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp
index e253f9cc..c6c93991 100644
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -17,12 +17,17 @@
namespace PaddleOCR {
void DBDetector::LoadModel(const std::string &model_dir) {
- AnalysisConfig config;
+ // AnalysisConfig config;
+ paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams");
if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+ // config.EnableTensorRtEngine(
+ // 1 << 20, 1, 3,
+ // AnalysisConfig::Precision::kFloat32,
+ // false, false);
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
@@ -32,10 +37,8 @@ void DBDetector::LoadModel(const std::string &model_dir) {
}
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
-
- // false for zero copy tensor
- // true for commom tensor
- config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
+ // use zero_copy_run as default
+ config.SwitchUseFeedFetchOps(false);
// true for multiple input
config.SwitchSpecifyInputNames(true);
@@ -44,7 +47,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim();
config.DisableGlogInfo();
- this->predictor_ = CreatePaddlePredictor(config);
+ this->predictor_ = CreatePredictor(config);
}
void DBDetector::Run(cv::Mat &img,
@@ -64,31 +67,21 @@ void DBDetector::Run(cv::Mat &img,
this->permute_op_.Run(&resize_img, input.data());
// Inference.
- if (this->use_zero_copy_run_) {
- auto input_names = this->predictor_->GetInputNames();
- auto input_t = this->predictor_->GetInputTensor(input_names[0]);
- input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
- input_t->copy_from_cpu(input.data());
- this->predictor_->ZeroCopyRun();
- } else {
- paddle::PaddleTensor input_t;
- input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
- input_t.data =
- paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
- input_t.dtype = PaddleDType::FLOAT32;
- std::vector outputs;
- this->predictor_->Run({input_t}, &outputs, 1);
- }
+ auto input_names = this->predictor_->GetInputNames();
+ auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+ input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+ input_t->CopyFromCpu(input.data());
+ this->predictor_->Run();
std::vector out_data;
auto output_names = this->predictor_->GetOutputNames();
- auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
+ auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
std::vector output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies());
out_data.resize(out_num);
- output_t->copy_to_cpu(out_data.data());
+ output_t->CopyToCpu(out_data.data());
int n2 = output_shape[2];
int n3 = output_shape[3];
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index d4deb5a1..e33695a7 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -43,32 +43,22 @@ void CRNNRecognizer::Run(std::vector>> boxes,
this->permute_op_.Run(&resize_img, input.data());
// Inference.
- if (this->use_zero_copy_run_) {
- auto input_names = this->predictor_->GetInputNames();
- auto input_t = this->predictor_->GetInputTensor(input_names[0]);
- input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
- input_t->copy_from_cpu(input.data());
- this->predictor_->ZeroCopyRun();
- } else {
- paddle::PaddleTensor input_t;
- input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
- input_t.data =
- paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
- input_t.dtype = PaddleDType::FLOAT32;
- std::vector outputs;
- this->predictor_->Run({input_t}, &outputs, 1);
- }
+ auto input_names = this->predictor_->GetInputNames();
+ auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+ input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+ input_t->CopyFromCpu(input.data());
+ this->predictor_->Run();
std::vector predict_batch;
auto output_names = this->predictor_->GetOutputNames();
- auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
+ auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
auto predict_shape = output_t->shape();
int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
std::multiplies());
predict_batch.resize(out_num);
- output_t->copy_to_cpu(predict_batch.data());
+ output_t->CopyToCpu(predict_batch.data());
// ctc decode
std::vector str_res;
@@ -102,7 +92,8 @@ void CRNNRecognizer::Run(std::vector>> boxes,
}
void CRNNRecognizer::LoadModel(const std::string &model_dir) {
- AnalysisConfig config;
+ // AnalysisConfig config;
+ paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams");
@@ -118,9 +109,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
- // false for zero copy tensor
- // true for commom tensor
- config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
+ config.SwitchUseFeedFetchOps(false);
// true for multiple input
config.SwitchSpecifyInputNames(true);
@@ -129,7 +118,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim();
config.DisableGlogInfo();
- this->predictor_ = CreatePaddlePredictor(config);
+ this->predictor_ = CreatePredictor(config);
}
cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 95d7989b..87467aad 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -1,17 +1,16 @@
# model load config
-use_gpu 0
+use_gpu 0
gpu_id 0
gpu_mem 4000
cpu_math_library_num_threads 10
use_mkldnn 0
-use_zero_copy_run 1
# det config
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
-det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
+det_model_dir ../../../deploy/cpp_infer/inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config
use_angle_cls 0
@@ -19,7 +18,7 @@ cls_model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer/
cls_thresh 0.9
# rec config
-rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
+rec_model_dir ../../../deploy/cpp_infer/inference/ch_ppocr_mobile_v2.0_rec_infer/
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
# show the detection results
From 885378fd33f659f387aabc86c2b3de065db06d84 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 13:45:02 +0000
Subject: [PATCH 40/60] fix config.txt
---
deploy/cpp_infer/tools/config.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 3aa6d582..34f47ed8 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -18,7 +18,7 @@ cls_model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer/
cls_thresh 0.9
# rec config
-rec_model_dir ../../../deploy/cpp_infer/inference/ch_ppocr_mobile_v2.0_rec_infer/
+rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
# show the detection results
From d49aff19618eead1131feb5c1092ebe498a92df1 Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Mon, 21 Dec 2020 13:54:37 +0000
Subject: [PATCH 41/60] fix mkl warning
---
deploy/cpp_infer/src/main.cpp | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp
index 187b70dc..21890d45 100644
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -67,19 +67,6 @@ int main(int argc, char **argv) {
config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.char_list_file);
-#ifdef USE_MKL
-#pragma omp parallel
- for (auto i = 0; i < 10; i++) {
- LOG_IF(WARNING,
- config.cpu_math_library_num_threads != omp_get_num_threads())
- << "WARNING! MKL is running on " << omp_get_num_threads()
- << " threads while cpu_math_library_num_threads is set to "
- << config.cpu_math_library_num_threads
- << ". Possible reason could be 1. You have set omp_set_num_threads() "
- "somewhere; 2. MKL is not linked properly";
- }
-#endif
-
auto start = std::chrono::system_clock::now();
std::vector>> boxes;
det.Run(srcimg, boxes);
From d3bd5103772fe84ce15e3602bce72f1c595702fb Mon Sep 17 00:00:00 2001
From: zhoujun <572459439@qq.com>
Date: Mon, 21 Dec 2020 22:27:11 +0800
Subject: [PATCH 42/60] fix bugs in DetResizeForTest
---
ppocr/data/imaug/operators.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py
index 57cd3b4b..8b9175cf 100644
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -119,10 +119,10 @@ class DetResizeForTest(object):
if 'image_shape' in kwargs:
self.image_shape = kwargs['image_shape']
self.resize_type = 1
- if 'limit_side_len' in kwargs:
+ elif 'limit_side_len' in kwargs:
self.limit_side_len = kwargs['limit_side_len']
self.limit_type = kwargs.get('limit_type', 'min')
- if 'resize_long' in kwargs:
+ elif 'resize_long' in kwargs:
self.resize_type = 2
self.resize_long = kwargs.get('resize_long', 960)
else:
From dbd27878cb59789b52affff3556ee070d2c65cfa Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Tue, 22 Dec 2020 10:09:51 +0800
Subject: [PATCH 43/60] update cpp_infer readme
---
deploy/cpp_infer/readme.md | 15 +++++++--------
deploy/cpp_infer/readme_en.md | 19 ++++++++++---------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
index 66302a01..b563ecf4 100644
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -122,10 +122,10 @@ build/paddle_inference_install_dir/
* 下载之后使用下面的方法解压。
```
-tar -xf fluid_inference.tgz
+tar -xf paddle_inference.tgz
```
-最终会在当前的文件夹中生成`fluid_inference/`的子文件夹。
+最终会在当前的文件夹中生成`paddle_inference/`的子文件夹。
## 2 开始运行
@@ -137,11 +137,11 @@ tar -xf fluid_inference.tgz
```
inference/
|-- det_db
-| |--model
-| |--params
+| |--inference.pdiparams
+| |--inference.pdmodel
|-- rec_rcnn
-| |--model
-| |--params
+| |--inference.pdiparams
+| |--inference.pdmodel
```
@@ -180,7 +180,7 @@ cmake .. \
make -j
```
-`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`fluid_inference`文件夹)或者编译生成的Paddle预测库地址(`build/fluid_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中;为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
+`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
* 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
@@ -202,7 +202,6 @@ gpu_id 0 # GPU id,使用GPU时有效
gpu_mem 4000 # 申请的GPU内存
cpu_math_library_num_threads 10 # CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快
use_mkldnn 1 # 是否使用mkldnn库
-use_zero_copy_run 1 # 是否使用use_zero_copy_run进行预测
# det config
max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使得图像最长边为960
diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md
index 8bd76c04..41c764bc 100644
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -130,10 +130,10 @@ Among them, `paddle` is the Paddle library required for C++ prediction later, an
* After downloading, use the following method to uncompress.
```
-tar -xf fluid_inference.tgz
+tar -xf paddle_inference.tgz
```
-Finally you can see the following files in the folder of `fluid_inference/`.
+Finally you can see the following files in the folder of `paddle_inference/`.
## 2. Compile and run the demo
@@ -145,11 +145,11 @@ Finally you can see the following files in the folder of `fluid_inference/`.
```
inference/
|-- det_db
-| |--model
-| |--params
+| |--inference.pdiparams
+| |--inference.pdmodel
|-- rec_rcnn
-| |--model
-| |--params
+| |--inference.pdiparams
+| |--inference.pdmodel
```
@@ -188,7 +188,9 @@ cmake .. \
make -j
```
-`OPENCV_DIR` is the opencv installation path; `LIB_DIR` is the download (`fluid_inference` folder) or the generated Paddle inference library path (`build/fluid_inference_install_dir` folder); `CUDA_LIB_DIR` is the cuda library file path, in docker; it is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cudnn library file path, in docker it is `/usr/lib/x86_64-linux-gnu/`.
+`OPENCV_DIR` is the opencv installation path; `LIB_DIR` is the download (`paddle_inference` folder)
+or the generated Paddle inference library path (`build/paddle_inference_install_dir` folder);
+`CUDA_LIB_DIR` is the cuda library file path, in docker it is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cudnn library file path, in docker it is `/usr/lib/x86_64-linux-gnu/`.
* After the compilation is completed, an executable file named `ocr_system` will be generated in the `build` folder.
@@ -211,7 +213,6 @@ gpu_id 0 # GPU id when use_gpu is 1
gpu_mem 4000 # GPU memory requested
cpu_math_library_num_threads 10 # Number of threads when using CPU inference. When machine cores is enough, the large the value, the faster the inference speed
use_mkldnn 1 # Whether to use mkdlnn library
-use_zero_copy_run 1 # Whether to use use_zero_copy_run for inference
max_side_len 960 # Limit the maximum image height and width to 960
det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result
@@ -244,4 +245,4 @@ The detection results will be shown on the screen, which is as follows.
### 2.3 Notes
-* Paddle2.0.0-beta0 inference model library is recommanded for this tuturial.
+* Paddle2.0.0-beta0 inference model library is recommended for this tutorial.
From 2a04e1a12a422941b55e794df9d49fcc312b2e0b Mon Sep 17 00:00:00 2001
From: LDOUBLEV
Date: Tue, 22 Dec 2020 15:57:21 +0800
Subject: [PATCH 44/60] add export to limit GPU-memory-usage
---
tools/infer/predict_cls.py | 2 ++
tools/infer/predict_det.py | 2 ++
tools/infer/predict_rec.py | 2 ++
tools/infer/predict_system.py | 2 ++
tools/infer/utility.py | 5 +++--
tools/infer_cls.py | 2 ++
tools/infer_det.py | 2 ++
tools/infer_rec.py | 2 ++
8 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
index 6604d3cc..074172cc 100755
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -18,6 +18,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import cv2
import copy
import numpy as np
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index e677eb24..077692af 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -18,6 +18,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import cv2
import numpy as np
import time
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 89d04cff..974fdbb6 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -18,6 +18,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import cv2
import numpy as np
import math
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index 07dfc216..8c4f9214 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -18,6 +18,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import cv2
import copy
import numpy as np
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 39c045de..966fa3cc 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -33,7 +33,7 @@ def parse_args():
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False)
- parser.add_argument("--gpu_mem", type=int, default=8000)
+ parser.add_argument("--gpu_mem", type=int, default=500)
# params for text detector
parser.add_argument("--image_dir", type=str)
@@ -62,7 +62,7 @@ def parse_args():
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
- parser.add_argument("--rec_batch_num", type=int, default=1)
+ parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument(
"--rec_char_dict_path",
@@ -123,6 +123,7 @@ def create_predictor(args, mode, logger):
# cache 10 different shapes for mkldnn to avoid memory leak
config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()
+ args.rec_batch_num = 1
# config.enable_memory_optim()
config.disable_glog_info()
diff --git a/tools/infer_cls.py b/tools/infer_cls.py
index 85e11ac3..49696482 100755
--- a/tools/infer_cls.py
+++ b/tools/infer_cls.py
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import paddle
from ppocr.data import create_operators, transform
diff --git a/tools/infer_det.py b/tools/infer_det.py
index d1b1b752..d890970e 100755
--- a/tools/infer_det.py
+++ b/tools/infer_det.py
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import cv2
import json
import paddle
diff --git a/tools/infer_rec.py b/tools/infer_rec.py
index e3e85b5d..7e4b0811 100755
--- a/tools/infer_rec.py
+++ b/tools/infer_rec.py
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
import paddle
from ppocr.data import create_operators, transform
From 091574bbd1cc844ff7bfefbc51d9730e7a8733c5 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Thu, 24 Dec 2020 17:17:28 +0800
Subject: [PATCH 45/60] fix bug
---
configs/det/det_r50_vd_sast_totaltext.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/configs/det/det_r50_vd_sast_totaltext.yml b/configs/det/det_r50_vd_sast_totaltext.yml
index a92f1b6e..e040c420 100755
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
@@ -62,7 +62,7 @@ Train:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
- data_ratio_list: [0.5, 0.5]
+ ratio_list: [0.5, 0.5]
transforms:
- DecodeImage: # load image
img_mode: BGR
From 31aa45fed220e76feaaff717343e4e67f23e14a7 Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Fri, 25 Dec 2020 16:39:50 +0800
Subject: [PATCH 46/60] add init to fc1
---
ppocr/modeling/transforms/tps.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/ppocr/modeling/transforms/tps.py b/ppocr/modeling/transforms/tps.py
index 86665bed..e7a152c1 100644
--- a/ppocr/modeling/transforms/tps.py
+++ b/ppocr/modeling/transforms/tps.py
@@ -16,6 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import math
import paddle
from paddle import nn, ParamAttr
from paddle.nn import functional as F
@@ -88,11 +89,14 @@ class LocalizationNetwork(nn.Layer):
in_channels = num_filters
self.block_list.append(pool)
name = "loc_fc1"
+ stdv = 1.0 / math.sqrt(num_filters_list[-1] * 1.0)
self.fc1 = nn.Linear(
in_channels,
fc_dim,
weight_attr=ParamAttr(
- learning_rate=loc_lr, name=name + "_w"),
+ learning_rate=loc_lr,
+ name=name + "_w",
+ initializer=nn.initializer.Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name=name + '.b_0'),
name=name)
From 2a2c891d6bfd7f0887b0b9cfd09dbac6ece82909 Mon Sep 17 00:00:00 2001
From: zhoujun <572459439@qq.com>
Date: Sat, 26 Dec 2020 23:40:24 +0800
Subject: [PATCH 47/60] update ratio_list in db config
---
configs/det/det_mv3_db.yml | 2 +-
configs/det/det_r50_vd_db.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/configs/det/det_mv3_db.yml b/configs/det/det_mv3_db.yml
index 5c8a0923..bdb4afc0 100644
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@@ -67,7 +67,7 @@ Train:
data_dir: ./train_data/icdar2015/text_localization/
label_file_list:
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
- ratio_list: [0.5]
+ ratio_list: [1.0]
transforms:
- DecodeImage: # load image
img_mode: BGR
diff --git a/configs/det/det_r50_vd_db.yml b/configs/det/det_r50_vd_db.yml
index f1188fe3..19c059d6 100644
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
@@ -66,7 +66,7 @@ Train:
data_dir: ./train_data/icdar2015/text_localization/
label_file_list:
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
- ratio_list: [0.5]
+ ratio_list: [1.0]
transforms:
- DecodeImage: # load image
img_mode: BGR
From 8985f6c207fa36f52eb562f68afcc26c4dee149c Mon Sep 17 00:00:00 2001
From: littletomatodonkey <2120160898@bit.edu.cn>
Date: Mon, 28 Dec 2020 14:58:32 +0800
Subject: [PATCH 48/60] add faq 20201218 (#1590)
---
README_ch.md | 4 +--
doc/doc_ch/FAQ.md | 70 +++++++++++++++++++++++++++--------------------
2 files changed, 42 insertions(+), 32 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index 3954bf45..3b0a2bfb 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支
**近期更新**
-- 2020.12.21 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数132个,每周一都会更新,欢迎大家持续关注。
+- 2020.12.28 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数137个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
@@ -102,7 +102,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
- - [【实战篇】PaddleOCR实战91个问题](./doc/doc_ch/FAQ.md)
+ - [【实战篇】PaddleOCR实战96个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 288a5492..1abf30cc 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -9,54 +9,44 @@
## PaddleOCR常见问题汇总(持续更新)
-* [近期更新(2020.12.21)](#近期更新)
+* [近期更新(2020.12.28)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
* [【理论篇】OCR通用31个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
* [模型训练调优17题](#模型训练调优2)
-* [【实战篇】PaddleOCR实战91个问题](#PaddleOCR实战问题)
- * [使用咨询24题](#使用咨询)
+* [【实战篇】PaddleOCR实战96个问题](#PaddleOCR实战问题)
+ * [使用咨询28题](#使用咨询)
* [数据集17题](#数据集3)
- * [模型训练调优25题](#模型训练调优3)
+ * [模型训练调优26题](#模型训练调优3)
* [预测部署25题](#预测部署3)
-## 近期更新(2020.12.21)
+## 近期更新(2020.12.28)
-#### Q2.3.17: StyleText 合成数据效果不好?
-**A**: StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
-建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
+#### Q3.1.25: 使用dygraph分支,在docker中训练PaddleOCR的时候,数据路径没有任何问题,但是一直报错`reader raised an exception`,这是为什么呢?
-#### Q3.1.22: ModuleNotFoundError: No module named 'paddle.nn',
-**A**: paddle.nn是Paddle2.0版本特有的功能,请安装大于等于Paddle 2.0.0rc1的版本,安装方式为
-```
-python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/pypi/simple
-```
+**A** 创建docker的时候,`/dev/shm`的默认大小为64M,如果使用多进程读取数据,共享内存可能不够,因此需要给`/dev/shm`分配更大的空间,在创建docker的时候,传入`--shm-size=8g`表示给`/dev/shm`分配8g的空间。
-#### Q3.1.23: ImportError: /usr/lib/x86_64_linux-gnu/libstdc++.so.6:version `CXXABI_1.3.11` not found (required by /usr/lib/python3.6/site-package/paddle/fluid/core+avx.so)
-**A**:这个问题是glibc版本不足导致的,Paddle2.0rc1版本对gcc版本和glib版本有更高的要求,推荐gcc版本为8.2,glibc版本2.12以上。
-如果您的环境不满足这个要求,或者使用的docker镜像为:
-`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`
-`hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev`,安装Paddle2.0rc版本可能会出现上述错误,
-2.0版本推荐使用新的docker镜像 `paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82`。
-或者访问[dockerhub](https://hub.docker.com/r/paddlepaddle/paddle/tags/)获得与您机器适配的镜像。
+#### Q3.1.26: 在repo中没有找到Lite和PaddleServing相关的部署教程,这是在哪里呢?
-#### Q3.1.24: PaddleOCR develop分支和dygraph分支的区别?
-**A**:目前PaddleOCR有四个分支,分别是:
-- develop:基于Paddle静态图开发的分支,推荐使用paddle1.8 或者2.0版本,该分支具备完善的模型训练、预测、推理部署、量化裁剪等功能,领先于release/1.1分支。
-- release/1.1:PaddleOCR 发布的第一个稳定版本,基于静态图开发,具备完善的训练、预测、推理部署、量化裁剪等功能。
-- dygraph:基于Paddle动态图开发的分支,未来将作为主要开发分支,运行要求使用Paddle2.0rc1版本,目前仍在开发中。
-- release/2.0-rc1-0:PaddleOCR发布的第二个稳定版本,基于动态图和paddle2.0版本开发,动态图开发的工程更易于调试,目前支,支持模型训练、预测,暂不支持移动端部署。
+**A** 目前PaddleOCR的默认分支为dygraph,关于Lite和PaddleServing的动态图部署还在适配中,如果希望在Lite端或者使用PaddleServing部署,推荐使用develop分支(静态图)的代码。
-如果您已经上手过PaddleOCR,并且希望在各种环境上部署PaddleOCR,目前建议使用静态图分支,develop或者release/1.1分支。如果您是初学者,想快速训练,调试PaddleOCR中的算法,建议尝鲜PaddleOCR dygraph分支。
+#### Q3.1.27: 如何可视化acc,loss曲线图,模型网络结构图等?
-**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
+**A** 在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/guides/03_VisualDL/visualdl.html)。
+#### Q3.1.28: 在使用StyleText数据合成工具的时候,报错`ModuleNotFoundError: No module named 'utils.config'`,这是为什么呢?
+
+**A** 有2个解决方案
+- 在StyleText路径下面设置PYTHONPATH:`export PYTHONPATH=./`
+- 拉取最新的代码
+
+#### Q3.3.26: PaddleOCR在训练的时候一直使用cosine_decay的学习率下降策略,这是为什么呢?
+
+**A**: cosine_decay表示在训练的过程中,学习率按照cosine的变化趋势逐渐下降至0,在迭代轮数更长的情况下,比常量的学习率变化策略会有更好的收敛效果,因此在实际训练的时候,均采用了cosine_decay,来获得精度更高的模型。
-#### Q3.4.25 : PaddleOCR模型Python端预测和C++预测结果不一致?
-**A**:正常来说,python端预测和C++预测文本是一致的,如果预测结果差异较大,建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。如果更新代码或者更新代码都没能解决,建议在PaddleOCR群里或者issue中抛出您的问题。
@@ -431,7 +421,23 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
**注意**:develop和dygraph分支要求的Paddle版本、本地环境有差别,请注意不同分支环境安装部分的差异。
+#### Q3.1.25: 使用dygraph分支,在docker中训练PaddleOCR的时候,数据路径没有任何问题,但是一直报错`reader raised an exception`,这是为什么呢?
+**A** 创建docker的时候,`/dev/shm`的默认大小为64M,如果使用多进程读取数据,共享内存可能不够,因此需要给`/dev/shm`分配更大的空间,在创建docker的时候,传入`--shm-size=8g`表示给`/dev/shm`分配8g的空间。
+
+#### Q3.1.26: 在repo中没有找到Lite和PaddleServing相关的部署教程,这是在哪里呢?
+
+**A** 目前PaddleOCR的默认分支为dygraph,关于Lite和PaddleServing的动态图部署还在适配中,如果希望在Lite端或者使用PaddleServing部署,推荐使用develop分支(静态图)的代码。
+
+#### Q3.1.27: 如何可视化acc,loss曲线图,模型网络结构图等?
+
+**A** 在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/guides/03_VisualDL/visualdl.html)。
+
+#### Q3.1.28: 在使用StyleText数据合成工具的时候,报错`ModuleNotFoundError: No module named 'utils.config'`,这是为什么呢?
+
+**A** 有2个解决方案
+- 在StyleText路径下面设置PYTHONPATH:`export PYTHONPATH=./`
+- 拉取最新的代码
### 数据集
@@ -658,6 +664,10 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
**A**: 识别模型训练初期acc为0是正常的,多训一段时间指标就上来了。
+#### Q3.3.26: PaddleOCR在训练的时候一直使用cosine_decay的学习率下降策略,这是为什么呢?
+
+**A**: cosine_decay表示在训练的过程中,学习率按照cosine的变化趋势逐渐下降至0,在迭代轮数更长的情况下,比常量的学习率变化策略会有更好的收敛效果,因此在实际训练的时候,均采用了cosine_decay,来获得精度更高的模型。
+
### 预测部署
From de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d Mon Sep 17 00:00:00 2001
From: littletomatodonkey <2120160898@bit.edu.cn>
Date: Tue, 29 Dec 2020 13:49:43 +0800
Subject: [PATCH 49/60] add CyclicalCosineDecay (#1599)
---
ppocr/optimizer/learning_rate.py | 51 +++++++++++++++++++++++++++++---
ppocr/optimizer/lr_scheduler.py | 49 ++++++++++++++++++++++++++++++
tools/program.py | 4 +--
3 files changed, 98 insertions(+), 6 deletions(-)
create mode 100644 ppocr/optimizer/lr_scheduler.py
diff --git a/ppocr/optimizer/learning_rate.py b/ppocr/optimizer/learning_rate.py
index 8f303e83..e1b10992 100644
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
@@ -18,6 +18,7 @@ from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr
+from .lr_scheduler import CyclicalCosineDecay
class Linear(object):
@@ -46,7 +47,7 @@ class Linear(object):
self.end_lr = end_lr
self.power = power
self.last_epoch = last_epoch
- self.warmup_epoch = warmup_epoch * step_each_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
def __call__(self):
learning_rate = lr.PolynomialDecay(
@@ -87,7 +88,7 @@ class Cosine(object):
self.learning_rate = learning_rate
self.T_max = step_each_epoch * epochs
self.last_epoch = last_epoch
- self.warmup_epoch = warmup_epoch * step_each_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
def __call__(self):
learning_rate = lr.CosineAnnealingDecay(
@@ -129,7 +130,7 @@ class Step(object):
self.learning_rate = learning_rate
self.gamma = gamma
self.last_epoch = last_epoch
- self.warmup_epoch = warmup_epoch * step_each_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
def __call__(self):
learning_rate = lr.StepDecay(
@@ -168,7 +169,7 @@ class Piecewise(object):
self.boundaries = [step_each_epoch * e for e in decay_epochs]
self.values = values
self.last_epoch = last_epoch
- self.warmup_epoch = warmup_epoch * step_each_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
def __call__(self):
learning_rate = lr.PiecewiseDecay(
@@ -183,3 +184,45 @@ class Piecewise(object):
end_lr=self.values[0],
last_epoch=self.last_epoch)
return learning_rate
+
+
+class CyclicalCosine(object):
+ """
+ Cyclical cosine learning rate decay
+ Args:
+ learning_rate(float): initial learning rate
+ step_each_epoch(int): steps each epoch
+ epochs(int): total training epochs
+ cycle(int): period of the cosine learning rate
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ learning_rate,
+ step_each_epoch,
+ epochs,
+ cycle,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(CyclicalCosine, self).__init__()
+ self.learning_rate = learning_rate
+ self.T_max = step_each_epoch * epochs
+ self.last_epoch = last_epoch
+ self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+ self.cycle = round(cycle * step_each_epoch)
+
+ def __call__(self):
+ learning_rate = CyclicalCosineDecay(
+ learning_rate=self.learning_rate,
+ T_max=self.T_max,
+ cycle=self.cycle,
+ last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr.LinearWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.learning_rate,
+ last_epoch=self.last_epoch)
+ return learning_rate
diff --git a/ppocr/optimizer/lr_scheduler.py b/ppocr/optimizer/lr_scheduler.py
new file mode 100644
index 00000000..21aec737
--- /dev/null
+++ b/ppocr/optimizer/lr_scheduler.py
@@ -0,0 +1,49 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from paddle.optimizer.lr import LRScheduler
+
+
+class CyclicalCosineDecay(LRScheduler):
+ def __init__(self,
+ learning_rate,
+ T_max,
+ cycle=1,
+ last_epoch=-1,
+ eta_min=0.0,
+ verbose=False):
+ """
+ Cyclical cosine learning rate decay
+ A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf
+ Args:
+ learning rate(float): learning rate
+ T_max(int): maximum epoch num
+ cycle(int): period of the cosine decay
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ eta_min(float): minimum learning rate during training
+ verbose(bool): whether to print learning rate for each epoch
+ """
+ super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch,
+ verbose)
+ self.cycle = cycle
+ self.eta_min = eta_min
+
+ def get_lr(self):
+ if self.last_epoch == 0:
+ return self.base_lr
+ reletive_epoch = self.last_epoch % self.cycle
+ lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \
+ (1 + math.cos(math.pi * reletive_epoch / self.cycle))
+ return lr
diff --git a/tools/program.py b/tools/program.py
index c712fe14..c2915426 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -179,9 +179,9 @@ def train(config,
if 'start_epoch' in best_model_dict:
start_epoch = best_model_dict['start_epoch']
else:
- start_epoch = 0
+ start_epoch = 1
- for epoch in range(start_epoch, epoch_num):
+ for epoch in range(start_epoch, epoch_num + 1):
if epoch > 0:
train_dataloader = build_dataloader(config, 'Train', device, logger)
train_batch_cost = 0.0
From c03b0b0604f238f918b30dff3c043fb467ea27f2 Mon Sep 17 00:00:00 2001
From: zhoujun
Date: Thu, 31 Dec 2020 16:31:21 +0800
Subject: [PATCH 50/60] Update utility.py
---
tools/infer/utility.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 966fa3cc..c19946eb 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -83,6 +83,7 @@ def parse_args():
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--use_pdserving", type=str2bool, default=False)
+ parser.add_argument("--use_tensorrt", type=str2bool, default=False)
return parser.parse_args()
From 499ae66003c278cba948508af812adc80033496d Mon Sep 17 00:00:00 2001
From: zhoujun <572459439@qq.com>
Date: Fri, 1 Jan 2021 21:53:15 +0800
Subject: [PATCH 51/60] update utility
---
tools/infer/utility.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index c19946eb..966fa3cc 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -83,7 +83,6 @@ def parse_args():
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--use_pdserving", type=str2bool, default=False)
- parser.add_argument("--use_tensorrt", type=str2bool, default=False)
return parser.parse_args()
From c465f40417d24b9bd791b8c05344f1b5f52548fa Mon Sep 17 00:00:00 2001
From: WenmuZhou
Date: Mon, 4 Jan 2021 12:11:21 +0800
Subject: [PATCH 52/60] add tensorrt to hubserving
---
deploy/hubserving/ocr_cls/params.py | 2 +-
deploy/hubserving/ocr_det/params.py | 2 +-
deploy/hubserving/ocr_rec/params.py | 4 ++--
deploy/hubserving/ocr_system/params.py | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/deploy/hubserving/ocr_cls/params.py b/deploy/hubserving/ocr_cls/params.py
index 72a7a102..982f0136 100755
--- a/deploy/hubserving/ocr_cls/params.py
+++ b/deploy/hubserving/ocr_cls/params.py
@@ -18,7 +18,7 @@ def read_params():
cfg.cls_batch_num = 30
cfg.cls_thresh = 0.9
- cfg.use_zero_copy_run = False
cfg.use_pdserving = False
+ cfg.use_tensorrt = False
return cfg
diff --git a/deploy/hubserving/ocr_det/params.py b/deploy/hubserving/ocr_det/params.py
index e50decbb..13215890 100755
--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
@@ -27,7 +27,7 @@ def read_params():
# cfg.det_east_cover_thresh = 0.1
# cfg.det_east_nms_thresh = 0.2
- cfg.use_zero_copy_run = False
cfg.use_pdserving = False
+ cfg.use_tensorrt = False
return cfg
diff --git a/deploy/hubserving/ocr_rec/params.py b/deploy/hubserving/ocr_rec/params.py
index 6f428ecb..f8d29114 100644
--- a/deploy/hubserving/ocr_rec/params.py
+++ b/deploy/hubserving/ocr_rec/params.py
@@ -13,7 +13,7 @@ def read_params():
#params for text recognizer
cfg.rec_algorithm = "CRNN"
- cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
+ cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
@@ -23,7 +23,7 @@ def read_params():
cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
cfg.use_space_char = True
- cfg.use_zero_copy_run = False
cfg.use_pdserving = False
+ cfg.use_tensorrt = False
return cfg
diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py
index a0e1960b..add46666 100755
--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
@@ -47,8 +47,8 @@ def read_params():
cfg.cls_batch_num = 30
cfg.cls_thresh = 0.9
- cfg.use_zero_copy_run = False
cfg.use_pdserving = False
+ cfg.use_tensorrt = False
cfg.drop_score = 0.5
return cfg
From 554e57030b7c4a3fad397c159e185ee69c0863bb Mon Sep 17 00:00:00 2001
From: MissPenguin
Date: Mon, 4 Jan 2021 12:47:32 +0800
Subject: [PATCH 53/60] Update quickstart.md
---
doc/doc_ch/quickstart.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md
index eabf1d91..3cc1b529 100644
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -96,5 +96,5 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode
此外,文档教程中也提供了中文OCR模型的其他预测部署方式:
- [基于C++预测引擎推理](../../deploy/cpp_infer/readme.md)
-- [服务部署](../../deploy/pdserving/readme.md)
-- [端侧部署](../../deploy/lite/readme.md)
+- [服务部署](../../deploy/hubserving/readme.md)
+- [端侧部署(目前只支持静态图)](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/lite)
From c207f786bbbb867dff5af81c3717d00a7a4364d4 Mon Sep 17 00:00:00 2001
From: MissPenguin
Date: Mon, 4 Jan 2021 12:48:28 +0800
Subject: [PATCH 54/60] Update quickstart.md
---
doc/doc_ch/quickstart.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md
index 3cc1b529..d9460989 100644
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -96,5 +96,5 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode
此外,文档教程中也提供了中文OCR模型的其他预测部署方式:
- [基于C++预测引擎推理](../../deploy/cpp_infer/readme.md)
-- [服务部署](../../deploy/hubserving/readme.md)
+- [服务部署](../../deploy/hubserving)
- [端侧部署(目前只支持静态图)](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/lite)
From a486350b8e73ab9b8aa86aba2a5c07649da9c2d2 Mon Sep 17 00:00:00 2001
From: MissPenguin
Date: Mon, 4 Jan 2021 12:49:17 +0800
Subject: [PATCH 55/60] Update quickstart_en.md
---
doc/doc_en/quickstart_en.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md
index e351ecc6..a5c0881d 100644
--- a/doc/doc_en/quickstart_en.md
+++ b/doc/doc_en/quickstart_en.md
@@ -99,5 +99,5 @@ For more text detection and recognition tandem reasoning, please refer to the do
In addition, the tutorial also provides other deployment methods for the Chinese OCR model:
- [Server-side C++ inference](../../deploy/cpp_infer/readme_en.md)
-- [Service deployment](../../deploy/pdserving/readme_en.md)
-- [End-to-end deployment](../../deploy/lite/readme_en.md)
+- [Service deployment](../../deploy/hubserving)
+- [On-device deployment (currently static graph only)](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/lite)
From c91baf7cc66aa2db1911c99eafe68c6d99c2011a Mon Sep 17 00:00:00 2001
From: tink2123
Date: Mon, 4 Jan 2021 13:21:41 +0800
Subject: [PATCH 56/60] add faq, 210104
---
README_ch.md | 2 +-
doc/doc_ch/FAQ.md | 72 +++++++++++++++++++++++++++++++++--------------
2 files changed, 52 insertions(+), 22 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index 3b0a2bfb..7aa43222 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支
**近期更新**
-- 2020.12.28 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数137个,每周一都会更新,欢迎大家持续关注。
+- 2021.1.4 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数142个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 1abf30cc..2ad59f3d 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -9,45 +9,48 @@
## PaddleOCR常见问题汇总(持续更新)
-* [近期更新(2020.12.28)](#近期更新)
+* [近期更新(2020.1.4)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
-* [【理论篇】OCR通用31个问题](#OCR通用问题)
+* [【理论篇】OCR通用32个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
- * [模型训练调优17题](#模型训练调优2)
-* [【实战篇】PaddleOCR实战96个问题](#PaddleOCR实战问题)
- * [使用咨询28题](#使用咨询)
+ * [模型训练调优18题](#模型训练调优2)
+* [【实战篇】PaddleOCR实战100个问题](#PaddleOCR实战问题)
+ * [使用咨询31题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优26题](#模型训练调优3)
- * [预测部署25题](#预测部署3)
+ * [预测部署26题](#预测部署3)
-## 近期更新(2020.12.28)
+## 近期更新(2021.1.4)
-#### Q3.1.25: 使用dygraph分支,在docker中训练PaddleOCR的时候,数据路径没有任何问题,但是一直报错`reader rasied an exception`,这是为什么呢?
+#### Q2.3.18: 请问有哪些修改骨干网络的技巧?
-**A** 创建docker的时候,`/dev/shm`的默认大小为64M,如果使用多进程读取数据,共享内存可能不够,因此需要给`/dev/shm`分配更大的空间,在创建docker的时候,传入`--shm-size=8g`表示给`/dev/shm`分配8g的空间。
+**A**: 可以参考HS-ResNet这篇文章:https://arxiv.org/pdf/2010.07621.pdf
-#### Q3.1.26: 在repo中没有找到Lite和PaddleServing相关的部署教程,这是在哪里呢?
+#### Q3.1.29: PPOCRLabel创建矩形框时只能拖出正方形,如何进行矩形标注?
-**A** 目前PaddleOCR的默认分支为dygraph,关于Lite和PaddleLite的动态图部署还在适配中,如果希望在Lite端或者使用PaddleServing部署,推荐使用develop分支(静态图)的代码。
+**A** 取消勾选:“编辑”-“正方形标注”
-#### Q3.1.27: 如何可视化acc,loss曲线图,模型网络结构图等?
+#### Q3.1.30: Style-Text 如何不文字风格迁移,就像普通文本生成程序一样默认字体直接输出到分割的背景图?
-**A** 在配置文件里有`use_visualdl`的参数,设置为True即可,更多的使用命令可以参考:[VisualDL使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/guides/03_VisualDL/visualdl.html)。
+**A** 使用image_synth模式会输出fake_bg.jpg,即为背景图。如果想要批量提取背景,可以稍微修改一下代码,将fake_bg保存下来即可。要修改的位置:
+https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/StyleText/engine/synthesisers.py#L68
-#### Q3.1.28: 在使用StyleText数据合成工具的时候,报错`ModuleNotFoundError: No module named 'utils.config'`,这是为什么呢?
+#### Q3.1.31: 怎么输出网络结构以及每层的参数信息?
-**A** 有2个解决方案
-- 在StyleText路径下面设置PYTHONPATH:`export PYTHONPATH=./`
-- 拉取最新的代码
-
-#### Q3.3.26: PaddleOCR在训练的时候一直使用cosine_decay的学习率下降策略,这是为什么呢?
-
-**A**: cosine_decay表示在训练的过程中,学习率按照cosine的变化趋势逐渐下降至0,在迭代轮数更长的情况下,比常量的学习率变化策略会有更好的收敛效果,因此在实际训练的时候,均采用了cosine_decay,来获得精度更高的模型。
+**A** 可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。
+### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
+**A**: 图片是在这里读取的:https://github.com/PaddlePaddle/PaddleOCR/blob/67ef25d593c4eabfaaceb22daade4577f53bed81/deploy/hubserving/ocr_system/module.py#L55,
+可以参考下面的写法,将url path转化为np array(https://cloud.tencent.com/developer/article/1467840)
+```
+response = request.urlopen('http://i1.whymtj.com/uploads/tu/201902/9999/52491ae4ba.jpg')
+img_array = np.array(bytearray(response.read()), dtype=np.uint8)
+img = cv.imdecode(img_array, -1)
+```
## 【精选】OCR精选10个问题
@@ -289,6 +292,10 @@
**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
+#### Q2.3.18: 请问有哪些修改骨干网络的技巧?
+
+**A**: 可以参考HS-ResNet这篇文章:https://arxiv.org/pdf/2010.07621.pdf
+
## 【实战篇】PaddleOCR实战问题
@@ -439,6 +446,19 @@ python3 -m pip install paddlepaddle-gpu==2.0.0rc1 -i https://mirror.baidu.com/py
- 在StyleText路径下面设置PYTHONPATH:`export PYTHONPATH=./`
- 拉取最新的代码
+#### Q3.1.29: PPOCRLabel创建矩形框时只能拖出正方形,如何进行矩形标注?
+
+**A**: 取消勾选:“编辑”-“正方形标注”
+
+#### Q3.1.30: Style-Text 如何不文字风格迁移,就像普通文本生成程序一样默认字体直接输出到分割的背景图?
+
+**A**: 使用image_synth模式会输出fake_bg.jpg,即为背景图。如果想要批量提取背景,可以稍微修改一下代码,将fake_bg保存下来即可。要修改的位置:
+https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/StyleText/engine/synthesisers.py#L68
+
+#### Q3.1.31: 怎么输出网络结构以及每层的参数信息?
+
+**A**: 可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。
+
### 数据集
@@ -786,3 +806,13 @@ ps -axu | grep train.py | awk '{print $2}' | xargs kill -9
建议首先排查diff出现在检测模型还是识别模型,或者尝试换其他模型是否有类似的问题。
其次,检查python端和C++端数据处理部分是否存在差异,建议保存环境,更新PaddleOCR代码再试下。
如果更新代码或者更新代码都没能解决,建议在PaddleOCR微信群里或者issue中抛出您的问题。
+
+#### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
+
+**A**: 图片是在这里读取的:https://github.com/PaddlePaddle/PaddleOCR/blob/67ef25d593c4eabfaaceb22daade4577f53bed81/deploy/hubserving/ocr_system/module.py#L55,
+可以参考下面的写法,将url path转化为np array(https://cloud.tencent.com/developer/article/1467840)
+```
+response = request.urlopen('http://i1.whymtj.com/uploads/tu/201902/9999/52491ae4ba.jpg')
+img_array = np.array(bytearray(response.read()), dtype=np.uint8)
+img = cv.imdecode(img_array, -1)
+```
From 10d7ecdbffefee6073790154052967a9dc2e4119 Mon Sep 17 00:00:00 2001
From: tink2123
Date: Mon, 4 Jan 2021 13:25:22 +0800
Subject: [PATCH 57/60] add faq, 210104
---
doc/doc_ch/FAQ.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index 2ad59f3d..ca680388 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -9,7 +9,7 @@
## PaddleOCR常见问题汇总(持续更新)
-* [近期更新(2020.1.4)](#近期更新)
+* [近期更新(2021.1.4)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
* [【理论篇】OCR通用32个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
From 0ae781e19acf2f753a26cd763fb18cec8abb6408 Mon Sep 17 00:00:00 2001
From: tink2123
Date: Mon, 4 Jan 2021 15:28:49 +0800
Subject: [PATCH 58/60] add faq, 210104
---
README_ch.md | 4 ++--
doc/doc_ch/FAQ.md | 26 +++++++++++++++-----------
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/README_ch.md b/README_ch.md
index 7aa43222..2a46ccea 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -101,8 +101,8 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- - [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
- - [【实战篇】PaddleOCR实战96个问题](./doc/doc_ch/FAQ.md)
+ - [【理论篇】OCR通用32个问题](./doc/doc_ch/FAQ.md)
+ - [【实战篇】PaddleOCR实战101个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index ca680388..b1d78539 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -11,24 +11,20 @@
* [近期更新(2021.1.4)](#近期更新)
* [【精选】OCR精选10个问题](#OCR精选10个问题)
-* [【理论篇】OCR通用32个问题](#OCR通用问题)
+* [【理论篇】OCR通用31个问题](#OCR通用问题)
* [基础知识7题](#基础知识)
* [数据集7题](#数据集2)
- * [模型训练调优18题](#模型训练调优2)
-* [【实战篇】PaddleOCR实战100个问题](#PaddleOCR实战问题)
+ * [模型训练调优17题](#模型训练调优2)
+* [【实战篇】PaddleOCR实战101个问题](#PaddleOCR实战问题)
* [使用咨询31题](#使用咨询)
* [数据集17题](#数据集3)
* [模型训练调优26题](#模型训练调优3)
- * [预测部署26题](#预测部署3)
+ * [预测部署27题](#预测部署3)
## 近期更新(2021.1.4)
-#### Q2.3.18: 请问有哪些修改骨干网络的技巧?
-
-**A**: 可以参考HS-ResNet这篇文章:https://arxiv.org/pdf/2010.07621.pdf
-
#### Q3.1.29: PPOCRLabel创建矩形框时只能拖出正方形,如何进行矩形标注?
**A** 取消勾选:“编辑”-“正方形标注”
@@ -52,6 +48,12 @@ img_array = np.array(bytearray(response.read()), dtype=np.uint8)
img = cv.imdecode(img_array, -1)
```
+### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
+
+**A** 可以的,识别和检测模块是解耦的。如果想对检测部署,需要自己修改一下main函数,
+只保留检测相关就可以:https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/deploy/cpp_infer/src/main.cpp#L72
+
+
## 【精选】OCR精选10个问题
@@ -292,9 +294,6 @@ img = cv.imdecode(img_array, -1)
**A**:StyleText模型生成的数据主要用于OCR识别模型的训练。PaddleOCR目前识别模型的输入为32 x N,因此当前版本模型主要适用高度为32的数据。
建议要合成的数据尺寸设置为32 x N。尺寸相差不多的数据也可以生成,尺寸很大或很小的数据效果确实不佳。
-#### Q2.3.18: 请问有哪些修改骨干网络的技巧?
-
-**A**: 可以参考HS-ResNet这篇文章:https://arxiv.org/pdf/2010.07621.pdf
@@ -816,3 +815,8 @@ response = request.urlopen('http://i1.whymtj.com/uploads/tu/201902/9999/52491ae4
img_array = np.array(bytearray(response.read()), dtype=np.uint8)
img = cv.imdecode(img_array, -1)
```
+
+#### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
+
+**A**: 可以的,识别和检测模块是解耦的。如果想对检测部署,需要自己修改一下main函数,
+只保留检测相关就可以:https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/deploy/cpp_infer/src/main.cpp#L72
From 7ada917dfde361885229646ef93c0fc3bf46ec68 Mon Sep 17 00:00:00 2001
From: tink2123
Date: Mon, 4 Jan 2021 15:29:56 +0800
Subject: [PATCH 59/60] add faq, 210104
---
README_ch.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README_ch.md b/README_ch.md
index 2a46ccea..163a19ed 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -101,7 +101,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [效果展示](#效果展示)
- FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- - [【理论篇】OCR通用32个问题](./doc/doc_ch/FAQ.md)
+ - [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战101个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md)
From c3e5522c38fb42ad721de2bae40425589f48b646 Mon Sep 17 00:00:00 2001
From: xiaoting <31891223+tink2123@users.noreply.github.com>
Date: Tue, 5 Jan 2021 11:27:46 +0800
Subject: [PATCH 60/60] format faq
---
doc/doc_ch/FAQ.md | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md
index b1d78539..da31d608 100755
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
@@ -27,20 +27,20 @@
#### Q3.1.29: PPOCRLabel创建矩形框时只能拖出正方形,如何进行矩形标注?
-**A** 取消勾选:“编辑”-“正方形标注”
+**A**: 取消勾选:“编辑”-“正方形标注”
#### Q3.1.30: Style-Text 如何不文字风格迁移,就像普通文本生成程序一样默认字体直接输出到分割的背景图?
-**A** 使用image_synth模式会输出fake_bg.jpg,即为背景图。如果想要批量提取背景,可以稍微修改一下代码,将fake_bg保存下来即可。要修改的位置:
+**A**: 使用image_synth模式会输出fake_bg.jpg,即为背景图。如果想要批量提取背景,可以稍微修改一下代码,将fake_bg保存下来即可。要修改的位置:
https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/StyleText/engine/synthesisers.py#L68
#### Q3.1.31: 怎么输出网络结构以及每层的参数信息?
-**A** 可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。
+**A**: 可以使用 `paddle.summary`, 具体参考:https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc1/api/paddle/hapi/model_summary/summary_cn.html#summary。
-### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
+#### Q3.4.26: 目前paddle hub serving 只支持 imgpath,如果我想用imgurl 去哪里改呢?
-**A**: 图片是在这里读取的:https://github.com/PaddlePaddle/PaddleOCR/blob/67ef25d593c4eabfaaceb22daade4577f53bed81/deploy/hubserving/ocr_system/module.py#L55,
+**A**: 图片是在这里读取的:https://github.com/PaddlePaddle/PaddleOCR/blob/67ef25d593c4eabfaaceb22daade4577f53bed81/deploy/hubserving/ocr_system/module.py#L55,
可以参考下面的写法,将url path转化为np array(https://cloud.tencent.com/developer/article/1467840)
```
response = request.urlopen('http://i1.whymtj.com/uploads/tu/201902/9999/52491ae4ba.jpg')
@@ -48,9 +48,9 @@ img_array = np.array(bytearray(response.read()), dtype=np.uint8)
img = cv.imdecode(img_array, -1)
```
-### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
+#### Q3.4.27: C++ 端侧部署可以只对OCR的检测部署吗?
-**A** 可以的,识别和检测模块是解耦的。如果想对检测部署,需要自己修改一下main函数,
+**A**: 可以的,识别和检测模块是解耦的。如果想对检测部署,需要自己修改一下main函数,
只保留检测相关就可以:https://github.com/PaddlePaddle/PaddleOCR/blob/de3e2e7cd3b8b65ee02d7a41e570fa5b511a3c1d/deploy/cpp_infer/src/main.cpp#L72