diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md
index ecb29f9b..a545b860 100644
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -162,7 +162,7 @@ inference/
sh tools/build.sh
```
-具体地,`tools/build.sh`中内容如下。
+Specifically, the content in `tools/build.sh` is as follows.
```shell
OPENCV_DIR=your_opencv_dir
@@ -201,6 +201,8 @@ make -j
sh tools/run.sh
```
+* If you want to use the orientation classifier to correct the detected boxes, set `use_angle_cls` to 1 in the file `tools/config.txt` to enable the function.
+
The detection results will be shown on the screen, which is as follows.
diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp
index 1dd33b30..e708a6e3 100644
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -53,6 +53,15 @@ int main(int argc, char **argv) {
config.cpu_math_library_num_threads, config.use_mkldnn,
config.use_zero_copy_run, config.max_side_len, config.det_db_thresh,
config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize);
+
+ Classifier *cls = nullptr;
+ if (config.use_angle_cls == true) {
+ cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
+ config.gpu_mem, config.cpu_math_library_num_threads,
+ config.use_mkldnn, config.use_zero_copy_run,
+ config.cls_thresh);
+ }
+
CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.use_zero_copy_run,
@@ -62,7 +71,7 @@ int main(int argc, char **argv) {
std::vector
>> boxes;
det.Run(srcimg, boxes);
- rec.Run(boxes, srcimg);
+ rec.Run(boxes, srcimg, cls);
auto end = std::chrono::system_clock::now();
auto duration =
diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp
new file mode 100644
index 00000000..7cdaaab4
--- /dev/null
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -0,0 +1,110 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+namespace PaddleOCR {
+
+// Runs the orientation classifier on a cropped text image.
+//
+// The image is resized/padded to the classifier input shape, normalized,
+// permuted into a flat float buffer and fed to the predictor. A copy of
+// `img` is returned; the copy is rotated by 180 degrees when the predicted
+// label is odd and its softmax score exceeds `cls_thresh`.
+cv::Mat Classifier::Run(cv::Mat &img) {
+  cv::Mat src_img;
+  img.copyTo(src_img);
+  cv::Mat resize_img;
+
+  // Classifier input shape as {channels, height, width}.
+  std::vector<int> rec_image_shape = {3, 32, 100};
+
+  this->resize_op_.Run(img, resize_img, rec_image_shape);
+
+  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
+                          this->is_scale_);
+
+  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+
+  this->permute_op_.Run(&resize_img, input.data());
+
+  // Inference.
+  if (this->use_zero_copy_run_) {
+    auto input_names = this->predictor_->GetInputNames();
+    auto input_t = this->predictor_->GetInputTensor(input_names[0]);
+    input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+    input_t->copy_from_cpu(input.data());
+    this->predictor_->ZeroCopyRun();
+  } else {
+    paddle::PaddleTensor input_t;
+    input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
+    input_t.data =
+        paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
+    input_t.dtype = PaddleDType::FLOAT32;
+    // NOTE(review): `outputs` is filled here but never read; the results are
+    // fetched through GetOutputTensor() below for both paths. Confirm that
+    // this is valid when zero-copy run is disabled.
+    std::vector<paddle::PaddleTensor> outputs;
+    this->predictor_->Run({input_t}, &outputs, 1);
+  }
+
+  // The first output holds the softmax scores, the second the label index.
+  auto output_names = this->predictor_->GetOutputNames();
+  auto softmax_out_t = this->predictor_->GetOutputTensor(output_names[0]);
+  auto label_out_t = this->predictor_->GetOutputTensor(output_names[1]);
+  auto softmax_shape_out = softmax_out_t->shape();
+  auto label_shape_out = label_out_t->shape();
+
+  // Element counts are the products of the output tensor dimensions.
+  int softmax_out_num =
+      std::accumulate(softmax_shape_out.begin(), softmax_shape_out.end(), 1,
+                      std::multiplies<int>());
+
+  int label_out_num =
+      std::accumulate(label_shape_out.begin(), label_shape_out.end(), 1,
+                      std::multiplies<int>());
+
+  std::vector<float> softmax_out(softmax_out_num);
+  // NOTE(review): label tensor assumed to be int64 - confirm against the
+  // exported classifier model.
+  std::vector<int64_t> label_out(label_out_num);
+
+  softmax_out_t->copy_to_cpu(softmax_out.data());
+  label_out_t->copy_to_cpu(label_out.data());
+
+  int label = static_cast<int>(label_out[0]);
+  float score = softmax_out[label];
+
+  // Flip the crop only for a confident odd-label prediction.
+  if (label % 2 == 1 && score > this->cls_thresh) {
+    cv::rotate(src_img, src_img, cv::ROTATE_180);
+  }
+  return src_img;
+}
+
+// Builds the inference configuration from this classifier's settings and
+// creates the predictor for the model files found under `model_dir`.
+void Classifier::LoadModel(const std::string &model_dir) {
+  AnalysisConfig config;
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  if (!this->use_gpu_) {
+    // CPU execution, with MKL-DNN enabled on request.
+    config.DisableGpu();
+    if (this->use_mkldnn_) {
+      config.EnableMKLDNN();
+    }
+    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+  } else {
+    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+  }
+
+  // Feed/fetch ops must be off (false) when zero-copy tensors are used.
+  const bool use_feed_fetch_ops = !this->use_zero_copy_run_;
+  config.SwitchUseFeedFetchOps(use_feed_fetch_ops);
+  // Set to true for models with multiple inputs.
+  config.SwitchSpecifyInputNames(true);
+
+  config.SwitchIrOptim(true);
+
+  config.EnableMemoryOptim();
+  config.DisableGlogInfo();
+
+  this->predictor_ = CreatePaddlePredictor(config);
+}
+} // namespace PaddleOCR
diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp
index bf94abce..48fad9c5 100644
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -108,9 +108,11 @@ void DBDetector::Run(cv::Mat &img,
const double maxvalue = 255;
cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
-
+ cv::Mat dilation_map;
+ cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2,2));
+ cv::dilate(bit_map, dilation_map, dila_ele);
boxes = post_processor_.BoxesFromBitmap(
- pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_);
+ pred_map, dilation_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index b997d829..7f88adc5 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -17,7 +17,7 @@
namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector>> boxes,
- cv::Mat &img) {
+ cv::Mat &img, Classifier *cls) {
cv::Mat srcimg;
img.copyTo(srcimg);
cv::Mat crop_img;
@@ -27,6 +27,9 @@ void CRNNRecognizer::Run(std::vector>> boxes,
int index = 0;
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(srcimg, boxes[i]);
+ if (cls != nullptr) {
+ crop_img = cls->Run(crop_img);
+ }
float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp
index 59115dc4..8c44a54a 100644
--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@@ -294,7 +294,7 @@ PostProcessor::FilterTagDetRes(std::vector>> boxes,
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
- if (rect_width <= 10 || rect_height <= 10)
+ if (rect_width <= 4 || rect_height <= 4)
continue;
root_points.push_back(boxes[n]);
}
diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp
index 0078063e..b44e9d02 100644
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -116,4 +116,26 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
cv::INTER_LINEAR);
}
+// Resizes `img` into `resize_img` for the orientation classifier.
+//
+// `rec_image_shape` is {channels, height, width}. The image is scaled to the
+// target height with its aspect ratio preserved (the width is capped at the
+// target width), then padded on the right with black pixels up to the target
+// width.
+void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
+                       const std::vector<int> &rec_image_shape) {
+  const int imgH = rec_image_shape[1];
+  const int imgW = rec_image_shape[2];
+
+  const float ratio =
+      static_cast<float>(img.cols) / static_cast<float>(img.rows);
+  // Compare in float before converting, so an over-wide result is clamped
+  // to imgW instead of being cast to int.
+  const float scaled_w = ceilf(imgH * ratio);
+  const int resize_w = scaled_w > imgW ? imgW : static_cast<int>(scaled_w);
+
+  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
+             cv::INTER_LINEAR);
+  if (resize_w < imgW) {
+    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
+                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+  }
+}
+
} // namespace PaddleOCR
\ No newline at end of file
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 6c53f29e..7e03b9d1 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -10,9 +10,14 @@ use_zero_copy_run 1
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
-det_db_unclip_ratio 2.0
+det_db_unclip_ratio 1.6
det_model_dir ./inference/det_db
+# cls config
+use_angle_cls 0
+cls_model_dir ./inference/cls
+cls_thresh 0.9
+
# rec config
rec_model_dir ./inference/rec_crnn
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
diff --git a/deploy/hubserving/ocr_det/params.py b/deploy/hubserving/ocr_det/params.py
index e88ab45c..f37993a1 100644
--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
@@ -13,7 +13,7 @@ def read_params():
#params for text detector
cfg.det_algorithm = "DB"
- cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+ cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
cfg.det_max_side_len = 960
#DB parmas
diff --git a/deploy/hubserving/ocr_rec/params.py b/deploy/hubserving/ocr_rec/params.py
index 59772e21..58a8bc11 100644
--- a/deploy/hubserving/ocr_rec/params.py
+++ b/deploy/hubserving/ocr_rec/params.py
@@ -28,7 +28,7 @@ def read_params():
#params for text recognizer
cfg.rec_algorithm = "CRNN"
- cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+ cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py
index 0ff56d37..d83fe692 100644
--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
@@ -10,10 +10,10 @@ class Config(object):
def read_params():
cfg = Config()
-
+
#params for text detector
cfg.det_algorithm = "DB"
- cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+ cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
cfg.det_max_side_len = 960
#DB parmas
@@ -28,7 +28,7 @@ def read_params():
#params for text recognizer
cfg.rec_algorithm = "CRNN"
- cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+ cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
@@ -38,6 +38,13 @@ def read_params():
cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
cfg.use_space_char = True
+ #params for text classifier
+ cfg.use_angle_cls = False
+ cfg.cls_model_dir = "./inference/ch_ppocr_mobile-v1.1.cls_infer/"
+ cfg.cls_image_shape = "3, 48, 192"
+ cfg.label_list = ['0', '180']
+ cfg.cls_batch_num = 30
+
cfg.use_zero_copy_run = False
return cfg
diff --git a/doc/doc_ch/serving.md b/deploy/hubserving/readme.md
similarity index 83%
rename from doc/doc_ch/serving.md
rename to deploy/hubserving/readme.md
index 99fe3006..5d29b432 100644
--- a/doc/doc_ch/serving.md
+++ b/deploy/hubserving/readme.md
@@ -1,10 +1,12 @@
-# 服务部署
+[English](readme_en.md) | 简体中文
PaddleOCR提供2种服务部署方式:
-- 基于HubServing的部署:已集成到PaddleOCR中([code](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/hubserving)),按照本教程使用;
-- 基于PaddleServing的部署:详见PaddleServing官网[demo](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/ocr),后续也将集成到PaddleOCR。
+- 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",按照本教程使用;
+- 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",使用方法参考[文档](../pdserving/readme.md)。
-服务部署目录下包括检测、识别、2阶段串联三种服务包,根据需求选择相应的服务包进行安装和启动。目录如下:
+# 基于PaddleHub Serving的服务部署
+
+hubserving服务部署目录下包括检测、识别、2阶段串联三种服务包,请根据需求选择相应的服务包进行安装和启动。目录结构如下:
```
deploy/hubserving/
└─ ocr_det 检测模块服务包
@@ -30,11 +32,18 @@ pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
# 在Linux下设置环境变量
export PYTHONPATH=.
-# 在Windows下设置环境变量
+
+# 或者,在Windows下设置环境变量
SET PYTHONPATH=.
```
-### 2. 安装服务模块
+### 2. 下载推理模型
+安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是v1.1版的超轻量模型,默认检测模型路径为:
+`./inference/ch_ppocr_mobile_v1.1_det_infer/`,识别模型路径为:`./inference/ch_ppocr_mobile_v1.1_rec_infer/`。
+
+**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的[模型库](../../doc/doc_ch/models_list.md)下载,也可以替换成自己训练转换好的模型。
+
+### 3. 安装服务模块
PaddleOCR提供3种服务模块,根据需要安装所需模块。
* 在Linux环境下,安装示例如下:
@@ -61,15 +70,7 @@ hub install deploy\hubserving\ocr_rec\
hub install deploy\hubserving\ocr_system\
```
-#### 安装模型
-安装服务模块前,需要将训练好的模型放到对应的文件夹内。默认使用的是:
-./inference/ch_det_mv3_db/
-和
-./inference/ch_rec_mv3_crnn/
-这两个模型可以在https://github.com/PaddlePaddle/PaddleOCR 下载
-可以在./deploy/hubserving/ocr_system/params.py 里面修改成自己的模型
-
-### 3. 启动服务
+### 4. 启动服务
#### 方式1. 命令行命令启动(仅支持CPU)
**启动命令:**
```shell
@@ -172,7 +173,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
```hub serving stop --port/-p XXXX```
- 2、 到相应的`module.py`和`params.py`等文件中根据实际需求修改代码。
-例如,如果需要替换部署服务所用模型,则需要到`params.py`中修改模型路径参数`det_model_dir`和`rec_model_dir`,当然,同时可能还需要修改其他相关参数,请根据实际情况修改调试。 建议修改后先直接运行`module.py`调试,能正确运行预测后再启动服务测试。
+例如,如果需要替换部署服务所用模型,则需要到`params.py`中修改模型路径参数`det_model_dir`和`rec_model_dir`,当然,同时可能还需要修改其他相关参数,请根据实际情况修改调试。 **强烈建议修改后先直接运行`module.py`调试,能正确运行预测后再启动服务测试。**
- 3、 卸载旧服务包
```hub uninstall ocr_system```
diff --git a/doc/doc_en/serving_en.md b/deploy/hubserving/readme_en.md
similarity index 84%
rename from doc/doc_en/serving_en.md
rename to deploy/hubserving/readme_en.md
index 7439cc84..efef1cda 100644
--- a/doc/doc_en/serving_en.md
+++ b/deploy/hubserving/readme_en.md
@@ -1,10 +1,12 @@
-# Service deployment
+English | [简体中文](readme.md)
-PaddleOCR provides 2 service deployment methods::
-- Based on **HubServing**:Has been integrated into PaddleOCR ([code](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/hubserving)). Please follow this tutorial.
-- Based on **PaddleServing**:See PaddleServing official website for details ([demo](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/ocr)). Follow-up will also be integrated into PaddleOCR.
+PaddleOCR provides 2 service deployment methods:
+- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please follow this tutorial.
+- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please refer to the [tutorial](../pdserving/readme_en.md) for usage.
-The service deployment directory includes three service packages: detection, recognition, and two-stage series connection. Select the corresponding service package to install and start service according to your needs. The directory is as follows:
+# Service deployment based on PaddleHub Serving
+
+The hubserving service deployment directory includes three service packages: detection, recognition, and two-stage series connection. Please select the corresponding service package to install and start service according to your needs. The directory is as follows:
```
deploy/hubserving/
└─ ocr_det detection module service package
@@ -31,11 +33,17 @@ pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
# Set environment variables on Linux
export PYTHONPATH=.
+
# Set environment variables on Windows
SET PYTHONPATH=.
```
-### 2. Install Service Module
+### 2. Download inference model
+Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v1.1 is used, and the default detection model path is: `./inference/ch_ppocr_mobile_v1.1_det_infer/`, the default recognition model path is: `./inference/ch_ppocr_mobile_v1.1_rec_infer/`.
+
+**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
+
+### 3. Install Service Module
PaddleOCR provides 3 kinds of service modules, install the required modules according to your needs.
* On Linux platform, the examples are as follows.
@@ -62,7 +70,7 @@ hub install deploy\hubserving\ocr_rec\
hub install deploy\hubserving\ocr_system\
```
-### 3. Start service
+### 4. Start service
#### Way 1. Start with command line parameters (CPU only)
**start command:**
diff --git a/deploy/lite/Makefile b/deploy/lite/Makefile
index 96e05ecf..4c30d644 100644
--- a/deploy/lite/Makefile
+++ b/deploy/lite/Makefile
@@ -40,8 +40,8 @@ CXX_LIBS = ${OPENCV_LIBS} -L$(LITE_ROOT)/cxx/lib/ -lpaddle_light_api_shared $(SY
#CXX_LIBS = $(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a $(SYSTEM_LIBS)
-ocr_db_crnn: fetch_opencv ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o
- $(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o -o ocr_db_crnn $(CXX_LIBS) $(LDFLAGS)
+ocr_db_crnn: fetch_opencv ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o cls_process.o
+ $(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o cls_process.o -o ocr_db_crnn $(CXX_LIBS) $(LDFLAGS)
ocr_db_crnn.o: ocr_db_crnn.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o ocr_db_crnn.o -c ocr_db_crnn.cc
@@ -49,6 +49,9 @@ ocr_db_crnn.o: ocr_db_crnn.cc
crnn_process.o: fetch_opencv crnn_process.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o crnn_process.o -c crnn_process.cc
+cls_process.o: fetch_opencv cls_process.cc
+ $(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o cls_process.o -c cls_process.cc
+
db_post_process.o: fetch_clipper fetch_opencv db_post_process.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o db_post_process.o -c db_post_process.cc
@@ -73,5 +76,5 @@ fetch_opencv:
.PHONY: clean
clean:
- rm -f ocr_db_crnn.o clipper.o db_post_process.o crnn_process.o
+ rm -f ocr_db_crnn.o clipper.o db_post_process.o crnn_process.o cls_process.o
rm -f ocr_db_crnn
diff --git a/deploy/lite/cls_process.cc b/deploy/lite/cls_process.cc
new file mode 100644
index 00000000..f522e4bc
--- /dev/null
+++ b/deploy/lite/cls_process.cc
@@ -0,0 +1,43 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cls_process.h" //NOLINT
+#include
+#include
+#include
+
+const std::vector rec_image_shape{3, 32, 100};
+
+// Resizes `img` for the orientation classifier and returns the result.
+//
+// The target height and width come from the file-level `rec_image_shape`
+// ({channels, height, width}). The image is scaled to the target height with
+// its aspect ratio preserved (the width is capped at the target width), then
+// padded on the right with black pixels up to the target width.
+cv::Mat ClsResizeImg(cv::Mat img) {
+  const int imgH = rec_image_shape[1];
+  const int imgW = rec_image_shape[2];
+
+  const float ratio =
+      static_cast<float>(img.cols) / static_cast<float>(img.rows);
+
+  // Compare in float before converting, so an over-wide result is clamped
+  // to imgW instead of being cast to int.
+  const float scaled_w = ceilf(imgH * ratio);
+  const int resize_w = scaled_w > imgW ? imgW : static_cast<int>(scaled_w);
+
+  cv::Mat resize_img;
+  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
+             cv::INTER_LINEAR);
+  if (resize_w < imgW) {
+    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
+                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+  }
+  return resize_img;
+}
\ No newline at end of file
diff --git a/deploy/lite/cls_process.h b/deploy/lite/cls_process.h
new file mode 100644
index 00000000..eedeeb9b
--- /dev/null
+++ b/deploy/lite/cls_process.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "math.h" //NOLINT
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+
+cv::Mat ClsResizeImg(cv::Mat img);
\ No newline at end of file
diff --git a/deploy/lite/config.txt b/deploy/lite/config.txt
index 8ed835dd..f08f8e49 100644
--- a/deploy/lite/config.txt
+++ b/deploy/lite/config.txt
@@ -1,4 +1,4 @@
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
-det_db_unclip_ratio 2.0
\ No newline at end of file
+det_db_unclip_ratio 1.6
\ No newline at end of file
diff --git a/deploy/lite/db_post_process.cc b/deploy/lite/db_post_process.cc
index eac4d6d2..495016bc 100644
--- a/deploy/lite/db_post_process.cc
+++ b/deploy/lite/db_post_process.cc
@@ -293,7 +293,7 @@ FilterTagDetRes(std::vector>> boxes, float ratio_h,
rect_height =
static_cast(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
- if (rect_width <= 10 || rect_height <= 10)
+ if (rect_width <= 4 || rect_height <= 4)
continue;
root_points.push_back(boxes[n]);
}
diff --git a/deploy/lite/ocr_db_crnn.cc b/deploy/lite/ocr_db_crnn.cc
index c94062fd..07bec2c6 100644
--- a/deploy/lite/ocr_db_crnn.cc
+++ b/deploy/lite/ocr_db_crnn.cc
@@ -15,6 +15,7 @@
#include "paddle_api.h" // NOLINT
#include
+#include "cls_process.h"
#include "crnn_process.h"
#include "db_post_process.h"
@@ -105,11 +106,55 @@ cv::Mat DetResizeImg(const cv::Mat img, int max_size_len,
return resize_img;
}
+// Runs the orientation classifier on `img`.
+//
+// Returns a copy of `img`, rotated by 180 degrees when the predicted label
+// index is odd and its softmax score exceeds `thresh`.
+cv::Mat RunClsModel(cv::Mat img, std::shared_ptr<PaddlePredictor> predictor_cls,
+                    const float thresh = 0.5) {
+  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
+  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
+
+  cv::Mat srcimg;
+  img.copyTo(srcimg);
+
+  // Preprocess the input image itself. The previous code resized a
+  // never-filled temporary Mat, so the classifier never saw the real crop.
+  cv::Mat resize_img = ClsResizeImg(img);
+  resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);
+
+  const float *dimg = reinterpret_cast<const float *>(resize_img.data);
+
+  auto input_tensor0 = predictor_cls->GetInput(0);
+  input_tensor0->Resize({1, 3, resize_img.rows, resize_img.cols});
+  auto *data0 = input_tensor0->mutable_data<float>();
+
+  NeonMeanScale(dimg, data0, resize_img.rows * resize_img.cols, mean, scale);
+  // Run CLS predictor
+  predictor_cls->Run();
+
+  // Get output and run postprocess: output 0 holds the softmax scores,
+  // output 1 the predicted label index.
+  auto softmax_out = predictor_cls->GetOutput(0);
+  auto label_out = predictor_cls->GetOutput(1);
+  const auto *softmax_scores = softmax_out->data<float>();
+  // NOTE(review): label tensor assumed to be int64 - confirm against the
+  // exported classifier model.
+  const auto *label_idxs = label_out->data<int64_t>();
+  int label_idx = static_cast<int>(label_idxs[0]);
+  float score = softmax_scores[label_idx];
+
+  // Flip the crop only for a confident odd-label prediction.
+  if (label_idx % 2 == 1 && score > thresh) {
+    cv::rotate(srcimg, srcimg, cv::ROTATE_180);
+  }
+  return srcimg;
+}
+
void RunRecModel(std::vector>> boxes, cv::Mat img,
std::shared_ptr predictor_crnn,
std::vector &rec_text,
std::vector &rec_text_score,
- std::vector charactor_dict) {
+ std::vector charactor_dict,
+ std::shared_ptr predictor_cls) {
std::vector mean = {0.5f, 0.5f, 0.5f};
std::vector scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
@@ -121,6 +166,7 @@ void RunRecModel(std::vector>> boxes, cv::Mat img,
int index = 0;
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(srcimg, boxes[i]);
+ crop_img = RunClsModel(crop_img, predictor_cls);
float wh_ratio =
static_cast(crop_img.cols) / static_cast(crop_img.rows);
@@ -243,8 +289,10 @@ RunDetModel(std::shared_ptr predictor, cv::Mat img,
const double maxvalue = 255;
cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
-
- auto boxes = BoxesFromBitmap(pred_map, bit_map, Config);
+ cv::Mat dilation_map;
+ cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2,2));
+ cv::dilate(bit_map, dilation_map, dila_ele);
+ auto boxes = BoxesFromBitmap(pred_map, dilation_map, Config);
std::vector>> filter_boxes =
FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
@@ -323,8 +371,9 @@ int main(int argc, char **argv) {
}
std::string det_model_file = argv[1];
std::string rec_model_file = argv[2];
- std::string img_path = argv[3];
- std::string dict_path = argv[4];
+ std::string cls_model_file = argv[3];
+ std::string img_path = argv[4];
+ std::string dict_path = argv[5];
//// load config from txt file
auto Config = LoadConfigTxt("./config.txt");
@@ -333,6 +382,7 @@ int main(int argc, char **argv) {
auto det_predictor = loadModel(det_model_file);
auto rec_predictor = loadModel(rec_model_file);
+ auto cls_predictor = loadModel(cls_model_file);
auto charactor_dict = ReadDict(dict_path);
charactor_dict.push_back(" ");
@@ -343,7 +393,7 @@ int main(int argc, char **argv) {
std::vector rec_text;
std::vector rec_text_score;
RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
- charactor_dict);
+ charactor_dict, cls_predictor);
auto end = std::chrono::system_clock::now();
auto duration =
diff --git a/deploy/pdserving/readme.md b/deploy/pdserving/readme.md
index f9ad80b8..af12d508 100644
--- a/deploy/pdserving/readme.md
+++ b/deploy/pdserving/readme.md
@@ -1,5 +1,10 @@
-# Paddle Serving 服务部署(Beta)
+[English](readme_en.md) | 简体中文
+PaddleOCR提供2种服务部署方式:
+- 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",使用方法参考[文档](../hubserving/readme.md)。
+- 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",按照本教程使用。
+
+# Paddle Serving 服务部署
本教程将介绍基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)部署PaddleOCR在线预测服务的详细步骤。
## 快速启动服务
@@ -14,36 +19,19 @@
**操作系统版本:CentOS 6以上**
-**Python3操作指南:**
+**Python版本: 2.7/3.6/3.7**
+
+**Python操作指南:**
```
-#以下提供beta版本的paddle serving whl包,欢迎试用,正式版会在8月中正式上线
-#GPU用户下载server包使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server_gpu-0.3.2-py3-none-any.whl
-python -m pip install paddle_serving_server_gpu-0.3.2-py3-none-any.whl
-#CPU版本使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server-0.3.2-py3-none-any.whl
-python -m pip install paddle_serving_server-0.3.2-py3-none-any.whl
+#CPU/GPU版本选择一个
+#GPU版本服务端
+python -m pip install paddle_serving_server_gpu
+#CPU版本服务端
+python -m pip install paddle_serving_server
#客户端和App包使用以下链接(CPU,GPU通用)
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_client-0.3.2-cp36-none-any.whl
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_app-0.1.2-py3-none-any.whl
-python -m pip install paddle_serving_app-0.1.2-py3-none-any.whl paddle_serving_client-0.3.2-cp36-none-any.whl
+python -m pip install paddle_serving_app paddle_serving_client
```
-**Python2操作指南:**
-```
-#以下提供beta版本的paddle serving whl包,欢迎试用,正式版会在8月中正式上线
-#GPU用户下载server包使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server_gpu-0.3.2-py2-none-any.whl
-python -m pip install paddle_serving_server_gpu-0.3.2-py2-none-any.whl
-#CPU版本使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server-0.3.2-py2-none-any.whl
-python -m pip install paddle_serving_server-0.3.2-py2-none-any.whl
-
-#客户端和App包使用以下链接(CPU,GPU通用)
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_app-0.1.2-py2-none-any.whl
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_client-0.3.2-cp27-none-any.whl
-python -m pip install paddle_serving_app-0.1.2-py2-none-any.whl paddle_serving_client-0.3.2-cp27-none-any.whl
-```
### 2. 模型转换
可以使用`paddle_serving_app`提供的模型,执行下列命令
diff --git a/deploy/pdserving/readme_en.md b/deploy/pdserving/readme_en.md
new file mode 100644
index 00000000..9a0c684f
--- /dev/null
+++ b/deploy/pdserving/readme_en.md
@@ -0,0 +1,123 @@
+English | [简体中文](readme.md)
+
+PaddleOCR provides 2 service deployment methods:
+- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please refer to the [tutorial](../hubserving/readme_en.md) for usage.
+- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please follow this tutorial.
+
+# Service deployment based on Paddle Serving
+
+This tutorial introduces the detailed steps of deploying the PaddleOCR online prediction service based on [Paddle Serving](https://github.com/PaddlePaddle/Serving).
+
+## Quick start service
+
+### 1. Prepare the environment
+Let's first install the relevant components of Paddle Serving. GPU is recommended for service deployment with Paddle Serving.
+
+**Requirements:**
+- **CUDA version: 9.0**
+- **CUDNN version: 7.0**
+- **Operating system version: >= CentOS 6**
+- **Python version: 2.7/3.6/3.7**
+
+**Installation:**
+```
+# install GPU server
+python -m pip install paddle_serving_server_gpu
+
+# or, install CPU server
+python -m pip install paddle_serving_server
+
+# install client and App package (CPU/GPU)
+python -m pip install paddle_serving_app paddle_serving_client
+```
+
+### 2. Model transformation
+You can directly use converted model provided by `paddle_serving_app` for convenience. Execute the following command to obtain:
+```
+python -m paddle_serving_app.package --get_model ocr_rec
+tar -xzvf ocr_rec.tar.gz
+python -m paddle_serving_app.package --get_model ocr_det
+tar -xzvf ocr_det.tar.gz
+```
+Executing the above command will download the `db_crnn_mobile` model, which is in a different format from the inference model. If you want to use other models for deployment, you can refer to the [tutorial](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md) to convert your inference model to a model that is deployable with Paddle Serving.
+
+We take the `ch_rec_r34_vd_crnn` model as an example. Download the inference model by executing the following command:
+```
+wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar
+tar xf ch_rec_r34_vd_crnn_infer.tar
+```
+
+Convert the downloaded model by executing the following python script:
+```
+from paddle_serving_client.io import inference_model_to_serving
+inference_model_dir = "ch_rec_r34_vd_crnn"
+serving_client_dir = "serving_client_dir"
+serving_server_dir = "serving_server_dir"
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params")
+```
+
+Finally, model configuration of client and server will be generated in `serving_client_dir` and `serving_server_dir`.
+
+### 3. Start service
+Start the standard version or the fast version service according to your actual needs. The comparison of the two versions is shown in the table below:
+
+|version|characteristics|recommended scenarios|
+|-|-|-|
+|standard version|High stability, suitable for distributed deployment|Large throughput and cross regional deployment|
+|fast version|Easy to deploy and fast to predict|Suitable for scenarios which require high prediction speed and fast iteration speed|
+
+#### Mode 1. Start the standard mode service
+
+```
+# start with CPU
+python -m paddle_serving_server.serve --model ocr_det_model --port 9293
+python ocr_web_server.py cpu
+
+# or, with GPU
+python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
+python ocr_web_server.py gpu
+```
+
+#### Mode 2. Start the fast mode service
+
+```
+# start with CPU
+python ocr_local_server.py cpu
+
+# or, with GPU
+python ocr_local_server.py gpu
+```
+
+## Send prediction requests
+
+```
+python ocr_web_client.py
+```
+
+## Returned result format
+
+The returned result is a JSON string, e.g.
+```
+{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}}
+```
+
+You can also print the readable result in `res`:
+```
+土地整治与土壤修复研究中心
+华南农业大学1素图
+```
+
+## User defined service module modification
+
+The pre-processing and post-processing logic can be found in the `preprocess` and `postprocess` functions in `ocr_web_server.py` or `ocr_local_server.py`. These functions call the pre-processing/post-processing library for common CV models provided by `paddle_serving_app`.
+You can modify the corresponding code according to your actual needs.
+
+If you only want to start the detection service or the recognition service, execute the corresponding script by referring to the following table. Specify whether the CPU or GPU is used in the start command parameters.
+
+| task | standard | fast |
+| ---- | ----------------- | ------------------- |
+| detection | det_web_server.py | det_local_server.py |
+| recognition | rec_web_server.py | rec_local_server.py |
+
+More info can be found in [Paddle Serving](https://github.com/PaddlePaddle/Serving).
diff --git a/deploy/slim/prune/README_ch.md b/deploy/slim/prune/README_ch.md
new file mode 100644
index 00000000..fbd9921d
--- /dev/null
+++ b/deploy/slim/prune/README_ch.md
@@ -0,0 +1,180 @@
+\> 运行示例前请先安装develop版本PaddleSlim
+
+
+
+# 模型裁剪压缩教程
+
+压缩结果:
+
+
+
+ 序号
+ 任务
+ 模型
+ 压缩策略[3] [4]
+ 精度(自建中文数据集)
+ 耗时[1] (ms)
+ 整体耗时[2] (ms)
+ 加速比
+ 整体模型大小(M)
+ 压缩比例
+ 下载链接
+
+
+
+
+ 0
+ 检测
+ MobileNetV3_DB
+ 无
+ 61.7
+ 224
+ 375
+ -
+ 8.6
+ -
+
+
+
+ 识别
+ MobileNetV3_CRNN
+ 无
+ 62.0
+ 9.52
+
+
+
+ 1
+ 检测
+ SlimTextDet
+ PACT量化训练
+ 62.1
+ 195
+ 348
+ 8%
+ 2.8
+ 67.82%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+ 2
+ 检测
+ SlimTextDet_quat_pruning
+ 剪裁+PACT量化训练
+ 60.86
+ 142
+ 288
+ 30%
+ 2.8
+ 67.82%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+ 3
+ 检测
+ SlimTextDet_pruning
+ 剪裁
+ 61.57
+ 138
+ 295
+ 27%
+ 2.9
+ 66.28%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+
+
+
+## 概述
+
+复杂的模型有利于提高模型的性能,但也导致模型中存在一定冗余,模型裁剪通过移出网络模型中的子模型来减少这种冗余,达到减少模型计算复杂度,提高模型推理性能的目的。
+
+该示例使用PaddleSlim提供的[裁剪压缩API](https://paddlepaddle.github.io/PaddleSlim/api/prune_api/)对OCR模型进行压缩。
+
+在阅读该示例前,建议您先了解以下内容:
+
+
+
+\- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
+
+\- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)
+
+
+
+## 安装PaddleSlim
+
+```bash
+
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+
+cd PaddleSlim
+
+python setup.py install
+
+```
+
+
+## 获取预训练模型
+[检测预训练模型下载地址]()
+
+
+## 敏感度分析训练
+ 加载预训练模型后,通过对现有模型的每个网络层进行敏感度分析,了解各网络层冗余度,从而决定每个网络层的裁剪比例。敏感度分析的具体细节见:[敏感度分析](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)
+
+进入PaddleOCR根目录,通过以下命令对模型进行敏感度分析:
+
+```bash
+
+python deploy/slim/prune/sensitivity_anal.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./deploy/slim/prune/pretrain_models/det_mv3_db/best_accuracy Global.test_batch_size_per_card=1
+
+```
+
+
+
+## 裁剪模型与fine-tune
+ 裁剪时通过之前的敏感度分析文件决定每个网络层的裁剪比例。在具体实现时,为了尽可能多的保留从图像中提取的低阶特征,我们跳过了backbone中靠近输入的4个卷积层。同样,为了减少由于裁剪导致的模型性能损失,我们通过之前敏感度分析所获得的敏感度表,挑选出了一些冗余较少,对裁剪较为敏感的[网络层](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/slim/prune/pruning_and_finetune.py#L41),并在之后的裁剪过程中选择避开这些网络层。裁剪过后finetune的过程沿用OCR检测模型原始的训练策略。
+
+```bash
+
+python deploy/slim/prune/pruning_and_finetune.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./deploy/slim/prune/pretrain_models/det_mv3_db/best_accuracy Global.test_batch_size_per_card=1
+
+```
+
+
+
+
+
+## 导出模型
+
+在得到裁剪训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:
+
+```bash
+
+python deploy/slim/prune/export_prune_model.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./output/det_db/best_accuracy Global.test_batch_size_per_card=1 Global.save_inference_dir=inference_model
+
+```
diff --git a/deploy/slim/prune/README_en.md b/deploy/slim/prune/README_en.md
new file mode 100644
index 00000000..d854c107
--- /dev/null
+++ b/deploy/slim/prune/README_en.md
@@ -0,0 +1,183 @@
+\> PaddleSlim develop version should be installed before running this example.
+
+
+
+# Model compress tutorial (Pruning)
+
+Compress results:
+
+
+
+ ID
+ Task
+ Model
+ Compress Strategy[3] [4]
+ Criterion(Chinese dataset)
+ Inference Time[1] (ms)
+ Inference Time(Total model)[2] (ms)
+ Acceleration Ratio
+ Model Size(MB)
+ Compress Ratio
+ Download Link
+
+
+
+
+ 0
+ Detection
+ MobileNetV3_DB
+ None
+ 61.7
+ 224
+ 375
+ -
+ 8.6
+ -
+
+
+
+ Recognition
+ MobileNetV3_CRNN
+ None
+ 62.0
+ 9.52
+
+
+
+ 1
+ Detection
+ SlimTextDet
+ PACT Quant Aware Training
+ 62.1
+ 195
+ 348
+ 8%
+ 2.8
+ 67.82%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+ 2
+ Detection
+ SlimTextDet_quat_pruning
+ Pruning+PACT Quant Aware Training
+ 60.86
+ 142
+ 288
+ 30%
+ 2.8
+ 67.82%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+ 3
+ Detection
+ SlimTextDet_pruning
+ Pruning
+ 61.57
+ 138
+ 295
+ 27%
+ 2.9
+ 66.28%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+
+
+
+## Overview
+
+Generally, a more complex model would achieve better performance in the task, but it also leads to some redundancy in the model. Model Pruning is a technique that reduces this redundancy by removing the sub-models in the neural network model, so as to reduce model calculation complexity and improve model inference performance.
+
+This example uses the [APIs of Pruning](https://paddlepaddle.github.io/PaddleSlim/api/prune_api/) provided by PaddleSlim to compress the OCR model.
+
+It is recommended that you read the following pages before reading this example:
+
+
+
+\- [The training strategy of OCR model](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
+
+\- [PaddleSlim Document](https://paddlepaddle.github.io/PaddleSlim/)
+
+
+
+## Install PaddleSlim
+
+```bash
+
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+
+cd PaddleSlim
+
+python setup.py install
+
+```
+
+
+## Download Pretrain Model
+
+[Download link of Detection pretrain model]()
+
+
+## Pruning sensitivity analysis
+
+ After the pre-trained model is loaded, sensitivity analysis is performed on each network layer of the model to understand the redundancy of each network layer, thereby determining the pruning ratio of each network layer. For specific details of sensitivity analysis, see: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)
+
+Enter the PaddleOCR root directory, then perform sensitivity analysis on the model with the following command:
+
+```bash
+
+python deploy/slim/prune/sensitivity_anal.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./deploy/slim/prune/pretrain_models/det_mv3_db/best_accuracy Global.test_batch_size_per_card=1
+
+```
+
+
+
+## Model pruning and Fine-tune
+
+ When pruning, the previous sensitivity analysis file determines the pruning ratio of each network layer. In the specific implementation, in order to retain as many low-level features extracted from the image as possible, we skipped the 4 convolutional layers close to the input in the backbone. Similarly, in order to reduce the model performance loss caused by pruning, we selected some of the less redundant and more sensitive [network layers](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/slim/prune/pruning_and_finetune.py#L41) through the sensitivity table obtained from the previous sensitivity analysis, and chose to skip these network layers in the subsequent pruning process. After pruning, the model needs a finetune process to recover the performance, and the training strategy of finetune is similar to the strategy of training the original OCR detection model.
+
+```bash
+
+python deploy/slim/prune/pruning_and_finetune.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./deploy/slim/prune/pretrain_models/det_mv3_db/best_accuracy Global.test_batch_size_per_card=1
+
+```
+
+
+
+
+
+## Export inference model
+
+After pruning and fine-tuning the model, we can export it as an inference_model for predictive deployment:
+
+```bash
+
+python deploy/slim/prune/export_prune_model.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./output/det_db/best_accuracy Global.test_batch_size_per_card=1 Global.save_inference_dir=inference_model
+
+```
diff --git a/deploy/slim/prune/export_prune_model.py b/deploy/slim/prune/export_prune_model.py
new file mode 100644
index 00000000..0603966f
--- /dev/null
+++ b/deploy/slim/prune/export_prune_model.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..', '..', '..'))
+sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))
+
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.save_load import init_model
+from paddleslim.prune import load_model
+
+
+def main():
+    """Load weights via paddleslim.prune.load_model, export an inference model."""
+    startup_prog, eval_program, place, config, _ = program.preprocess()
+    feeded_var_names, target_vars, fetches_var_name = program.build_export(
+        config, eval_program, startup_prog)
+    eval_program = eval_program.clone(for_test=True)
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    # Global.checkpoints takes precedence; pretrain_weights is the fallback.
+    global_cfg = config['Global']
+    path = global_cfg['checkpoints']
+    if path is None:
+        path = global_cfg['pretrain_weights']
+    load_model(exe, eval_program, path)
+
+    out_dir = global_cfg['save_inference_dir']
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+    fluid.io.save_inference_model(
+        dirname=out_dir,
+        feeded_var_names=feeded_var_names,
+        main_program=eval_program,
+        target_vars=target_vars,
+        executor=exe,
+        model_filename='model',
+        params_filename='params')
+    print("inference model saved in {}/model and {}/params".format(out_dir,
+                                                                    out_dir))
+    print("save success, output_name_list:", fetches_var_name)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/deploy/slim/prune/pruning_and_finetune.py b/deploy/slim/prune/pruning_and_finetune.py
new file mode 100644
index 00000000..0a03cb44
--- /dev/null
+++ b/deploy/slim/prune/pruning_and_finetune.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import numpy as np
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..', '..', '..'))
+sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))
+
+import tools.program as program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from ppocr.utils.character import CharacterOps
+from ppocr.utils.utility import initial_logger
+from paddleslim.prune import Pruner, save_model
+from paddleslim.analysis import flops
+from paddleslim.core.graph_wrapper import *
+from paddleslim.prune import load_sensitivities, get_ratios_by_loss, merge_sensitive
+logger = initial_logger()
+
+# Parameters kept unpruned: main() drops them from the sensitivity table.
+skip_list = [
+    'conv10_linear_weights', 'conv11_linear_weights', 'conv12_expand_weights',
+    'conv12_linear_weights', 'conv12_se_2_weights', 'conv13_linear_weights',
+    'conv2_linear_weights', 'conv4_linear_weights', 'conv5_expand_weights',
+    'conv5_linear_weights', 'conv5_se_2_weights', 'conv6_linear_weights',
+    'conv7_linear_weights', 'conv8_expand_weights', 'conv8_linear_weights',
+    'conv9_expand_weights', 'conv9_linear_weights'
+]
+
+
+def main():
+    """Prune the train/eval programs using "sensitivities_0.data", then pass
+    the pruned programs to program.train_eval_det_run / train_eval_rec_run."""
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    program.check_gpu(use_gpu)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+        config['Global']['char_ops'] = CharacterOps(config['Global'])
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    startup_program = fluid.Program()
+    train_program = fluid.Program()
+    train_build_outputs = program.build(
+        config, train_program, startup_program, mode='train')
+    train_loader = train_build_outputs[0]
+    train_fetch_name_list = train_build_outputs[1]
+    train_fetch_varname_list = train_build_outputs[2]
+    train_opt_loss_name = train_build_outputs[3]
+
+    eval_program = fluid.Program()
+    eval_build_outputs = program.build(
+        config, eval_program, startup_program, mode='eval')
+    eval_fetch_name_list = eval_build_outputs[1]
+    eval_fetch_varname_list = eval_build_outputs[2]
+    eval_program = eval_program.clone(for_test=True)
+
+    train_reader = reader_main(config=config, mode="train")
+    train_loader.set_sample_list_generator(train_reader, places=place)
+    eval_reader = reader_main(config=config, mode="eval")
+
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+    init_model(config, train_program, exe)
+
+    sen = load_sensitivities("sensitivities_0.data")
+    # A name missing from the sensitivity file must not abort with KeyError.
+    for name in skip_list:
+        sen.pop(name, None)
+    # Also exempt every entry whose name contains 'conv1_' .. 'conv4_'.
+    back_bone_list = ['conv' + str(x) for x in range(1, 5)]
+    for prefix in back_bone_list:
+        for key in list(sen.keys()):
+            if prefix + '_' in key:
+                sen.pop(key)
+    ratios = get_ratios_by_loss(sen, 0.03)
+    logger.info("FLOPs before pruning: {}".format(flops(eval_program)))
+    pruner = Pruner(criterion='geometry_median')
+    print("ratios: {}".format(ratios))
+    pruned_val_program, _, _ = pruner.prune(
+        eval_program,
+        fluid.global_scope(),
+        params=ratios.keys(),
+        ratios=ratios.values(),
+        place=place,
+        only_graph=True)
+    pruned_program, _, _ = pruner.prune(
+        train_program,
+        fluid.global_scope(),
+        params=ratios.keys(),
+        ratios=ratios.values(),
+        place=place)
+    logger.info("FLOPs after pruning: {}".format(flops(pruned_val_program)))
+    # compile program for multi-devices
+    train_compile_program = program.create_multi_devices_program(
+        pruned_program, train_opt_loss_name)
+
+    train_info_dict = {'compile_program': train_compile_program,
+                       'train_program': pruned_program,
+                       'reader': train_loader,
+                       'fetch_name_list': train_fetch_name_list,
+                       'fetch_varname_list': train_fetch_varname_list}
+    eval_info_dict = {'program': pruned_val_program,
+                      'reader': eval_reader,
+                      'fetch_name_list': eval_fetch_name_list,
+                      'fetch_varname_list': eval_fetch_varname_list}
+
+    if alg in ['EAST', 'DB']:
+        program.train_eval_det_run(
+            config, exe, train_info_dict, eval_info_dict, is_pruning=True)
+    else:
+        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
+
+
+if __name__ == '__main__':
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
diff --git a/deploy/slim/prune/sensitivity_anal.py b/deploy/slim/prune/sensitivity_anal.py
new file mode 100644
index 00000000..b416f09a
--- /dev/null
+++ b/deploy/slim/prune/sensitivity_anal.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..', '..', '..'))
+sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))
+
+import json
+import cv2
+from paddle import fluid
+import paddleslim as slim
+from copy import deepcopy
+from tools.eval_utils.eval_det_utils import eval_det_run
+
+from tools import program
+from ppocr.utils.utility import initial_logger
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from ppocr.utils.character import CharacterOps
+from ppocr.utils.utility import create_module
+from ppocr.data.reader_main import reader_main
+
+logger = initial_logger()
+
+
+def get_pruned_params(program):
+    """Names of 4-D parameters not containing 'depthwise' or 'transpose'."""
+    def wanted(p):
+        if len(p.shape) != 4:
+            return False
+        return 'depthwise' not in p.name and 'transpose' not in p.name
+    all_params = program.global_block().all_parameters()
+    return [p.name for p in all_params if wanted(p)]
+
+
+def eval_function(eval_args, mode='eval'):
+    """Run eval_det_run with the pieces packed in eval_args; return 'hmean'."""
+    exe, config, eval_info_dict = (
+        eval_args[key] for key in ('exe', 'config', 'eval_info_dict'))
+    result = eval_det_run(exe, config, eval_info_dict, mode=mode)
+    return result['hmean']
+
+
+def main():
+    """Print the baseline hmean, then run PaddleSlim sensitivity analysis
+    with results saved to "sensitivities_0.data"."""
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    program.check_gpu(use_gpu)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+        config['Global']['char_ops'] = CharacterOps(config['Global'])
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    startup_prog = fluid.Program()
+    eval_program = fluid.Program()
+    eval_build_outputs = program.build(
+        config, eval_program, startup_prog, mode='test')
+    eval_fetch_name_list = eval_build_outputs[1]
+    eval_fetch_varname_list = eval_build_outputs[2]
+    eval_program = eval_program.clone(for_test=True)
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+    init_model(config, eval_program, exe)
+
+    eval_reader = reader_main(config=config, mode="eval")
+    eval_info_dict = {'program': eval_program,
+                      'reader': eval_reader,
+                      'fetch_name_list': eval_fetch_name_list,
+                      'fetch_varname_list': eval_fetch_varname_list}
+    eval_args = {'exe': exe, 'config': config, 'eval_info_dict': eval_info_dict}
+    metrics = eval_function(eval_args)
+    print("Baseline: {}".format(metrics))
+
+    params = get_pruned_params(eval_program)
+    print('Start to analyze')
+    slim.prune.sensitivity(
+        eval_program,
+        place,
+        params,
+        eval_function,
+        sensitivities_file="sensitivities_0.data",
+        pruned_ratios=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
+        eval_args=eval_args,
+        criterion='geometry_median')
+
+
+if __name__ == '__main__':
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md
index f2e92f54..d1aa3d71 100755
--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
@@ -1,21 +1,148 @@
> 运行示例前请先安装1.2.0或更高版本PaddleSlim
+
# 模型量化压缩教程
+压缩结果:
+
+
+
+ 序号
+ 任务
+ 模型
+ 压缩策略
+ 精度(自建中文数据集)
+ 耗时(ms)
+ 整体耗时(ms)
+ 加速比
+ 整体模型大小(M)
+ 压缩比例
+ 下载链接
+
+
+
+
+ 0
+ 检测
+ MobileNetV3_DB
+ 无
+ 61.7
+ 224
+ 375
+ -
+ 8.6
+ -
+
+
+
+ 识别
+ MobileNetV3_CRNN
+ 无
+ 62.0
+ 9.52
+
+
+
+ 1
+ 检测
+ SlimTextDet
+ PACT量化训练
+ 62.1
+ 195
+ 348
+ 8%
+ 2.8
+ 67.82%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+ 2
+ 检测
+ SlimTextDet_quat_pruning
+ 剪裁+PACT量化训练
+ 60.86
+ 142
+ 288
+ 30%
+ 2.8
+ 67.82%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+ 3
+ 检测
+ SlimTextDet_pruning
+ 剪裁
+ 61.57
+ 138
+ 295
+ 27%
+ 2.9
+ 66.28%
+
+
+
+ 识别
+ SlimTextRec
+ PACT量化训练
+ 61.48
+ 8.6
+
+
+
+
+
+
+
## 概述
+复杂的模型有利于提高模型的性能,但也导致模型中存在一定冗余,模型量化将全精度缩减到定点数减少这种冗余,达到减少模型计算复杂度,提高模型推理性能的目的。
+
该示例使用PaddleSlim提供的[量化压缩API](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)对OCR模型进行压缩。
在阅读该示例前,建议您先了解以下内容:
- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
-- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)
+- [PaddleSlim使用文档](https://paddleslim.readthedocs.io/zh_CN/latest/index.html)
+
+
## 安装PaddleSlim
-可按照[PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)中的步骤安装PaddleSlim。
+```bash
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+
+cd PaddleSlim
+
+python setup.py install
+```
+
+
+
+## 获取预训练模型
+
+[识别预训练模型下载地址]()
+
+[检测预训练模型下载地址]()
## 量化训练
+加载预训练模型后,在定义好量化策略后即可对模型进行量化。量化相关功能的使用具体细节见:[模型量化](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/quantization_api.html)
进入PaddleOCR根目录,通过以下命令对模型进行量化:
@@ -25,10 +152,11 @@ python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global
-## 评估并导出
+
+## 导出模型
在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:
```bash
-python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_model
+python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_inference_model
```
diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md
new file mode 100755
index 00000000..4b8a2b23
--- /dev/null
+++ b/deploy/slim/quantization/README_en.md
@@ -0,0 +1,167 @@
+\> PaddleSlim 1.2.0 or higher version should be installed before running this example.
+
+
+
+# Model compress tutorial (Quantization)
+
+Compress results:
+
+
+
+ ID
+ Task
+ Model
+ Compress Strategy
+ Criterion(Chinese dataset)
+ Inference Time(ms)
+ Inference Time(Total model)(ms)
+ Acceleration Ratio
+ Model Size(MB)
+ Compress Ratio
+ Download Link
+
+
+
+
+ 0
+ Detection
+ MobileNetV3_DB
+ None
+ 61.7
+ 224
+ 375
+ -
+ 8.6
+ -
+
+
+
+ Recognition
+ MobileNetV3_CRNN
+ None
+ 62.0
+ 9.52
+
+
+
+ 1
+ Detection
+ SlimTextDet
+ PACT Quant Aware Training
+ 62.1
+ 195
+ 348
+ 8%
+ 2.8
+ 67.82%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+ 2
+ Detection
+ SlimTextDet_quat_pruning
+ Pruning+PACT Quant Aware Training
+ 60.86
+ 142
+ 288
+ 30%
+ 2.8
+ 67.82%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+ 3
+ Detection
+ SlimTextDet_pruning
+ Pruning
+ 61.57
+ 138
+ 295
+ 27%
+ 2.9
+ 66.28%
+
+
+
+ Recognition
+ SlimTextRec
+ PACT Quant Aware Training
+ 61.48
+ 8.6
+
+
+
+
+
+
+
+## Overview
+
+Generally, a more complex model would achieve better performance in the task, but it also leads to some redundancy in the model. Quantization is a technique that reduces this redundancy by converting full-precision data to fixed-point numbers, so as to reduce model calculation complexity and improve model inference performance.
+
+This example uses the [APIs of Quantization](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/) provided by PaddleSlim to compress the OCR model.
+
+It is recommended that you read the following pages before reading this example:
+
+
+
+- [The training strategy of OCR model](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md)
+
+- [PaddleSlim Document](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)
+
+
+
+## Install PaddleSlim
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+
+cd PaddleSlim
+
+python setup.py install
+
+```
+
+
+## Download Pretrain Model
+
+[Download link of Detection pretrain model]()
+
+[Download link of recognition pretrain model]()
+
+
+## Quant-Aware Training
+
+After loading the pre-trained model, the model can be quantized once the quantization strategy is defined. For specific details of the quantization method, see: [Model Quantization](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/quantization_api.html)
+
+Enter the PaddleOCR root directory, then perform model quantization with the following command:
+
+```bash
+python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=det_mv3_db/best_accuracy Global.save_model_dir=./output/quant_model
+```
+
+
+
+## Export inference model
+
+After quantization-aware training, we can export the saved model as an inference_model for predictive deployment:
+
+```bash
+python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_inference_model
+```
diff --git a/doc/datasets/doc.jpg b/doc/datasets/doc.jpg
new file mode 100644
index 00000000..f57e62ab
Binary files /dev/null and b/doc/datasets/doc.jpg differ
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
new file mode 100644
index 00000000..9c2499f3
--- /dev/null
+++ b/doc/doc_ch/algorithm_overview.md
@@ -0,0 +1,78 @@
+
+## 算法介绍
+- [1.文本检测算法](#文本检测算法)
+- [2.文本识别算法](#文本识别算法)
+
+
+### 1.文本检测算法
+
+PaddleOCR开源的文本检测算法列表:
+- [x] DB([paper](https://arxiv.org/abs/1911.08947))(ppocr推荐)
+- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
+- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
+
+在ICDAR2015文本检测公开数据集上,算法效果如下:
+
+|模型|骨干网络|precision|recall|Hmean|下载链接|
+|-|-|-|-|-|-|
+|EAST|ResNet50_vd|88.18%|85.51%|86.82%|[下载链接](https://paddleocr.bj.bcebos.com/det_r50_vd_east.tar)|
+|EAST|MobileNetV3|81.67%|79.83%|80.74%|[下载链接](https://paddleocr.bj.bcebos.com/det_mv3_east.tar)|
+|DB|ResNet50_vd|83.79%|80.65%|82.19%|[下载链接](https://paddleocr.bj.bcebos.com/det_r50_vd_db.tar)|
+|DB|MobileNetV3|75.92%|73.18%|74.53%|[下载链接](https://paddleocr.bj.bcebos.com/det_mv3_db.tar)|
+|SAST|ResNet50_vd|92.18%|82.96%|87.33%|[下载链接](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_icdar2015.tar)|
+
+在Total-text文本检测公开数据集上,算法效果如下:
+
+|模型|骨干网络|precision|recall|Hmean|下载链接|
+|-|-|-|-|-|-|
+|SAST|ResNet50_vd|88.74%|79.80%|84.03%|[下载链接](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)|
+
+**说明:** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:[百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi)
+
+
+使用[LSVT](./datasets.md#1icdar2019-lsvt)街景数据集共3w张数据,训练中文检测模型的相关配置和预训练文件如下:
+
+|模型|骨干网络|配置文件|预训练模型|
+|-|-|-|-|
+|超轻量中文模型|MobileNetV3|det_mv3_db.yml|[下载链接](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db.tar)|
+|通用中文OCR模型|ResNet50_vd|det_r50_vd_db.yml|[下载链接](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db.tar)|
+
+* 注: 上述DB模型的训练和评估,需设置后处理参数box_thresh=0.6,unclip_ratio=1.5,使用不同数据集、不同模型训练,可调整这两个参数进行优化
+
+PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训练/评估中的文本检测部分](./detection.md)。
+
+
+### 2.文本识别算法
+
+PaddleOCR开源的文本识别算法列表:
+- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))(ppocr推荐)
+- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
+- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))
+- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))
+- [x] SRN([paper](https://arxiv.org/abs/2003.12294))
+
+参考[DTRB](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
+
+|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
+|-|-|-|-|-|
+|Rosetta|Resnet34_vd|80.24%|rec_r34_vd_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_r34_vd_none_none_ctc.tar)|
+|Rosetta|MobileNetV3|78.16%|rec_mv3_none_none_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_mv3_none_none_ctc.tar)|
+|CRNN|Resnet34_vd|82.20%|rec_r34_vd_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_r34_vd_none_bilstm_ctc.tar)|
+|CRNN|MobileNetV3|79.37%|rec_mv3_none_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_mv3_none_bilstm_ctc.tar)|
+|STAR-Net|Resnet34_vd|83.93%|rec_r34_vd_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_r34_vd_tps_bilstm_ctc.tar)|
+|STAR-Net|MobileNetV3|81.56%|rec_mv3_tps_bilstm_ctc|[下载链接](https://paddleocr.bj.bcebos.com/rec_mv3_tps_bilstm_ctc.tar)|
+|RARE|Resnet34_vd|84.90%|rec_r34_vd_tps_bilstm_attn|[下载链接](https://paddleocr.bj.bcebos.com/rec_r34_vd_tps_bilstm_attn.tar)|
+|RARE|MobileNetV3|83.32%|rec_mv3_tps_bilstm_attn|[下载链接](https://paddleocr.bj.bcebos.com/rec_mv3_tps_bilstm_attn.tar)|
+|SRN|Resnet50_vd_fpn|88.33%|rec_r50fpn_vd_none_srn|[下载链接](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar)|
+
+**说明:** SRN模型使用了数据扰动方法对上述提到的两个训练集进行增广,增广后的数据可以在[百度网盘](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA)上下载,提取码: y3ry。
+原始论文使用两阶段训练平均精度为89.74%,PaddleOCR中使用one-stage训练,平均精度为88.33%。两种预训练权重均在[下载链接](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar)中。
+
+使用[LSVT](./datasets.md#1icdar2019-lsvt)街景数据集根据真值将图crop出来30w数据,进行位置校准。此外基于LSVT语料生成500w合成数据训练中文模型,相关配置和预训练文件如下:
+
+|模型|骨干网络|配置文件|预训练模型|
+|-|-|-|-|
+|超轻量中文模型|MobileNetV3|rec_chinese_lite_train.yml|[下载链接](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn.tar)|
+|通用中文OCR模型|Resnet34_vd|rec_chinese_common_train.yml|[下载链接](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn.tar)|
+
+PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。
diff --git a/doc/doc_ch/angle_class.md b/doc/doc_ch/angle_class.md
new file mode 100644
index 00000000..b2118661
--- /dev/null
+++ b/doc/doc_ch/angle_class.md
@@ -0,0 +1,127 @@
+## 文字角度分类
+
+### 数据准备
+
+请按如下步骤设置数据集:
+
+训练数据的默认存储路径是 `PaddleOCR/train_data/cls`,如果您的磁盘上已有数据集,只需创建软链接至数据集目录:
+
+```
+ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/cls/dataset
+```
+
+请参考下文组织您的数据。
+- 训练集
+
+首先请将训练图片放入同一个文件夹(train_images),并用一个txt文件(cls_gt_train.txt)记录图片路径和标签。
+
+**注意:** 默认请将图片路径和图片标签用 `\t` 分割,如用其他方式分割将造成训练报错
+
+0和180分别表示图片的角度为0度和180度
+
+```
+" 图像文件名 图像标注信息 "
+
+train_data/cls/word_001.jpg 0
+train_data/cls/word_002.jpg 180
+```
+
+最终训练集应有如下文件结构:
+```
+|-train_data
+ |-cls
+ |- cls_gt_train.txt
+ |- train
+ |- word_001.png
+ |- word_002.jpg
+ |- word_003.jpg
+ | ...
+```
+
+- 测试集
+
+同训练集类似,测试集也需要提供一个包含所有图片的文件夹(test)和一个cls_gt_test.txt,测试集的结构如下所示:
+
+```
+|-train_data
+ |-cls
+ |- cls_gt_test.txt
+ |- test
+ |- word_001.jpg
+ |- word_002.jpg
+ |- word_003.jpg
+ | ...
+```
+
+### 启动训练
+
+PaddleOCR提供了训练脚本、评估脚本和预测脚本。
+
+开始训练:
+
+*如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false*
+
+```
+# 设置PYTHONPATH路径
+export PYTHONPATH=$PYTHONPATH:.
+# GPU训练 支持单卡,多卡训练,通过CUDA_VISIBLE_DEVICES指定卡号
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+# 启动训练
+python3 tools/train.py -c configs/cls/cls_mv3.yml
+```
+
+- 数据增强
+
+PaddleOCR提供了多种数据增强方式,如果您希望在训练时加入扰动,请在配置文件中设置 `distort: true`。
+
+默认的扰动方式有:颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gaussian noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse),随机数据增强(RandAugment)。
+
+训练过程中除随机数据增强外每种扰动方式以50%的概率被选择,具体代码实现请参考:
+[randaugment.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/cls/randaugment.py)
+[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+
+*由于OpenCV的兼容性问题,扰动操作暂时只支持linux*
+
+### 训练
+
+PaddleOCR支持训练和评估交替进行, 可以在 `configs/cls/cls_mv3.yml` 中修改 `eval_batch_step` 设置评估频率,默认每500个iter评估一次。评估过程中默认将最佳acc模型,保存为 `output/cls_mv3/best_accuracy` 。
+
+如果验证集很大,测试将会比较耗时,建议减少评估次数,或训练完再进行评估。
+
+**注意,预测/评估时的配置文件请务必与训练一致。**
+
+### 评估
+
+评估数据集可以通过`configs/cls/cls_reader.yml` 修改EvalReader中的 `label_file_path` 设置。
+
+*注意* 评估时必须确保配置文件中 infer_img 字段为空
+```
+export CUDA_VISIBLE_DEVICES=0
+# GPU 评估, Global.checkpoints 为待测权重
+python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+```
+
+### 预测
+
+* 训练引擎的预测
+
+使用 PaddleOCR 训练好的模型,可以通过以下脚本进行快速预测。
+
+默认预测图片存储在 `infer_img` 里,通过 `-o Global.checkpoints` 指定权重:
+
+```
+# 预测分类结果
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+```
+
+预测图片:
+
+
+
+得到输入图像的预测结果:
+
+```
+infer_img: doc/imgs_words/en/word_1.png
+ scores: [[0.93161047 0.06838956]]
+ label: [0]
+```
diff --git a/doc/doc_ch/benchmark.md b/doc/doc_ch/benchmark.md
index 65d9a534..520a2fce 100644
--- a/doc/doc_ch/benchmark.md
+++ b/doc/doc_ch/benchmark.md
@@ -1,29 +1,51 @@
# Benchmark
-本文给出了PaddleOCR超轻量中文模型(8.6M)在各平台的预测耗时benchmark。
+本文给出了中英文OCR系列模型精度指标和在各平台预测耗时的benchmark。
## 测试数据
-- 从中文公开数据集[ICDAR2017-RCTW](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/datasets.md#ICDAR2017-RCTW-17)中随机采样**500**张图像。
-该集合大部分图片是通过手机摄像头在野外采集的。有些是截图。这些图片展示了各种各样的场景,包括街景、海报、菜单、室内场景和手机应用程序的截图。
+针对OCR实际应用场景,包括合同,车牌,铭牌,火车票,化验单,表格,证书,街景文字,名片,数码显示屏等,收集的300张图像,每张图平均有17个文本框,下图给出了一些图像示例。
+
+
+
+
## 评估指标
-在四种平台上的预测耗时指标如下:
-|长边尺寸(px)|T4(s)|V100(s)|Intel至强6148(s)|骁龙855(s)|
-|-|-|-|-|-|
-|960|0.092|0.057|0.319|0.354|
-|640|0.067|0.045|0.198|0.236|
-|480|0.057|0.043|0.151|0.175|
-
-说明:
+说明:
+- v1.0是未添加优化策略的DB+CRNN模型,v1.1是添加多种优化策略和方向分类器的PP-OCR模型。slim_v1.1是使用裁剪或量化的模型。
+- 检测输入图像的长边尺寸是960。
- 评估耗时阶段为图像输入到结果输出的完整阶段,包括了图像的预处理和后处理。
-- `Intel至强6148`为服务器端CPU型号,测试中使用Intel MKL-DNN 加速CPU预测速度,使用该操作需要:
- - 更新到飞桨latest版本:https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-dev ,请根据自己环境的CUDA版本和Python版本选择相应的mkl版wheel包,如,CUDA10、Python3.7环境,应操作:
- ```shell
- # 获取安装包
- wget https://paddle-wheel.bj.bcebos.com/0.0.0-gpu-cuda10-cudnn7-mkl/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
- # 安装
- pip3.7 install paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
- ```
- - 预测时使用参数打开加速开关: `--enable_mkldnn True`
+- `Intel至强6148`为服务器端CPU型号,测试中使用Intel MKL-DNN 加速。
- `骁龙855`为移动端处理平台型号。
+
+不同预测模型大小和整体识别精度对比
+
+| 模型名称 | 整体模型 大小\(M\) | 检测模型 大小\(M\) | 方向分类器 模型大小\(M\) | 识别模型 大小\(M\) | 整体识别 F\-score |
+|:-:|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
+| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
+| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
+| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |
+
+不同预测模型在T4 GPU上预测速度对比,单位ms
+
+| 模型名称 | 整体 | 检测 | 方向分类器 | 识别 |
+|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
+| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
+| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
+| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
+
+不同预测模型在CPU上预测速度对比,单位ms
+
+| 模型名称 | 整体 | 检测 | 方向分类器 | 识别 |
+|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
+| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |
+
+裁剪量化模型和原始模型的模型大小、整体识别精度和在SD 855上预测速度对比
+
+| 模型名称 | 整体模型 大小\(M\) | 检测模型 大小\(M\) | 方向分类器 模型大小\(M\) | 识别模型 大小\(M\) | 整体识别 F\-score | SD 855 \(ms\) |
+|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
+| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |
diff --git a/doc/doc_ch/customize.md b/doc/doc_ch/customize.md
index 6e471c1c..5944bf08 100644
--- a/doc/doc_ch/customize.md
+++ b/doc/doc_ch/customize.md
@@ -6,7 +6,7 @@
PaddleOCR提供了EAST、DB两种文本检测算法,均支持MobileNetV3、ResNet50_vd两种骨干网络,根据需要选择相应的配置文件,启动训练。例如,训练使用MobileNetV3作为骨干网络的DB检测模型(即超轻量模型使用的配置):
```
-python3 tools/train.py -c configs/det/det_mv3_db.yml
+python3 tools/train.py -c configs/det/det_mv3_db.yml 2>&1 | tee det_db.log
```
更详细的数据准备和训练教程参考文档教程中[文本检测模型训练/评估/预测](./detection.md)。
@@ -14,7 +14,7 @@ python3 tools/train.py -c configs/det/det_mv3_db.yml
PaddleOCR提供了CRNN、Rosetta、STAR-Net、RARE四种文本识别算法,均支持MobileNetV3、ResNet34_vd两种骨干网络,根据需要选择相应的配置文件,启动训练。例如,训练使用MobileNetV3作为骨干网络的CRNN识别模型(即超轻量模型使用的配置):
```
-python3 tools/train.py -c configs/rec/rec_chinese_lite_train.yml
+python3 tools/train.py -c configs/rec/rec_chinese_lite_train.yml 2>&1 | tee rec_ch_lite.log
```
更详细的数据准备和训练教程参考文档教程中[文本识别模型训练/评估/预测](./recognition.md)。
diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md
index 84c90d18..c2b62edb 100644
--- a/doc/doc_ch/detection.md
+++ b/doc/doc_ch/detection.md
@@ -14,6 +14,15 @@ wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/train_icdar2015_l
wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/test_icdar2015_label.txt
```
+PaddleOCR 也提供了数据格式转换脚本,可以将官网 label 转换为支持的数据格式。 数据转换工具在 `train_data/gen_label.py`, 这里以训练集为例:
+
+```
+# 将官网下载的标签文件转换为 train_icdar2015_label.txt
+python gen_label.py --mode="det" --root_path="icdar_c4_train_imgs/" \
+ --input_path="ch4_training_localization_transcription_gt" \
+ --output_label="train_icdar2015_label.txt"
+```
+
解压数据集和下载标注文件后,PaddleOCR/train_data/ 有两个文件夹和两个文件,分别是:
```
/PaddleOCR/train_data/icdar2015/text_localization/
@@ -62,7 +71,10 @@ tar -xf ./pretrain_models/MobileNetV3_large_x0_5_pretrained.tar ./pretrain_model
*如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false*
```shell
-python3 tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/
+# 训练 mv3_db 模型,并将训练日志保存为 train_det.log
+python3 tools/train.py -c configs/det/det_mv3_db.yml \
+ -o Global.pretrain_weights=./pretrain_models/MobileNetV3_large_x0_5_pretrained/ \
+ 2>&1 | tee train_det.log
```
上述指令中,通过-c 选择训练使用configs/det/det_db_mv3.yml配置文件。
diff --git a/doc/doc_ch/framework.png b/doc/doc_ch/framework.png
new file mode 100644
index 00000000..db151a0e
Binary files /dev/null and b/doc/doc_ch/framework.png differ
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
index 293fee2f..709a0751 100644
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -11,24 +11,29 @@ inference 模型(`fluid.io.save_inference_model`保存的模型)
- [一、训练模型转inference模型](#训练模型转inference模型)
- [检测模型转inference模型](#检测模型转inference模型)
- [识别模型转inference模型](#识别模型转inference模型)
-
+ - [方向分类模型转inference模型](#方向分类模型转inference模型)
+
- [二、文本检测模型推理](#文本检测模型推理)
- [1. 超轻量中文检测模型推理](#超轻量中文检测模型推理)
- [2. DB文本检测模型推理](#DB文本检测模型推理)
- [3. EAST文本检测模型推理](#EAST文本检测模型推理)
- [4. SAST文本检测模型推理](#SAST文本检测模型推理)
-
+
- [三、文本识别模型推理](#文本识别模型推理)
- [1. 超轻量中文识别模型推理](#超轻量中文识别模型推理)
- [2. 基于CTC损失的识别模型推理](#基于CTC损失的识别模型推理)
- [3. 基于Attention损失的识别模型推理](#基于Attention损失的识别模型推理)
- - [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
-
-- [四、文本检测、识别串联推理](#文本检测、识别串联推理)
+ - [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
+ - [5. 多语言模型的推理](#多语言模型的推理)
+
+- [四、方向分类模型推理](#方向分类模型推理)
+ - [1. 方向分类模型推理](#方向分类模型推理)
+
+- [五、文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理)
- [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理)
- [2. 其他模型推理](#其他模型推理)
-
-
+
+
## 一、训练模型转inference模型
@@ -84,6 +89,32 @@ python3 tools/export_model.py -c configs/rec/rec_chinese_lite_train.yml -o Globa
└─ params 识别inference模型的参数文件
```
+
+### 方向分类模型转inference模型
+
+下载方向分类模型:
+```
+wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile-v1.1.cls_pre.tar && tar xf ./ch_lite/ch_ppocr_mobile-v1.1.cls_pre.tar -C ./ch_lite/
+```
+
+方向分类模型转inference模型与检测的方式相同,如下:
+```
+# -c后面设置训练算法的yml配置文件
+# -o配置可选参数
+# Global.checkpoints参数设置待转换的训练模型地址,不用添加文件后缀.pdmodel,.pdopt或.pdparams。
+# Global.save_inference_dir参数设置转换的模型将保存的地址。
+
+python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/cls_model/best_accuracy \
+ Global.save_inference_dir=./inference/cls/
+```
+
+转换成功后,在目录下有两个文件:
+```
+/inference/cls/
+ └─ model 方向分类inference模型的program文件
+ └─ params 方向分类inference模型的参数文件
+```
+
## 二、文本检测模型推理
@@ -275,15 +306,52 @@ dict_character = list(self.character_str)
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_char_dict_path="your text dict path"
```
-
-## 四、文本检测、识别串联推理
+
+### 5. 多语言模型的推理
+如果您需要预测的是其他语言模型,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径, 同时为了得到正确的可视化结果,
+需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/` 路径下有默认提供的小语种字体,例如韩文识别:
+
+```
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/korean_dict.txt" --vis_font_path="doc/korean.ttf"
+```
+
+
+执行命令后,上图的预测结果为:
+``` text
+2020-09-19 16:15:05,076-INFO: index: [205 206 38 39]
+2020-09-19 16:15:05,077-INFO: word : 바탕으로
+2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535
+```
+
+
+## 四、方向分类模型推理
+
+下面将介绍方向分类模型推理。
+
+
+### 1. 方向分类模型推理
+
+方向分类模型推理,可以执行如下命令:
+
+```
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+```
+
+
+
+执行命令后,上面图像的预测结果(分类的方向和得分)会打印到屏幕上,示例如下:
+
+Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999963]
+
+
+## 五、文本检测、方向分类和文字识别串联推理
### 1. 超轻量中文OCR模型推理
-在执行预测时,需要通过参数image_dir指定单张图像或者图像集合的路径、参数det_model_dir指定检测inference模型的路径和参数rec_model_dir指定识别inference模型的路径。可视化识别结果默认保存到 ./inference_results 文件夹里面。
+在执行预测时,需要通过参数`image_dir`指定单张图像或者图像集合的路径、参数`det_model_dir`,`cls_model_dir`和`rec_model_dir`分别指定检测,方向分类和识别的inference模型路径。参数`use_angle_cls`用于控制是否启用方向分类模型。可视化识别结果默认保存到 ./inference_results 文件夹里面。
```
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/"
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls true
```
执行命令后,识别结果图像如下:
diff --git a/doc/doc_ch/installation.md b/doc/doc_ch/installation.md
index 5bc14b89..381d1a9e 100644
--- a/doc/doc_ch/installation.md
+++ b/doc/doc_ch/installation.md
@@ -7,7 +7,7 @@ PaddleOCR 工作环境
- glibc 2.23
- cuDNN 7.6+ (GPU)
-建议使用我们提供的docker运行PaddleOCR,有关docker、nvidia-docker使用请参考[链接](https://docs.docker.com/get-started/)。
+建议使用我们提供的docker运行PaddleOCR,有关docker、nvidia-docker使用请参考[链接](https://www.runoob.com/docker/docker-tutorial.html)。
*如您希望使用 mac 或 windows直接运行预测代码,可以从第2步开始执行。*
diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md
new file mode 100644
index 00000000..cf389bdc
--- /dev/null
+++ b/doc/doc_ch/models_list.md
@@ -0,0 +1,71 @@
+## OCR模型列表(V1.1,9月22日更新)
+
+- [一、文本检测模型](#文本检测模型)
+- [二、文本识别模型](#文本识别模型)
+ - [1. 中文识别模型](#中文识别模型)
+ - [2. 英文识别模型](#英文识别模型)
+ - [3. 多语言识别模型](#多语言识别模型)
+- [三、文本方向分类模型](#文本方向分类模型)
+
+PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训练模型`、`slim模型`,模型区别说明如下:
+
+|模型类型|模型格式|简介|
+|-|-|-|
+|推理模型|model、params|用于python预测引擎推理,[详情](./inference.md)|
+|训练模型、预训练模型|\*.pdmodel、\*.pdopt、\*.pdparams|训练过程中保存的checkpoints模型,保存的是模型的参数,多用于模型指标评估和恢复训练|
+|slim模型|\*.nb|用于lite部署|
+
+
+
+### 一、文本检测模型
+|模型名称|模型简介|推理模型大小|下载地址|
+|-|-|-|-|
+|ch_ppocr_mobile_slim_v1.1_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|1.4M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_opt.nb)|
+|ch_ppocr_mobile_v1.1_det|原始超轻量模型,支持中英文、多语种文本检测|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_train.tar)|
+|ch_ppocr_server_v1.1_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|47.2M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_train.tar)|
+
+
+
+### 二、文本识别模型
+
+
+#### 1. 中文识别模型
+|模型名称|模型简介|推理模型大小|下载地址|
+|-|-|-|-|
+|ch_ppocr_mobile_slim_v1.1_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|1.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_opt.nb)|
+|ch_ppocr_mobile_v1.1_rec|原始超轻量模型,支持中英文、数字识别|4.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_pre.tar)|
+|ch_ppocr_server_v1.1_rec|通用模型,支持中英文、数字识别|105M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_pre.tar)|
+
+**说明:** `训练模型`是基于预训练模型在真实数据与竖排合成文本数据上finetune得到的模型,在真实应用场景中有着更好的表现,`预训练模型`则是直接基于全量真实数据与合成数据训练得到,更适合用于在自己的数据集上finetune。
+
+
+#### 2. 英文识别模型
+|模型名称|模型简介|推理模型大小|下载地址|
+|-|-|-|-|
+|en_ppocr_mobile_slim_v1.1_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|0.9M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_opt.nb)|
+|en_ppocr_mobile_v1.1_rec|原始超轻量模型,支持英文、数字识别|2.0M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_train.tar)|
+
+
+#### 3. 多语言识别模型(更多语言持续更新中...)
+|模型名称|模型简介|推理模型大小|下载地址|
+|-|-|-|-|
+| french_ppocr_mobile_v1.1_rec |法文识别|2.1M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_train.tar)|
+| german_ppocr_mobile_v1.1_rec |德文识别|2.1M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_train.tar)|
+| korean_ppocr_mobile_v1.1_rec |韩文识别|3.4M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_train.tar)|
+| japan_ppocr_mobile_v1.1_rec |日文识别|3.7M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_train.tar)|
+
+
+
+### 三、文本方向分类模型
+|模型名称|模型简介|推理模型大小|下载地址|
+|-|-|-|-|
+|ch_ppocr_mobile_v1.1_cls_quant|slim量化版模型|0.5M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_train.tar) / [slim模型]()|
+|ch_ppocr_mobile_v1.1_cls|原始模型|850kb|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_train.tar)|
+
+
+## OCR模型列表(V1.0,7月16日更新)
+
+|模型名称|模型简介|检测模型地址|识别模型地址|支持空格的识别模型地址|
+|-|-|-|-|-|
+|chinese_db_crnn_mobile|8.6M超轻量级中文OCR模型|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db.tar)|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn.tar)|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance.tar)
+|chinese_db_crnn_server|通用中文OCR模型|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db.tar)|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn.tar)|[推理模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance.tar)
diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md
index 1dc52efa..c8955f7f 100644
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -44,6 +44,13 @@ wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_t
wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
```
+PaddleOCR 也提供了数据格式转换脚本,可以将官网 label 转换为支持的数据格式。 数据转换工具在 `train_data/gen_label.py`, 这里以训练集为例:
+
+```
+# 将官网下载的标签文件转换为 rec_gt_label.txt
+python gen_label.py --mode="rec" --input_path="{path/of/origin/label}" --output_label="rec_gt_label.txt"
+```
+
最终训练集应有如下文件结构:
```
|-train_data
@@ -128,8 +135,8 @@ tar -xf rec_mv3_none_bilstm_ctc.tar && rm -rf rec_mv3_none_bilstm_ctc.tar
export PYTHONPATH=$PYTHONPATH:.
# GPU训练 支持单卡,多卡训练,通过CUDA_VISIBLE_DEVICES指定卡号
export CUDA_VISIBLE_DEVICES=0,1,2,3
-# 训练icdar15英文数据
-python3 tools/train.py -c configs/rec/rec_icdar15_train.yml
+# 训练icdar15英文数据 并将训练日志保存为 train_rec.log
+python3 tools/train.py -c configs/rec/rec_icdar15_train.yml 2>&1 | tee train_rec.log
```
- 数据增强
@@ -201,7 +208,19 @@ Optimizer:
```
**注意,预测/评估时的配置文件请务必与训练一致。**
+- 小语种
+PaddleOCR也支持多语言模型的训练,`configs/rec/multi_languages` 路径下提供了多语言的配置文件,目前PaddleOCR支持的多语言算法有:
+
+| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
+| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
+| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 |
+| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
+| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
+| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
+| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
+
+多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体和测试数据可以在[百度网盘]()上下载。
### 评估
diff --git a/doc/doc_ch/tree.md b/doc/doc_ch/tree.md
new file mode 100644
index 00000000..f730d8f0
--- /dev/null
+++ b/doc/doc_ch/tree.md
@@ -0,0 +1,208 @@
+# 整体目录结构
+
+PaddleOCR 的整体目录结构介绍如下:
+
+```
+PaddleOCR
+├── configs // 配置文件,可通过yml文件选择模型结构并修改超参
+│ ├── cls // 方向分类器相关配置文件
+│ │ ├── cls_mv3.yml // 训练配置相关,包括骨干网络、head、loss、优化器
+│ │ └── cls_reader.yml // 数据读取相关,数据读取方式、数据存储路径
+│ ├── det // 检测相关配置文件
+│ │ ├── det_db_icdar15_reader.yml // 数据读取
+│ │ ├── det_mv3_db.yml // 训练配置
+│ │ ...
+│ └── rec // 识别相关配置文件
+│ ├── rec_benchmark_reader.yml // LMDB 格式数据读取相关
+│ ├── rec_chinese_common_train.yml // 通用中文训练配置
+│ ├── rec_icdar15_reader.yml // simple 数据读取相关,包括数据读取函数、数据路径、标签文件
+│ ...
+├── deploy // 部署相关
+│ ├── android_demo // android_demo
+│ │ ...
+│ ├── cpp_infer // C++ infer
+│ │ ├── CMakeLists.txt // Cmake 文件
+│ │ ├── docs // 说明文档
+│ │ │ └── windows_vs2019_build.md
+│ │ ├── include // 头文件
+│ │ │ ├── clipper.h // clipper 库
+│ │ │ ├── config.h // 预测配置
+│ │ │ ├── ocr_cls.h // 方向分类器
+│ │ │ ├── ocr_det.h // 文字检测
+│ │ │ ├── ocr_rec.h // 文字识别
+│ │ │ ├── postprocess_op.h // 检测后处理
+│ │ │ ├── preprocess_op.h // 检测预处理
+│ │ │ └── utility.h // 工具
+│ │ ├── readme.md // 说明文档
+│ │ ├── ...
+│ │ ├── src // 源文件
+│ │ │ ├── clipper.cpp
+│ │ │ ├── config.cpp
+│ │ │ ├── main.cpp
+│ │ │ ├── ocr_cls.cpp
+│ │ │ ├── ocr_det.cpp
+│ │ │ ├── ocr_rec.cpp
+│ │ │ ├── postprocess_op.cpp
+│ │ │ ├── preprocess_op.cpp
+│ │ │ └── utility.cpp
+│ │ └── tools // 编译、执行脚本
+│ │ ├── build.sh // 编译脚本
+│ │ ├── config.txt // 配置文件
+│ │ └── run.sh // 测试启动脚本
+│ ├── docker
+│ │ └── hubserving
+│ │ ├── cpu
+│ │ │ └── Dockerfile
+│ │ ├── gpu
+│ │ │ └── Dockerfile
+│ │ ├── README_cn.md
+│ │ ├── README.md
+│ │ └── sample_request.txt
+│ ├── hubserving // hubserving
+│ │ ├── ocr_det // 文字检测
+│ │ │ ├── config.json // serving 配置
+│ │ │ ├── __init__.py
+│ │ │ ├── module.py // 预测模型
+│ │ │ └── params.py // 预测参数
+│ │ ├── ocr_rec // 文字识别
+│ │ │ ├── config.json
+│ │ │ ├── __init__.py
+│ │ │ ├── module.py
+│ │ │ └── params.py
+│ │ └── ocr_system // 系统预测
+│ │ ├── config.json
+│ │ ├── __init__.py
+│ │ ├── module.py
+│ │ └── params.py
+│ ├── imgs // 预测图片
+│ │ ├── cpp_infer_pred_12.png
+│ │ └── demo.png
+│ ├── ios_demo // ios demo
+│ │ ...
+│ ├── lite // lite 部署
+│ │ ├── cls_process.cc // 方向分类器数据处理
+│ │ ├── cls_process.h
+│ │ ├── config.txt // 检测配置参数
+│ │ ├── crnn_process.cc // crnn数据处理
+│ │ ├── crnn_process.h
+│ │ ├── db_post_process.cc // db数据处理
+│ │ ├── db_post_process.h
+│ │ ├── Makefile // 编译文件
+│ │ ├── ocr_db_crnn.cc // 串联预测
+│ │ ├── prepare.sh // 数据准备
+│ │ ├── readme.md // 说明文档
+│ │ ...
+│ ├── pdserving // pdserving 部署
+│ │ ├── det_local_server.py // 检测 快速版,部署方便预测速度快
+│ │ ├── det_web_server.py // 检测 完整版,稳定性高分布式部署
+│ │ ├── ocr_local_server.py // 检测+识别 快速版
+│ │ ├── ocr_web_client.py // 客户端
+│ │ ├── ocr_web_server.py // 检测+识别 完整版
+│ │ ├── readme.md // 说明文档
+│ │ ├── rec_local_server.py // 识别 快速版
+│ │ └── rec_web_server.py // 识别 完整版
+│ └── slim
+│ └── quantization // 量化相关
+│ ├── export_model.py // 导出模型
+│ ├── quant.py // 量化
+│ └── README.md // 说明文档
+├── doc // 文档教程
+│ ...
+├── paddleocr.py
+├── ppocr // 网络核心代码
+│ ├── data // 数据处理
+│ │ ├── cls // 方向分类器
+│ │ │ ├── dataset_traversal.py // 数据传输,定义数据读取器,读取数据并组成batch
+│ │ │ └── randaugment.py // 随机数据增广操作
+│ │ ├── det // 检测
+│ │ │ ├── data_augment.py // 数据增广操作
+│ │ │ ├── dataset_traversal.py // 数据传输,定义数据读取器,读取数据并组成batch
+│ │ │ ├── db_process.py // db 数据处理
+│ │ │ ├── east_process.py // east 数据处理
+│ │ │ ├── make_border_map.py // 生成边界图
+│ │ │ ├── make_shrink_map.py // 生成收缩图
+│ │ │ ├── random_crop_data.py // 随机切割
+│ │ │ └── sast_process.py // sast 数据处理
+│ │ ├── reader_main.py // 数据读取器主函数
+│ │ └── rec // 识别
+│ │ ├── dataset_traversal.py // 数据传输,定义数据读取器,包含 LMDB_Reader 和 Simple_Reader
+│ │ └── img_tools.py // 数据处理相关,包括数据归一化、扰动
+│ ├── __init__.py
+│ ├── modeling // 组网相关
+│ │ ├── architectures // 模型架构,定义模型所需的各个模块
+│ │ │ ├── cls_model.py // 方向分类器
+│ │ │ ├── det_model.py // 检测
+│ │ │ └── rec_model.py // 识别
+│ │ ├── backbones // 骨干网络
+│ │ │ ├── det_mobilenet_v3.py // 检测 mobilenet_v3
+│ │ │ ├── det_resnet_vd.py
+│ │ │ ├── det_resnet_vd_sast.py
+│ │ │ ├── rec_mobilenet_v3.py // 识别 mobilenet_v3
+│ │ │ ├── rec_resnet_fpn.py
+│ │ │ └── rec_resnet_vd.py
+│ │ ├── common_functions.py // 公共函数
+│ │ ├── heads // 头函数
+│ │ │ ├── cls_head.py // 分类头
+│ │ │ ├── det_db_head.py // db 检测头
+│ │ │ ├── det_east_head.py // east 检测头
+│ │ │ ├── det_sast_head.py // sast 检测头
+│ │ │ ├── rec_attention_head.py // 识别 attention
+│ │ │ ├── rec_ctc_head.py // 识别 ctc
+│ │ │ ├── rec_seq_encoder.py // 识别 序列编码
+│ │ │ ├── rec_srn_all_head.py // 识别 srn 相关
+│ │ │ └── self_attention // srn attention
+│ │ │ └── model.py
+│ │ ├── losses // 损失函数
+│ │ │ ├── cls_loss.py // 方向分类器损失函数
+│ │ │ ├── det_basic_loss.py // 检测基础loss
+│ │ │ ├── det_db_loss.py // DB loss
+│ │ │ ├── det_east_loss.py // EAST loss
+│ │ │ ├── det_sast_loss.py // SAST loss
+│ │ │ ├── rec_attention_loss.py // attention loss
+│ │ │ ├── rec_ctc_loss.py // ctc loss
+│ │ │ └── rec_srn_loss.py // srn loss
+│ │ └── stns // 空间变换网络
+│ │ └── tps.py // TPS 变换
+│ ├── optimizer.py // 优化器
+│ ├── postprocess // 后处理
+│ │ ├── db_postprocess.py // DB 后处理
+│ │ ├── east_postprocess.py // East 后处理
+│ │ ├── lanms // lanms 相关
+│ │ │ ...
+│ │ ├── locality_aware_nms.py // nms
+│ │ └── sast_postprocess.py // sast 后处理
+│ └── utils // 工具
+│ ├── character.py // 字符处理,包括对文本的编码和解码,计算预测准确率
+│ ├── check.py // 参数加载检查
+│ ├── ic15_dict.txt // 英文数字字典,区分大小写
+│ ├── ppocr_keys_v1.txt // 中文字典,用于训练中文模型
+│ ├── save_load.py // 模型保存和加载函数
+│ ├── stats.py // 统计
+│ └── utility.py // 工具函数,包含输入参数是否合法等相关检查工具
+├── README_en.md // 说明文档
+├── README.md
+├── requirments.txt // 安装依赖
+├── setup.py // whl包打包脚本
+└── tools // 启动工具
+ ├── eval.py // 评估函数
+ ├── eval_utils // 评估工具
+ │ ├── eval_cls_utils.py // 分类相关
+ │ ├── eval_det_iou.py // 检测 iou 相关
+ │ ├── eval_det_utils.py // 检测相关
+ │ ├── eval_rec_utils.py // 识别相关
+ │ └── __init__.py
+ ├── export_model.py // 导出 infer 模型
+ ├── infer // 基于预测引擎预测
+ │ ├── predict_cls.py
+ │ ├── predict_det.py
+ │ ├── predict_rec.py
+ │ ├── predict_system.py
+ │ └── utility.py
+ ├── infer_cls.py // 基于训练引擎 预测分类
+ ├── infer_det.py // 基于训练引擎 预测检测
+ ├── infer_rec.py // 基于训练引擎 预测识别
+ ├── program.py // 整体流程
+ ├── test_hubserving.py
+ └── train.py // 启动训练
+
+```
diff --git a/doc/doc_ch/tricks.md b/doc/doc_ch/tricks.md
deleted file mode 100644
index b6852bc9..00000000
--- a/doc/doc_ch/tricks.md
+++ /dev/null
@@ -1,68 +0,0 @@
-## 中文OCR训练预测技巧
-这里整理了一些中文OCR训练预测技巧,持续更新中,欢迎各位小伙伴贡献OCR炼丹秘籍~
-- [更换骨干网络](#更换骨干网络)
-- [中文长文本识别](#中文长文本识别)
-- [空格识别](#空格识别)
-
-
-#### 1、更换骨干网络
-- **问题描述**
-
- 目前PaddleOCR中使用的骨干网络有ResNet_vd系列和MobileNetV3系列,更换骨干网络是否有助于效果提升?更换时需要注意什么?
-
-- **炼丹建议**
-
- - 无论是文字检测,还是文字识别,骨干网络的选择是预测效果和预测效率的权衡。一般,选择更大规模的骨干网络,例如ResNet101_vd,则检测或识别更准确,但预测耗时相应也会增加。而选择更小规模的骨干网络,例如MobileNetV3_small_x0_35,则预测更快,但检测或识别的准确率会大打折扣。幸运的是不同骨干网络的检测或识别效果与在ImageNet数据集图像1000分类任务效果正相关。[**飞桨图像分类套件PaddleClas**](https://github.com/PaddlePaddle/PaddleClas)汇总了ResNet_vd、Res2Net、HRNet、MobileNetV3、GhostNet等23种系列的分类网络结构,在上述图像分类任务的top1识别准确率,GPU(V100和T4)和CPU(骁龙855)的预测耗时以及相应的[**117个预训练模型下载地址**](https://paddleclas.readthedocs.io/zh_CN/latest/models/models_intro.html)。
- - 文字检测骨干网络的替换,主要是确定类似与ResNet的4个stages,以方便集成后续的类似FPN的检测头。此外,对于文字检测问题,使用ImageNet训练的分类预训练模型,可以加速收敛和效果提升。
- - 文字识别的骨干网络的替换,需要注意网络宽高stride的下降位置。由于文本识别一般宽高比例很大,因此高度下降频率少一些,宽度下降频率多一些。可以参考PaddleOCR中[MobileNetV3骨干网络](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/modeling/backbones/rec_mobilenet_v3.py)的改动。
-
-
-#### 2、中文长文本识别
-- **问题描述**
-
- 中文识别模型训练时分辨率最大是[3,32,320],如果待识别的文本图像太长,如下图所示,该如何适配?
-
-
-
-
-
-- **炼丹建议**
-
- 在中文识别模型训练时,并不是采用直接将训练样本缩放到[3,32,320]进行训练,而是先等比例缩放图像,保证图像高度为32,宽度不足320的部分补0,宽高比大于10的样本直接丢弃。预测时,如果是单张图像预测,则按上述操作直接对图像缩放,不做宽度320的限制。如果是多张图预测,则采用batch方式预测,每个batch的宽度动态变换,采用这个batch中最长宽度。[参考代码如下](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/tools/infer/predict_rec.py):
-
- ```
- def resize_norm_img(self, img, max_wh_ratio):
- imgC, imgH, imgW = self.rec_image_shape
- assert imgC == img.shape[2]
- if self.character_type == "ch":
- imgW = int((32 * max_wh_ratio))
- h, w = img.shape[:2]
- ratio = w / float(h)
- if math.ceil(imgH * ratio) > imgW:
- resized_w = imgW
- else:
- resized_w = int(math.ceil(imgH * ratio))
- resized_image = cv2.resize(img, (resized_w, imgH))
- resized_image = resized_image.astype('float32')
- resized_image = resized_image.transpose((2, 0, 1)) / 255
- resized_image -= 0.5
- resized_image /= 0.5
- padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
- padding_im[:, :, 0:resized_w] = resized_image
- return padding_im
- ```
-
-
-#### 3、空格识别
-- **问题描述**
-
- 如下图所示,对于中英文混合场景,为了便于阅读和使用识别结果,往往需要将单词之间的空格识别出来,这种情况如何适配?
-
-
-
-
-
-- **炼丹建议**
-
- 空格识别可以考虑以下两种方案:(1)优化文本检测算法。检测结果在空格处将文本断开。这种方案在检测数据标注时,需要将含有空格的文本行分成好多段。(2)优化文本识别算法。在识别字典里面引入空格字符,然后在识别的训练数据中,如果用空行,进行标注。此外,合成数据时,通过拼接训练数据,生成含有空格的文本。PaddleOCR目前采用的是第二种方案。
-
\ No newline at end of file
diff --git a/doc/doc_ch/update.md b/doc/doc_ch/update.md
index 23a47df5..017cd947 100644
--- a/doc/doc_ch/update.md
+++ b/doc/doc_ch/update.md
@@ -1,4 +1,7 @@
# 更新
+- 2020.9.19 更新超轻量压缩ppocr_mobile_slim系列模型,整体模型3.5M(详见[PP-OCR Pipeline](#PP-OCR)),适合在移动端部署使用。[模型下载](#模型下载)
+- 2020.9.17 更新超轻量ppocr_mobile系列和通用ppocr_server系列中英文ocr模型,媲美商业效果。[模型下载](#模型下载)
+- 2020.8.26 更新OCR相关的84个常见问题及解答,具体参考[FAQ](./FAQ.md)
- 2020.8.24 支持通过whl包安装使用PaddleOCR,具体参考[Paddleocr Package使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md)
- 2020.8.21 更新8月18日B站直播课回放和PPT,课节2,易学易用的OCR工具大礼包,[获取地址](https://aistudio.baidu.com/aistudio/education/group/info/1519)
- 2020.8.16 开源文本检测算法[SAST](https://arxiv.org/abs/1908.05498)和文本识别算法[SRN](https://arxiv.org/abs/2003.12294)
diff --git a/doc/doc_ch/visualization.md b/doc/doc_ch/visualization.md
index 5a711fe9..fca07591 100644
--- a/doc/doc_ch/visualization.md
+++ b/doc/doc_ch/visualization.md
@@ -1,10 +1,27 @@
# 效果展示
-- [超轻量级中文OCR效果展示](#超轻量级中文OCR)
-- [通用中文OCR效果展示](#通用中文OCR)
-- [支持空格的中文OCR效果展示](#支持空格的中文OCR)
+- PP-OCR 1.1系列模型效果
+ - [通用ppocr_server_1.1效果展示](#通用ppocr_server_1.1效果展示)
+ - [通用ppocr_mobile_1.1效果展示(待补充)]()
+- PP-OCR 1.0系列模型效果
+ - [超轻量ppocr_mobile_1.0效果展示](#超轻量ppocr_mobile_1.0效果展示)
+ - [通用ppocr_server_1.0效果展示](#通用ppocr_server_1.0效果展示)
-
-## 超轻量级中文OCR效果展示
+
+## 通用ppocr_server_1.1效果展示
+
+
+
+
+
+
+## 超轻量ppocr_mobile_1.0效果展示
@@ -14,32 +31,17 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-## 通用中文OCR效果展示
+
+## 通用ppocr_server_1.0效果展示
@@ -52,16 +54,3 @@
-
-
-## 支持空格的中文OCR效果展示
-
-### 轻量级模型
-
-
-
-
-### 通用模型
-
-
-
diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md
index 280cc2f6..46796ce6 100644
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -12,11 +12,46 @@ pip install paddleocr
本地构建并安装
```bash
python setup.py bdist_wheel
-pip install dist/paddleocr-0.0.3-py3-none-any.whl
+pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本号
```
### 1. 代码使用
-* 检测+识别全流程
+* 检测+分类+识别全流程
+```python
+from paddleocr import PaddleOCR, draw_ocr
+# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语,可以通过修改lang参数进行切换
+# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs/11.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+ print(line)
+
+# 显示结果
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+结果是一个list,每个item包含了文本框,文字和识别置信度
+```bash
+[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
+[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]
+......
+```
+结果可视化
+
+
+
+
+
+
+* 检测+识别
```python
from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR() # need to run only once to download and load model into memory
@@ -48,12 +83,27 @@ im_show.save('result.jpg')
+
+* 分类+识别
+```python
+from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
+result = ocr.ocr(img_path, det=False, cls=True)
+for line in result:
+ print(line)
+```
+结果是一个list,每个item只包含识别结果和识别置信度
+```bash
+['韩国小馆', 0.9907421]
+```
+
* 单独执行检测
```python
from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs/11.jpg'
-result = ocr.ocr(img_path,rec=False)
+result = ocr.ocr(img_path, rec=False)
for line in result:
print(line)
@@ -84,7 +134,7 @@ im_show.save('result.jpg')
from paddleocr import PaddleOCR
ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
-result = ocr.ocr(img_path,det=False)
+result = ocr.ocr(img_path, det=False)
for line in result:
print(line)
```
@@ -93,6 +143,20 @@ for line in result:
['韩国小馆', 0.9907421]
```
+* 单独执行分类
+```python
+from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
+result = ocr.ocr(img_path, det=False, rec=False, cls=True)
+for line in result:
+ print(line)
+```
+结果是一个list,每个item只包含分类结果和分类置信度
+```bash
+['0', 0.9999924]
+```
+
### 通过命令行使用
查看帮助信息
@@ -100,7 +164,19 @@ for line in result:
paddleocr -h
```
-* 检测+识别全流程
+* 检测+分类+识别全流程
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --cls true
+```
+结果是一个list,每个item包含了文本框,文字和识别置信度
+```bash
+[[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
+[[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
+[[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['(45元/每公斤,100公斤起订)', 0.9676722]]
+......
+```
+
+* 检测+识别
```bash
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
```
@@ -112,6 +188,16 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
......
```
+* 分类+识别
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false
+```
+
+结果是一个list,每个item只包含识别结果和识别置信度
+```bash
+['韩国小馆', 0.9907421]
+```
+
* 单独执行检测
```bash
paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
@@ -134,17 +220,27 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
['韩国小馆', 0.9907421]
```
+* 单独执行分类
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --cls true --det false --rec false
+```
+
+结果是一个list,每个item只包含分类结果和分类置信度
+```bash
+['0', 0.9999924]
+```
+
## 自定义模型
当内置模型无法满足需求时,需要使用到自己训练的模型。
-首先,参照[inference.md](./inference.md) 第一节转换将检测和识别模型转换为inference模型,然后按照如下方式使用
+首先,参照[inference.md](./inference.md) 第一节将检测、分类和识别模型转换为inference模型,然后按照如下方式使用
### 代码使用
```python
from paddleocr import PaddleOCR, draw_ocr
-# 检测模型和识别模型路径下必须含有model和params文件
-ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}')
+# 模型路径下必须含有model和params文件
+ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
img_path = 'PaddleOCR/doc/imgs/11.jpg'
-result = ocr.ocr(img_path)
+result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
@@ -162,7 +258,7 @@ im_show.save('result.jpg')
### 通过命令行使用
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir}
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
```
## 参数说明
@@ -182,13 +278,21 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
| det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 |
| det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 |
| rec_algorithm | 使用的识别算法类型 | CRNN |
-| rec_model_dir | 识别模型所在文件夹。传承那方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None |
+| rec_model_dir | 识别模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None |
| rec_image_shape | 识别算法的输入图片尺寸 | "3,32,320" |
| rec_char_type | 识别算法的字符类型,中文(ch)或英文(en) | ch |
| rec_batch_num | 进行识别时,同时前向的图片数 | 30 |
| max_text_length | 识别算法能识别的最大文字长度 | 25 |
| rec_char_dict_path | 识别模型字典路径,当rec_model_dir使用方式2传参时需要修改为自己的字典路径 | ./ppocr/utils/ppocr_keys_v1.txt |
| use_space_char | 是否识别空格 | TRUE |
+| use_angle_cls | 是否加载分类模型 | FALSE |
+| cls_model_dir | 分类模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/cls`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None |
+| cls_image_shape | 分类算法的输入图片尺寸 | "3, 48, 192" |
+| label_list | 分类算法的标签列表 | ['0', '180'] |
+| cls_batch_num | 进行分类时,同时前向的图片数 |30 |
| enable_mkldnn | 是否启用mkldnn | FALSE |
+| use_zero_copy_run | 是否通过zero_copy_run的方式进行前向 | FALSE |
+| lang | 模型语言类型,目前支持 中文(ch)和英文(en) | ch |
| det | 前向时使用启动检测 | TRUE |
| rec | 前向时是否启动识别 | TRUE |
+| cls | 前向时是否启动分类 | FALSE |
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
new file mode 100644
index 00000000..3b0f7784
--- /dev/null
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -0,0 +1,77 @@
+## Algorithm introduction
+
+[TOC]
+
+
+
+### 1. Text Detection Algorithm
+
+PaddleOCR open source text detection algorithms list:
+- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
+- [x] DB([paper](https://arxiv.org/abs/1911.08947))
+- [x] SAST([paper](https://arxiv.org/abs/1908.05498))(Baidu Self-Research)
+
+On the ICDAR2015 dataset, the text detection result is as follows:
+
+|Model|Backbone|precision|recall|Hmean|Download link|
+|-|-|-|-|-|-|
+|EAST|ResNet50_vd|88.18%|85.51%|86.82%|[Download link](https://paddleocr.bj.bcebos.com/det_r50_vd_east.tar)|
+|EAST|MobileNetV3|81.67%|79.83%|80.74%|[Download link](https://paddleocr.bj.bcebos.com/det_mv3_east.tar)|
+|DB|ResNet50_vd|83.79%|80.65%|82.19%|[Download link](https://paddleocr.bj.bcebos.com/det_r50_vd_db.tar)|
+|DB|MobileNetV3|75.92%|73.18%|74.53%|[Download link](https://paddleocr.bj.bcebos.com/det_mv3_db.tar)|
+|SAST|ResNet50_vd|92.18%|82.96%|87.33%|[Download link](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_icdar2015.tar)|
+
+On Total-Text dataset, the text detection result is as follows:
+
+|Model|Backbone|precision|recall|Hmean|Download link|
+|-|-|-|-|-|-|
+|SAST|ResNet50_vd|88.74%|79.80%|84.03%|[Download link](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)|
+
+**Note:** Additional data, like icdar2013, icdar2017, COCO-Text, ArT, was added to the model training of SAST. Download English public dataset in organized format used by PaddleOCR from [Baidu Drive](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (download code: 2bpi).
+
+For the [LSVT](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/datasets_en.md#1-icdar2019-lsvt) street view dataset, which contains 30,000 training images, the related configuration files and pre-trained models for the text detection task are as follows:
+|Model|Backbone|Configuration file|Pre-trained model|
+|-|-|-|-|
+|ultra-lightweight OCR model|MobileNetV3|det_mv3_db.yml|[Download link](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db.tar)|
+|General OCR model|ResNet50_vd|det_r50_vd_db.yml|[Download link](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db.tar)|
+
+* Note: For the training and evaluation of the above DB model, post-processing parameters box_thresh=0.6 and unclip_ratio=1.5 need to be set. If using different datasets and different models for training, these two parameters can be adjusted for better result.
+
+For the training guide and usage of the PaddleOCR text detection algorithms, please refer to [Text detection model training/evaluation/prediction](./detection_en.md).
+
+
+### 2. Text Recognition Algorithm
+
+PaddleOCR open-source text recognition algorithms list:
+- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))
+- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
+- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))
+- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))
+- [x] SRN([paper](https://arxiv.org/abs/2003.12294))(Baidu Self-Research)
+
+Following [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation results of the above text recognition algorithms (trained on MJSynth and SynthText, evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP and CUTE) are as follows:
+
+|Model|Backbone|Avg Accuracy|Module combination|Download link|
+|-|-|-|-|-|
+|Rosetta|Resnet34_vd|80.24%|rec_r34_vd_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_r34_vd_none_none_ctc.tar)|
+|Rosetta|MobileNetV3|78.16%|rec_mv3_none_none_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_mv3_none_none_ctc.tar)|
+|CRNN|Resnet34_vd|82.20%|rec_r34_vd_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_r34_vd_none_bilstm_ctc.tar)|
+|CRNN|MobileNetV3|79.37%|rec_mv3_none_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_mv3_none_bilstm_ctc.tar)|
+|STAR-Net|Resnet34_vd|83.93%|rec_r34_vd_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_r34_vd_tps_bilstm_ctc.tar)|
+|STAR-Net|MobileNetV3|81.56%|rec_mv3_tps_bilstm_ctc|[Download link](https://paddleocr.bj.bcebos.com/rec_mv3_tps_bilstm_ctc.tar)|
+|RARE|Resnet34_vd|84.90%|rec_r34_vd_tps_bilstm_attn|[Download link](https://paddleocr.bj.bcebos.com/rec_r34_vd_tps_bilstm_attn.tar)|
+|RARE|MobileNetV3|83.32%|rec_mv3_tps_bilstm_attn|[Download link](https://paddleocr.bj.bcebos.com/rec_mv3_tps_bilstm_attn.tar)|
+|SRN|Resnet50_vd_fpn|88.33%|rec_r50fpn_vd_none_srn|[Download link](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar)|
+
+**Note:** SRN model uses data expansion method to expand the two training sets mentioned above, and the expanded data can be downloaded from [Baidu Drive](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA) (download code: y3ry).
+
+The average accuracy of the two-stage training in the original paper is 89.74%, and that of one stage training in paddleocr is 88.33%. Both pre-trained weights can be downloaded [here](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar).
+
+We use the [LSVT](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/datasets_en.md#1-icdar2019-lsvt) dataset and crop out 300,000 (30w) training images from the original photos using the position ground truth, with the necessary calibration. In addition, based on the LSVT corpus, 5 million (500w) synthetic images are generated to train the model. The related configuration files and pre-trained models are as follows:
+
+|Model|Backbone|Configuration file|Pre-trained model|Model supporting space recognition|
+|-|-|-|-|-|
+|ultra-lightweight OCR model|MobileNetV3|rec_chinese_lite_train.yml|[Download link](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar) & [pre-trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance.tar)|
+|General OCR model|Resnet34_vd|rec_chinese_common_train.yml|[Download link](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar) & [pre-trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance.tar)|
+
+For the training guide and usage of the PaddleOCR text recognition algorithms, please refer to [Text recognition model training/evaluation/prediction](./recognition_en.md).
diff --git a/doc/doc_en/angle_class_en.md b/doc/doc_en/angle_class_en.md
new file mode 100644
index 00000000..c7fff3a1
--- /dev/null
+++ b/doc/doc_en/angle_class_en.md
@@ -0,0 +1,126 @@
+## TEXT ANGLE CLASSIFICATION
+
+### DATA PREPARATION
+
+Please organize the dataset as follows:
+
+The default storage path for training data is `PaddleOCR/train_data/cls`, if you already have a dataset on your disk, just create a soft link to the dataset directory:
+
+```
+ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/cls/dataset
+```
+
+please refer to the following to organize your data.
+
+- Training set
+
+First put the training images in the same folder (train_images), and use a txt file (cls_gt_train.txt) to store the image path and label.
+
+* Note: by default, the image path and image label are split with `\t`, if you use other methods to split, it will cause training error
+
+0 and 180 indicate that the angle of the image is 0 degrees and 180 degrees, respectively.
+
+```
+" Image file name Image annotation "
+
+train_data/word_001.jpg 0
+train_data/word_002.jpg 180
+```
+
+The final training set should have the following file structure:
+
+```
+|-train_data
+ |-cls
+ |- cls_gt_train.txt
+ |- train
+ |- word_001.png
+ |- word_002.jpg
+ |- word_003.jpg
+ | ...
+```
+
+- Test set
+
+Similar to the training set, the test set also needs a folder containing all the images (test)
+and a label file (cls_gt_test.txt). The structure of the test set is as follows:
+
+```
+|-train_data
+ |-cls
+ |- cls_gt_test.txt
+ |- test
+ |- word_001.jpg
+ |- word_002.jpg
+ |- word_003.jpg
+ | ...
+```
+
+### TRAINING
+
+PaddleOCR provides training scripts, evaluation scripts, and prediction scripts.
+
+Start training:
+
+```
+# Set PYTHONPATH path
+export PYTHONPATH=$PYTHONPATH:.
+# GPU training Support single card and multi-card training, specify the card number through CUDA_VISIBLE_DEVICES
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+# Train the angle classification model
+python3 tools/train.py -c configs/cls/cls_mv3.yml
+```
+
+- Data Augmentation
+
+PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, please set `distort: true` in the configuration file.
+
+The default perturbation methods are: cvtColor, blur, jitter, Gaussian noise, random crop, perspective, color reverse, RandAugment.
+
+Except for RandAugment, each disturbance method is selected with a 50% probability during the training process. For specific code implementation, please refer to:
+[randaugment.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/cls/randaugment.py)
+[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+
+
+- Training
+
+PaddleOCR supports alternating training and evaluation. You can modify `eval_batch_step` in `configs/cls/cls_mv3.yml` to set the evaluation frequency. By default, it is evaluated every 500 iter and the best acc model is saved under `output/cls_mv3/best_accuracy` during the evaluation process.
+
+If the evaluation set is large, the test will be time-consuming. It is recommended to reduce the number of evaluations, or evaluate after training.
+
+**Note that the configuration file for prediction/evaluation must be consistent with the training.**
+
+### EVALUATION
+
+The evaluation dataset can be changed by setting `label_file_path` under EvalReader in `configs/cls/cls_reader.yml`.
+
+```
+export CUDA_VISIBLE_DEVICES=0
+# GPU evaluation, Global.checkpoints is the weight to be tested
+python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+```
+
+### PREDICTION
+
+* Training engine prediction
+
+Using the model trained by paddleocr, you can quickly get prediction through the following script.
+
+The default prediction picture is stored in `infer_img`, and the weight is specified via `-o Global.checkpoints`:
+
+```
+# Predict the angle of the input image
+python3 tools/infer_rec.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
+```
+
+Input image:
+
+
+
+Get the prediction result of the input image:
+
+```
+infer_img: doc/imgs_words/en/word_1.png
+ scores: [[0.93161047 0.06838956]]
+ label: [0]
+```
diff --git a/doc/doc_en/benchmark_en.md b/doc/doc_en/benchmark_en.md
index 9e2dadb1..91b01594 100644
--- a/doc/doc_en/benchmark_en.md
+++ b/doc/doc_en/benchmark_en.md
@@ -1,36 +1,56 @@
# BENCHMARK
-This document gives the prediction time-consuming benchmark of PaddleOCR Ultra Lightweight Chinese Model (8.6M) on each platform.
+This document gives the performance of the model series for Chinese and English recognition.
## TEST DATA
-* 500 images were randomly sampled from the Chinese public data set [ICDAR2017-RCTW](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/datasets.md#ICDAR2017-RCTW-17).
- Most of the pictures in the set were collected in the wild through mobile phone cameras.
- Some are screenshots.
- These pictures show various scenes, including street scenes, posters, menus, indoor scenes and screenshots of mobile applications.
+
+We collected 300 images for different real application scenarios to evaluate the overall OCR system, including contract samples, license plates, nameplates, train tickets, test sheets, forms, certificates, street view images, business cards, digital meter, etc. The following figure shows some images of the test set.
+
+
+
+
## MEASUREMENT
-The predicted time-consuming indicators on the four platforms are as follows:
-
-| Long size(px) | T4(s) | V100(s) | Intel Xeon 6148(s) | Snapdragon 855(s) |
-| :---------: | :-----: | :-------: | :------------------: | :-----------------: |
-| 960 | 0.092 | 0.057 | 0.319 | 0.354 |
-| 640 | 0.067 | 0.045 | 0.198 | 0.236 |
-| 480 | 0.057 | 0.043 | 0.151 | 0.175 |
Explanation:
-* The evaluation time-consuming stage is the complete stage from image input to result output, including image
-pre-processing and post-processing.
-* ```Intel Xeon 6148``` is the server-side CPU model. Intel MKL-DNN is used in the test to accelerate the CPU prediction speed.
-To use this operation, you need to:
- * Update to the latest version of PaddlePaddle: https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-dev
- Please select the corresponding mkl version wheel package according to the CUDA version and Python version of your environment,
- for example, CUDA10, Python3.7 environment, you should:
+- v1.0 indicates DB+CRNN models without the strategies. v1.1 indicates the PP-OCR models with the strategies and the direction classifier. slim_v1.1 indicates the PP-OCR models with pruning or quantization.
- ```
- # Obtain the installation package
- wget https://paddle-wheel.bj.bcebos.com/0.0.0-gpu-cuda10-cudnn7-mkl/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
- # Installation
- pip3.7 install paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
- ```
- * Use parameters ```--enable_mkldnn True``` to turn on the acceleration switch when making predictions
-* ```Snapdragon 855``` is a mobile processing platform model.
+- The long size of the input for the text detector is 960.
+
+- The evaluation time-consuming stage is the complete stage from image input to result output, including image pre-processing and post-processing.
+
+- ```Intel Xeon 6148``` is the server-side CPU model. Intel MKL-DNN is used in the test to accelerate the CPU prediction speed.
+
+- ```Snapdragon 855``` is a mobile processing platform model.
+
+Comparison of model size and F-score:
+
+| Model Name | Model Size of the Whole System\(M\) | Model Size of the Text Detector\(M\) | Model Size of the Direction Classifier\(M\) | Model Size of the Text Recognizer \(M\) | F\-score |
+|:-:|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
+| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
+| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
+| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |
+
+Comparison of inference time on T4 GPU (ms):
+
+| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
+|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
+| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
+| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
+| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
+
+Comparison of inference time on CPU (ms):
+
+| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
+|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
+| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |
+
+Comparison of the model size, F-score and inference time on SD 855 between the slim models and the original models:
+
+| Model Name | Model Size of the Whole System\(M\) | Model Size of the Text Detector\(M\) | Model Size of the Direction Classifier\(M\) | Model Size of the Text Recognizer \(M\) | F\-score | SD 855 \(ms\) |
+|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
+| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |
diff --git a/doc/doc_en/customize_en.md b/doc/doc_en/customize_en.md
index b63de67c..fb47c14f 100644
--- a/doc/doc_en/customize_en.md
+++ b/doc/doc_en/customize_en.md
@@ -6,7 +6,7 @@ The process of making a customized ultra-lightweight OCR models can be divided i
PaddleOCR provides two text detection algorithms: EAST and DB. Both support MobileNetV3 and ResNet50_vd backbone networks, select the corresponding configuration file as needed and start training. For example, to train with MobileNetV3 as the backbone network for DB detection model :
```
-python3 tools/train.py -c configs/det/det_mv3_db.yml
+python3 tools/train.py -c configs/det/det_mv3_db.yml 2>&1 | tee det_db.log
```
For more details about data preparation and training tutorials, refer to the documentation [Text detection model training/evaluation/prediction](./detection_en.md)
@@ -14,7 +14,7 @@ For more details about data preparation and training tutorials, refer to the doc
PaddleOCR provides four text recognition algorithms: CRNN, Rosetta, STAR-Net, and RARE. They all support two backbone networks: MobileNetV3 and ResNet34_vd, select the corresponding configuration files as needed to start training. For example, to train a CRNN recognition model that uses MobileNetV3 as the backbone network:
```
-python3 tools/train.py -c configs/rec/rec_chinese_lite_train.yml
+python3 tools/train.py -c configs/rec/rec_chinese_lite_train.yml 2>&1 | tee rec_ch_lite.log
```
For more details about data preparation and training tutorials, refer to the documentation [Text recognition model training/evaluation/prediction](./recognition_en.md)
diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md
index 08e6b63b..401d7a9a 100644
--- a/doc/doc_en/detection_en.md
+++ b/doc/doc_en/detection_en.md
@@ -62,7 +62,7 @@ tar -xf ./pretrain_models/MobileNetV3_large_x0_5_pretrained.tar ./pretrain_model
#### START TRAINING
*If CPU version installed, please set the parameter `use_gpu` to `false` in the configuration.*
```shell
-python3 tools/train.py -c configs/det/det_mv3_db.yml
+python3 tools/train.py -c configs/det/det_mv3_db.yml 2>&1 | tee train_det.log
```
In the above instruction, use `-c` to select the training to use the `configs/det/det_db_mv3.yml` configuration file.
@@ -73,7 +73,7 @@ You can also use `-o` to change the training parameters without modifying the ym
python3 tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001
```
-#### load trained model and conntinue training
+#### load trained model and continue training
If you expect to load trained model and continue the training again, you can specify the parameter `Global.checkpoints` as the model path to be loaded.
For example:
diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md
index 83ec2a90..1cdad899 100644
--- a/doc/doc_en/inference_en.md
+++ b/doc/doc_en/inference_en.md
@@ -12,25 +12,29 @@ Next, we first introduce how to convert a trained model into an inference model,
- [CONVERT TRAINING MODEL TO INFERENCE MODEL](#CONVERT)
- [Convert detection model to inference model](#Convert_detection_model)
- [Convert recognition model to inference model](#Convert_recognition_model)
-
-
+ - [Convert angle classification model to inference model](#Convert_angle_class_model)
+
+
- [TEXT DETECTION MODEL INFERENCE](#DETECTION_MODEL_INFERENCE)
- [1. LIGHTWEIGHT CHINESE DETECTION MODEL INFERENCE](#LIGHTWEIGHT_DETECTION)
- [2. DB TEXT DETECTION MODEL INFERENCE](#DB_DETECTION)
- [3. EAST TEXT DETECTION MODEL INFERENCE](#EAST_DETECTION)
- [4. SAST TEXT DETECTION MODEL INFERENCE](#SAST_DETECTION)
-
+ - [5. Multilingual model inference](#Multilingual model inference)
+
- [TEXT RECOGNITION MODEL INFERENCE](#RECOGNITION_MODEL_INFERENCE)
- [1. LIGHTWEIGHT CHINESE MODEL](#LIGHTWEIGHT_RECOGNITION)
- [2. CTC-BASED TEXT RECOGNITION MODEL INFERENCE](#CTC-BASED_RECOGNITION)
- [3. ATTENTION-BASED TEXT RECOGNITION MODEL INFERENCE](#ATTENTION-BASED_RECOGNITION)
- [4. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY](#USING_CUSTOM_CHARACTERS)
-
-
-- [TEXT DETECTION AND RECOGNITION INFERENCE CONCATENATION](#CONCATENATION)
+
+- [ANGLE CLASSIFICATION MODEL INFERENCE](#ANGLE_CLASS_MODEL_INFERENCE)
+ - [1. ANGLE CLASSIFICATION MODEL INFERENCE](#ANGLE_CLASS_MODEL_INFERENCE)
+
+- [TEXT DETECTION, ANGLE CLASSIFICATION AND RECOGNITION INFERENCE CONCATENATION](#CONCATENATION)
- [1. LIGHTWEIGHT CHINESE MODEL](#LIGHTWEIGHT_CHINESE_MODEL)
- [2. OTHER MODELS](#OTHER_MODELS)
-
+
## CONVERT TRAINING MODEL TO INFERENCE MODEL
@@ -87,6 +91,33 @@ After the conversion is successful, there are two files in the directory:
└─ params Identify the parameter files of the inference model
```
+
+### Convert angle classification model to inference model
+
+Download the angle classification model:
+```
+wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile-v1.1.cls_pre.tar && tar xf ./ch_lite/ch_ppocr_mobile-v1.1.cls_pre.tar -C ./ch_lite/
+```
+
+The angle classification model is converted to the inference model in the same way as the detection, as follows:
+```
+# -c Set the training algorithm yml configuration file
+# -o Set optional parameters
+# Global.checkpoints parameter Set the training model address to be converted without adding the file suffix .pdmodel, .pdopt or .pdparams.
+# Global.save_inference_dir Set the address where the converted model will be saved.
+
+python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.checkpoints=./ch_lite/cls_model/best_accuracy \
+ Global.save_inference_dir=./inference/cls/
+```
+
+After the conversion is successful, there are two files in the directory:
+```
+/inference/cls/
+ └─ model Identify the saved model files
+ └─ params Identify the parameter files of the inference model
+```
+
+
## TEXT DETECTION MODEL INFERENCE
@@ -276,16 +307,57 @@ If the chars dictionary is modified during training, you need to specify the new
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_char_dict_path="your text dict path"
```
+
+
+### 5. Multilingual Model Inference
+If you need to predict with a model for another language, you must specify the dictionary path with `--rec_char_dict_path` when running the inference model. In addition, to get correct visualization results,
+you need to specify the font path with `--vis_font_path`. Fonts for several minor languages are provided by default under the `doc/` path. For example, for Korean recognition:
+
+```
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/korean_dict.txt" --vis_font_path="doc/korean.ttf"
+```
+
+
+After executing the command, the prediction result of the above figure is:
+
+``` text
+2020-09-19 16:15:05,076-INFO: index: [205 206 38 39]
+2020-09-19 16:15:05,077-INFO: word : 바탕으로
+2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535
+```
+
+
+## ANGLE CLASSIFICATION MODEL INFERENCE
+
+The following will introduce the angle classification model inference.
+
+
+
+### 1. ANGLE CLASSIFICATION MODEL INFERENCE
+
+For angle classification model inference, you can execute the following commands:
+
+```
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+```
+
+
+
+After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
+
+Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999963]
+
+
-## TEXT DETECTION AND RECOGNITION INFERENCE CONCATENATION
+## TEXT DETECTION, ANGLE CLASSIFICATION AND RECOGNITION INFERENCE CONCATENATION
### 1. LIGHTWEIGHT CHINESE MODEL
-When performing prediction, you need to specify the path of a single image or a folder of images through the parameter `image_dir`, the parameter `det_model_dir` specifies the path to detect the inference model, and the parameter `rec_model_dir` specifies the path to identify the inference model. The visualized recognition results are saved to the `./inference_results` folder by default.
+When performing prediction, you need to specify the path of a single image or a folder of images through the parameter `image_dir`. The parameter `det_model_dir` specifies the path to the detection inference model, the parameter `cls_model_dir` specifies the path to the angle classification inference model, and the parameter `rec_model_dir` specifies the path to the recognition inference model. The parameter `use_angle_cls` controls whether the angle classification model is enabled. The visualized recognition results are saved to the `./inference_results` folder by default.
```
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --rec_model_dir="./inference/rec_crnn/"
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/2.jpg" --det_model_dir="./inference/det_db/" --cls_model_dir="./inference/cls/" --rec_model_dir="./inference/rec_crnn/" --use_angle_cls true
```
After executing the command, the recognition result image is as follows:
diff --git a/doc/doc_en/installation_en.md b/doc/doc_en/installation_en.md
index 37f66b05..b62d9b29 100644
--- a/doc/doc_en/installation_en.md
+++ b/doc/doc_en/installation_en.md
@@ -7,7 +7,7 @@ PaddleOCR working environment:
- python3.7
- glibc 2.23
-It is recommended to use the docker provided by us to run PaddleOCR, please refer to the use of docker [link](https://docs.docker.com/get-started/).
+It is recommended to use the docker provided by us to run PaddleOCR, please refer to the use of docker [link](https://www.runoob.com/docker/docker-tutorial.html/).
*If you want to directly run the prediction code on mac or windows, you can start from step 2.*
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
new file mode 100644
index 00000000..71878ac3
--- /dev/null
+++ b/doc/doc_en/models_list_en.md
@@ -0,0 +1,70 @@
+## OCR model list(V1.1, updated on 9.22)
+
+- [1. Text Detection Model](#Detection)
+- [2. Text Recognition Model](#Recognition)
+ - [Chinese Recognition Model](#Chinese)
+ - [English Recognition Model](#English)
+ - [Multilingual Recognition Model](#Multilingual)
+- [3. Text Angle Classification Model](#Angle)
+
+The downloadable models provided by PaddleOCR include `inference model`, `trained model`, `pre-trained model` and `slim model`. The differences between the models are as follows:
+
+|model type|model format|description|
+|-|-|-|
+|inference model|model、params|Used for inference with the Python prediction engine. [detail](./inference_en.md)|
+|trained model / pre-trained model|\*.pdmodel、\*.pdopt、\*.pdparams|The checkpoints model saved in the training process, which stores the parameters of the model, mostly used for model evaluation and continuous training.|
+|slim model|\*.nb|Generally used for Lite deployment|
+
+
+
+### 1. Text Detection Model
+|model name|description|model size|download|
+|-|-|-|-|
+|ch_ppocr_mobile_slim_v1.1_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|1.4M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_opt.nb)|
+|ch_ppocr_mobile_v1.1_det|Original lightweight model, supporting Chinese, English, multilingual text detection|2.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_train.tar)|
+|ch_ppocr_server_v1.1_det|General model, which is larger than the lightweight model, but achieved better performance|47.2M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_train.tar)|
+
+
+
+### 2. Text Recognition Model
+
+
+#### Chinese Recognition Model
+|model name|description|model size|download|
+|-|-|-|-|
+|ch_ppocr_mobile_slim_v1.1_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|1.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_opt.nb)|
+|ch_ppocr_mobile_v1.1_rec|Original lightweight model, supporting Chinese, English and number recognition|4.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_pre.tar)|
+|ch_ppocr_server_v1.1_rec|General model, supporting Chinese, English and number recognition|105M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_pre.tar)|
+
+**Note:** The `trained model` is fine-tuned from the `pre-trained model` with real data and synthesized vertical text data, and achieves better performance in real scenes. The `pre-trained model` is trained directly on the full set of real and synthesized data, which makes it more suitable for fine-tuning on your own dataset.
+
+
+#### English Recognition Model
+|model name|description|model size|download|
+|-|-|-|-|
+|en_ppocr_mobile_slim_v1.1_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|0.9M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_opt.nb)|
+|en_ppocr_mobile_v1.1_rec|Original lightweight model, supporting English and number recognition|2.0M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_train.tar)|
+
+
+#### Multilingual Recognition Model(Updating...)
+|model name|description|model size|download|
+|-|-|-|-|
+| french_ppocr_mobile_v1.1_rec |Lightweight model for French recognition|2.1M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_train.tar)|
+| german_ppocr_mobile_v1.1_rec |Lightweight model for German recognition|2.1M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_train.tar)|
+| korean_ppocr_mobile_v1.1_rec |Lightweight model for Korean recognition|3.4M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_train.tar)|
+| japan_ppocr_mobile_v1.1_rec |Lightweight model for Japanese recognition|3.7M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_train.tar)|
+
+
+
+### 3. Text Angle Classification Model
+|model name|description|model size|download|
+|-|-|-|-|
+|ch_ppocr_mobile_v1.1_cls_quant|Slim quantized model|0.5M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_train.tar) / [slim model]()|
+|ch_ppocr_mobile_v1.1_cls|Original model|850kb|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_train.tar)|
+
+
+## OCR model list(V1.0, updated on 7.16)
+|model name|description|detection model|recognition model|recognition model supporting space recognition|
+|-|-|-|-|-|
+|chinese_db_crnn_mobile|8.6M lightweight OCR model|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance.tar)
+|chinese_db_crnn_server|General OCR model|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn.tar)|[inference model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance.tar)
diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md
index b9c42afa..c9d69b3d 100644
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -130,8 +130,8 @@ Start training:
export PYTHONPATH=$PYTHONPATH:.
# GPU training Support single card and multi-card training, specify the card number through CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0,1,2,3
-# Training icdar15 English data
-python3 tools/train.py -c configs/rec/rec_icdar15_train.yml
+# Training icdar15 English data and saving the log as train_rec.log
+python3 tools/train.py -c configs/rec/rec_icdar15_train.yml 2>&1 | tee train_rec.log
```
- Data Augmentation
@@ -201,7 +201,19 @@ Optimizer:
```
**Note that the configuration file for prediction/evaluation must be consistent with the training.**
+- Minor language
+PaddleOCR also supports multi-language recognition. The corresponding configuration files are located in `configs/rec/multi_languages`. Currently, the multi-language algorithms supported by PaddleOCR are:
+
+| Configuration file | Algorithm name | backbone | trans | seq | pred | language |
+| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
+| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English |
+| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
+| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
+| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
+| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
+
+The multi-language models are trained in the same way as the Chinese model. The training data set consists of 1 million (100w) synthetic samples. A small number of fonts and test data can be downloaded from [Baidu Netdisk]().
### EVALUATION
diff --git a/doc/doc_en/tree_en.md b/doc/doc_en/tree_en.md
new file mode 100644
index 00000000..5919013b
--- /dev/null
+++ b/doc/doc_en/tree_en.md
@@ -0,0 +1,208 @@
+# Overall directory structure
+
+The overall directory structure of PaddleOCR is introduced as follows:
+
+```
+PaddleOCR
+├── configs // configuration file, you can select model structure and modify hyperparameters through yml file
+│ ├── cls // Related configuration files of direction classifier
+│ │ ├── cls_mv3.yml // training configuration related, including backbone network, head, loss, optimizer
+│ │ └── cls_reader.yml // Data reading related, data reading method, data storage path
+│ ├── det // Detection related configuration files
+│ │ ├── det_db_icdar15_reader.yml // data read
+│ │ ├── det_mv3_db.yml // training configuration
+│ │ ...
+│ └── rec // Recognition related configuration files
+│ ├── rec_benchmark_reader.yml // LMDB format data reading related
+│ ├── rec_chinese_common_train.yml // General Chinese training configuration
+│ ├── rec_icdar15_reader.yml // simple data reading related, including data reading function, data path, label file
+│ ...
+├── deploy // deployment related
+│ ├── android_demo // android_demo
+│ │ ...
+│ ├── cpp_infer // C++ infer
+│ │ ├── CMakeLists.txt // Cmake file
+│ │ ├── docs // documentation
+│ │ │ └── windows_vs2019_build.md
+│ │ ├── include
+│ │ │ ├── clipper.h // clipper library
+│ │ │ ├── config.h // infer configuration
+│ │ │ ├── ocr_cls.h // direction classifier
+│ │ │ ├── ocr_det.h // text detection
+│ │ │ ├── ocr_rec.h // text recognition
+│ │ │ ├── postprocess_op.h // postprocess after detection
+│ │ │ ├── preprocess_op.h // preprocess detection
+│ │ │ └── utility.h // tools
+│ │ ├── readme.md // documentation
+│ │ ├── ...
+│ │ ├── src // source file
+│ │ │ ├── clipper.cpp
+│ │ │ ├── config.cpp
+│ │ │ ├── main.cpp
+│ │ │ ├── ocr_cls.cpp
+│ │ │ ├── ocr_det.cpp
+│ │ │ ├── ocr_rec.cpp
+│ │ │ ├── postprocess_op.cpp
+│ │ │ ├── preprocess_op.cpp
+│ │ │ └── utility.cpp
+│ │ └── tools // compile and execute script
+│ │ ├── build.sh // compile script
+│ │ ├── config.txt // configuration file
+│ │ └── run.sh // Test startup script
+│ ├── docker
+│ │ └── hubserving
+│ │ ├── cpu
+│ │ │ └── Dockerfile
+│ │ ├── gpu
+│ │ │ └── Dockerfile
+│ │ ├── README_cn.md
+│ │ ├── README.md
+│ │ └── sample_request.txt
+│ ├── hubserving // hubserving
+│ │ ├── ocr_det // text detection
+│ │ │ ├── config.json // serving configuration
+│ │ │ ├── __init__.py
+│ │ │ ├── module.py // prediction model
+│ │ │ └── params.py // prediction parameters
+│ │ ├── ocr_rec // text recognition
+│ │ │ ├── config.json
+│ │ │ ├── __init__.py
+│ │ │ ├── module.py
+│ │ │ └── params.py
+│ │ └── ocr_system // system prediction
+│ │ ├── config.json
+│ │ ├── __init__.py
+│ │ ├── module.py
+│ │ └── params.py
+│ ├── imgs // prediction picture
+│ │ ├── cpp_infer_pred_12.png
+│ │ └── demo.png
+│ ├── ios_demo // ios demo
+│ │ ...
+│ ├── lite // lite deployment
+│ │ ├── cls_process.cc // direction classifier data processing
+│ │ ├── cls_process.h
+│ │ ├── config.txt // check configuration parameters
+│ │ ├── crnn_process.cc // crnn data processing
+│ │ ├── crnn_process.h
+│ │ ├── db_post_process.cc // db data processing
+│ │ ├── db_post_process.h
+│ │ ├── Makefile // compile file
+│ │ ├── ocr_db_crnn.cc // series prediction
+│ │ ├── prepare.sh // data preparation
+│ │ ├── readme.md // documentation
+│ │ ...
+│ ├── pdserving // pdserving deployment
+│ │ ├── det_local_server.py // fast detection version, easy deployment and fast prediction
+│ │ ├── det_web_server.py // Full version of detection, high stability and distributed deployment
+│ │ ├── ocr_local_server.py // detection + recognition quick version
+│ │ ├── ocr_web_client.py // client
+│ │ ├── ocr_web_server.py // detection + recognition full version
+│ │ ├── readme.md // documentation
+│ │ ├── rec_local_server.py // recognition quick version
+│ │ └── rec_web_server.py // recognition full version
+│ └── slim
+│ └── quantization // quantization related
+│ ├── export_model.py // export model
+│ ├── quant.py // quantization
+│ └── README.md // Documentation
+├── doc // Documentation tutorial
+│ ...
+├── paddleocr.py
+├── ppocr // network core code
+│ ├── data // data processing
+│ │ ├── cls // direction classifier
+│ │ │ ├── dataset_traversal.py // Data transmission, define data reader, read data and form batch
+│ │ │ └── randaugment.py // Random data augmentation operation
+│ │ ├── det // detection
+│ │ │ ├── data_augment.py // data augmentation operation
+│ │ │ ├── dataset_traversal.py // Data transmission, define data reader, read data and form batch
+│ │ │ ├── db_process.py // db data processing
+│ │ │ ├── east_process.py // east data processing
+│ │ │ ├── make_border_map.py // Generate boundary map
+│ │ │ ├── make_shrink_map.py // Generate shrink map
+│ │ │ ├── random_crop_data.py // random crop
+│ │ │ └── sast_process.py // sast data processing
+│ │ ├── reader_main.py // main function of data reader
+│ │ └── rec // recognition
+│ │ ├── dataset_traversal.py // Data transmission, define data reader, including LMDB_Reader and Simple_Reader
+│ │ └── img_tools.py // Data processing related, including data normalization and disturbance
+│ ├── __init__.py
+│ ├── modeling // networking related
+│ │ ├── architectures // Model architecture, which defines the various modules required by the model
+│ │ │ ├── cls_model.py // direction classifier
+│ │ │ ├── det_model.py // detection
+│ │ │ └── rec_model.py // recognition
+│ │ ├── backbones // backbone network
+│ │ │ ├── det_mobilenet_v3.py // detect mobilenet_v3
+│ │ │ ├── det_resnet_vd.py
+│ │ │ ├── det_resnet_vd_sast.py
+│ │ │ ├── rec_mobilenet_v3.py // recognize mobilenet_v3
+│ │ │ ├── rec_resnet_fpn.py
+│ │ │ └── rec_resnet_vd.py
+│ │ ├── common_functions.py // common functions
+│ │ ├── heads
+│ │ │ ├── cls_head.py // classification head
+│ │ │ ├── det_db_head.py // db detection head
+│ │ │ ├── det_east_head.py // east detection head
+│ │ │ ├── det_sast_head.py // sast detection head
+│ │ │ ├── rec_attention_head.py // recognition attention
+│ │ │ ├── rec_ctc_head.py // recognition ctc
+│ │ │ ├── rec_seq_encoder.py // recognition sequence code
+│ │ │ ├── rec_srn_all_head.py // srn related
+│ │ │ └── self_attention // srn attention
+│ │ │ └── model.py
+│ │ ├── losses // loss function
+│ │ │ ├── cls_loss.py // Directional classifier loss function
+│ │ │ ├── det_basic_loss.py // detect basic loss
+│ │ │ ├── det_db_loss.py // DB loss
+│ │ │ ├── det_east_loss.py // EAST loss
+│ │ │ ├── det_sast_loss.py // SAST loss
+│ │ │ ├── rec_attention_loss.py // attention loss
+│ │ │ ├── rec_ctc_loss.py // ctc loss
+│ │ │ └── rec_srn_loss.py // srn loss
+│ │ └── stns // Spatial transformation network
+│ │ └── tps.py // TPS conversion
+│ ├── optimizer.py // optimizer
+│ ├── postprocess // post-processing
+│ │ ├── db_postprocess.py // DB postprocess
+│ │ ├── east_postprocess.py // East postprocess
+│ │ ├── lanms // lanms related
+│ │ │ ...
+│ │ ├── locality_aware_nms.py // nms
+│ │ └── sast_postprocess.py // sast post-processing
+│ └── utils // tools
+│ ├── character.py // Character processing, including text encoding and decoding, and calculation of prediction accuracy
+│ ├── check.py // parameter loading check
+│ ├── ic15_dict.txt // English number dictionary, case sensitive
+│ ├── ppocr_keys_v1.txt // Chinese dictionary, used to train Chinese models
+│ ├── save_load.py // model save and load function
+│ ├── stats.py // Statistics
+│ └── utility.py // Tool functions, including related check tools such as whether the input parameters are legal
+├── README_en.md // documentation
+├── README.md
+├── requirments.txt // installation dependencies
+├── setup.py // whl package packaging script
+└── tools // start tool
+ ├── eval.py // evaluation function
+ ├── eval_utils // evaluation tools
+ │ ├── eval_cls_utils.py // category related
+ │ ├── eval_det_iou.py // detect iou related
+ │ ├── eval_det_utils.py // detection related
+ │ ├── eval_rec_utils.py // recognition related
+ │ └── __init__.py
+ ├── export_model.py // export infer model
+ ├── infer // Prediction based on inference engine
+ │ ├── predict_cls.py
+ │ ├── predict_det.py
+ │ ├── predict_rec.py
+ │ ├── predict_system.py
+ │ └── utility.py
+ ├── infer_cls.py // Predict classification based on training engine
+ ├── infer_det.py // Predictive detection based on training engine
+ ├── infer_rec.py // Predictive recognition based on training engine
+ ├── program.py // overall process
+ ├── test_hubserving.py
+ └── train.py // start training
+
+```
diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md
index 73ab78c1..4049d9dc 100644
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -10,14 +10,52 @@ pip install paddleocr
build own whl package and install
```bash
python setup.py bdist_wheel
-pip install dist/paddleocr-0.0.3-py3-none-any.whl
+pip install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x is the version of paddleocr
```
### 1. Use by code
+* detection, classification and recognition
+```python
+from paddleocr import PaddleOCR,draw_ocr
+# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
+# You can set the parameter `lang` to `ch`, `en`, `french`, `german`, `korean` or `japan`
+# to switch to the corresponding language model.
+ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+ print(line)
+
+
+# draw result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
+Output will be a list, each item contains bounding box, text and recognition confidence
+```bash
+[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+......
+```
+
+Visualization of results
+
+
+
+
+
* detection and recognition
```python
from paddleocr import PaddleOCR,draw_ocr
-ocr = PaddleOCR() # need to run only once to download and load model into memory
+ocr = PaddleOCR(lang='en') # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
result = ocr.ocr(img_path)
for line in result:
@@ -48,6 +86,21 @@ Visualization of results
+* classification and recognition
+```python
+from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to load model into memory
+img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
+result = ocr.ocr(img_path, det=False, cls=True)
+for line in result:
+ print(line)
+```
+
+Output will be a list, each item contains recognition text and confidence
+```bash
+['PAIN', 0.990372]
+```
+
* only detection
```python
from paddleocr import PaddleOCR,draw_ocr
@@ -83,18 +136,33 @@ Visualization of results
* only recognition
```python
from paddleocr import PaddleOCR
-ocr = PaddleOCR() # need to run only once to load model into memory
+ocr = PaddleOCR(lang='en') # need to run only once to load model into memory
img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
-result = ocr.ocr(img_path,det=False)
+result = ocr.ocr(img_path, det=False, cls=False)
for line in result:
print(line)
```
-Output will be a list, each item contains text and recognition confidence
+Output will be a list, each item contains recognition text and confidence
```bash
['PAIN', 0.990372]
```
+* only classification
+```python
+from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True) # need to run only once to load model into memory
+img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
+result = ocr.ocr(img_path, det=False, rec=False, cls=True)
+for line in result:
+ print(line)
+```
+
+Output will be a list, each item contains classification result and confidence
+```bash
+['0', 0.99999964]
+```
+
### Use by command line
show help information
@@ -102,9 +170,9 @@ show help information
paddleocr -h
```
-* detection and recognition
+* detection, classification and recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --use_angle_cls true --cls true --lang en
```
Output will be a list, each item contains bounding box, text and recognition confidence
@@ -115,6 +183,29 @@ Output will be a list, each item contains bounding box, text and recognition con
......
```
+* detection and recognition
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en
+```
+
+Output will be a list, each item contains bounding box, text and recognition confidence
+```bash
+[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+......
+```
+
+* classification and recognition
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --cls true --det false --lang en
+```
+
+Output will be a list, each item contains text and recognition confidence
+```bash
+['PAIN', 0.990372]
+```
+
* only detection
```bash
paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false
@@ -130,7 +221,7 @@ Output will be a list, each item only contains bounding box
* only recognition
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false --cls false --lang en
```
Output will be a list, each item contains text and recognition confidence
@@ -138,6 +229,16 @@ Output will be a list, each item contains text and recognition confidence
['PAIN', 0.990372]
```
+* only classification
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --use_angle_cls true --cls true --det false --rec false
+```
+
+Output will be a list, each item contains classification result and confidence
+```bash
+['0', 0.99999964]
+```
+
## Use custom model
When the built-in model cannot meet the needs, you need to use your own trained model.
First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec model to inference model, and then use it as follows
@@ -147,9 +248,9 @@ First, refer to the first section of [inference_en.md](./inference_en.md) to con
```python
from paddleocr import PaddleOCR,draw_ocr
# The path of detection and recognition model must contain model and params files
-ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}å')
+ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
-result = ocr.ocr(img_path)
+result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
@@ -167,7 +268,7 @@ im_show.save('result.jpg')
### Use by command line
```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir}
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
```
## Parameter Description
@@ -194,6 +295,14 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
| max_text_length | The maximum text length that the recognition algorithm can recognize | 25 |
| rec_char_dict_path | the alphabet path which needs to be modified to your own path when `rec_model_Name` use mode 2 | ./ppocr/utils/ppocr_keys_v1.txt |
| use_space_char | Whether to recognize spaces | TRUE |
+| use_angle_cls | Whether to load classification model | FALSE |
+| cls_model_dir | the classification inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/cls`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None |
+| cls_image_shape | image shape of classification algorithm | "3,48,192" |
+| label_list | label list of classification algorithm | ['0','180'] |
+| cls_batch_num | When performing classification, the batchsize of forward images | 30 |
| enable_mkldnn | Whether to enable mkldnn | FALSE |
+| use_zero_copy_run | Whether to forward by zero_copy_run | FALSE |
+| lang | The language of the recognition model; supported values are `ch`, `en`, `french`, `german`, `korean` and `japan` | ch |
| det | Enable detction when `ppocr.ocr` func exec | TRUE |
-| rec | Enable detction when `ppocr.ocr` func exec | TRUE |
+| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
+| cls | Enable classification when `ppocr.ocr` func exec | FALSE |
diff --git a/doc/french.ttf b/doc/french.ttf
new file mode 100644
index 00000000..ab68fb19
Binary files /dev/null and b/doc/french.ttf differ
diff --git a/doc/german.ttf b/doc/german.ttf
new file mode 100644
index 00000000..ab68fb19
Binary files /dev/null and b/doc/german.ttf differ
diff --git a/doc/imgs_results/1101.jpg b/doc/imgs_results/1101.jpg
new file mode 100644
index 00000000..fa8d809a
Binary files /dev/null and b/doc/imgs_results/1101.jpg differ
diff --git a/doc/imgs_results/1102.jpg b/doc/imgs_results/1102.jpg
new file mode 100644
index 00000000..6988b12c
Binary files /dev/null and b/doc/imgs_results/1102.jpg differ
diff --git a/doc/imgs_results/1103.jpg b/doc/imgs_results/1103.jpg
new file mode 100644
index 00000000..3437f60b
Binary files /dev/null and b/doc/imgs_results/1103.jpg differ
diff --git a/doc/imgs_results/1104.jpg b/doc/imgs_results/1104.jpg
new file mode 100644
index 00000000..9297be07
Binary files /dev/null and b/doc/imgs_results/1104.jpg differ
diff --git a/doc/imgs_results/1105.jpg b/doc/imgs_results/1105.jpg
new file mode 100644
index 00000000..6280e5ee
Binary files /dev/null and b/doc/imgs_results/1105.jpg differ
diff --git a/doc/imgs_results/1106.jpg b/doc/imgs_results/1106.jpg
new file mode 100644
index 00000000..61f3915d
Binary files /dev/null and b/doc/imgs_results/1106.jpg differ
diff --git a/doc/imgs_results/1110.jpg b/doc/imgs_results/1110.jpg
new file mode 100644
index 00000000..b0c63e7c
Binary files /dev/null and b/doc/imgs_results/1110.jpg differ
diff --git a/doc/imgs_results/1112.jpg b/doc/imgs_results/1112.jpg
new file mode 100644
index 00000000..35bec155
Binary files /dev/null and b/doc/imgs_results/1112.jpg differ
diff --git a/doc/imgs_words/french/1.jpg b/doc/imgs_words/french/1.jpg
new file mode 100644
index 00000000..077ca28e
Binary files /dev/null and b/doc/imgs_words/french/1.jpg differ
diff --git a/doc/imgs_words/french/2.jpg b/doc/imgs_words/french/2.jpg
new file mode 100644
index 00000000..38a73caa
Binary files /dev/null and b/doc/imgs_words/french/2.jpg differ
diff --git a/doc/imgs_words/german/1.jpg b/doc/imgs_words/german/1.jpg
new file mode 100644
index 00000000..d26ec9ed
Binary files /dev/null and b/doc/imgs_words/german/1.jpg differ
diff --git a/doc/imgs_words/japan/1.jpg b/doc/imgs_words/japan/1.jpg
new file mode 100644
index 00000000..68487974
Binary files /dev/null and b/doc/imgs_words/japan/1.jpg differ
diff --git a/doc/imgs_words/korean/1.jpg b/doc/imgs_words/korean/1.jpg
new file mode 100644
index 00000000..48a89389
Binary files /dev/null and b/doc/imgs_words/korean/1.jpg differ
diff --git a/doc/imgs_words/korean/2.jpg b/doc/imgs_words/korean/2.jpg
new file mode 100644
index 00000000..b24f2891
Binary files /dev/null and b/doc/imgs_words/korean/2.jpg differ
diff --git a/doc/japan.ttc b/doc/japan.ttc
new file mode 100644
index 00000000..ad68243b
Binary files /dev/null and b/doc/japan.ttc differ
diff --git a/doc/korean.ttf b/doc/korean.ttf
new file mode 100644
index 00000000..e638ce37
Binary files /dev/null and b/doc/korean.ttf differ
diff --git a/doc/ppocr_framework.png b/doc/ppocr_framework.png
new file mode 100644
index 00000000..ab51c88f
Binary files /dev/null and b/doc/ppocr_framework.png differ
diff --git a/paddleocr.py b/paddleocr.py
index d3d73cb1..7e9b2402 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -33,10 +33,43 @@ from ppocr.utils.utility import check_and_read_gif, get_image_file_list
__all__ = ['PaddleOCR']
-model_params = {
- 'det': 'https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar',
- 'rec':
- 'https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar',
+model_urls = {
+ 'det':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar',
+ 'rec': {
+ 'ch': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
+ },
+ 'en': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/ic15_dict.txt'
+ },
+ 'french': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/french_dict.txt'
+ },
+ 'german': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/german_dict.txt'
+ },
+ 'korean': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/korean_dict.txt'
+ },
+ 'japan': {
+ 'url':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar',
+ 'dict_path': './ppocr/utils/japan_dict.txt'
+ }
+ },
+ 'cls':
+ 'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar'
}
SUPPORT_DET_MODEL = ['DB']
@@ -120,16 +153,24 @@ def parse_args():
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=30)
parser.add_argument("--max_text_length", type=int, default=25)
- parser.add_argument(
- "--rec_char_dict_path",
- type=str,
- default="./ppocr/utils/ppocr_keys_v1.txt")
+ parser.add_argument("--rec_char_dict_path", type=str, default=None)
parser.add_argument("--use_space_char", type=bool, default=True)
- parser.add_argument("--enable_mkldnn", type=bool, default=False)
+ # params for text classifier
+ parser.add_argument("--use_angle_cls", type=str2bool, default=False)
+ parser.add_argument("--cls_model_dir", type=str, default=None)
+ parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
+ parser.add_argument("--label_list", type=list, default=['0', '180'])
+ parser.add_argument("--cls_batch_num", type=int, default=30)
+ parser.add_argument("--cls_thresh", type=float, default=0.9)
+
+ parser.add_argument("--enable_mkldnn", type=bool, default=False)
+ parser.add_argument("--use_zero_copy_run", type=bool, default=False)
+
+ parser.add_argument("--lang", type=str, default='ch')
parser.add_argument("--det", type=str2bool, default=True)
parser.add_argument("--rec", type=str2bool, default=True)
- parser.add_argument("--use_zero_copy_run", type=bool, default=False)
+ parser.add_argument("--cls", type=str2bool, default=False)
return parser.parse_args()
@@ -142,16 +183,30 @@ class PaddleOCR(predict_system.TextSystem):
"""
postprocess_params = parse_args()
postprocess_params.__dict__.update(**kwargs)
+ self.use_angle_cls = postprocess_params.use_angle_cls
+ lang = postprocess_params.lang
+ assert lang in model_urls[
+ 'rec'], 'param lang must in {}, but got {}'.format(
+ model_urls['rec'].keys(), lang)
+ if postprocess_params.rec_char_dict_path is None:
+ postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
+ 'dict_path']
# init model dir
if postprocess_params.det_model_dir is None:
postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
if postprocess_params.rec_model_dir is None:
- postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec')
+ postprocess_params.rec_model_dir = os.path.join(
+ BASE_DIR, 'rec/{}'.format(lang))
+ if postprocess_params.cls_model_dir is None:
+ postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
print(postprocess_params)
# download model
- maybe_download(postprocess_params.det_model_dir, model_params['det'])
- maybe_download(postprocess_params.rec_model_dir, model_params['rec'])
+ maybe_download(postprocess_params.det_model_dir, model_urls['det'])
+ maybe_download(postprocess_params.rec_model_dir,
+ model_urls['rec'][lang]['url'])
+ if self.use_angle_cls:
+ maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
@@ -166,7 +221,7 @@ class PaddleOCR(predict_system.TextSystem):
# init det_model and rec_model
super().__init__(postprocess_params)
- def ocr(self, img, det=True, rec=True):
+ def ocr(self, img, det=True, rec=True, cls=False):
"""
ocr with paddleocr
args:
@@ -175,6 +230,10 @@ class PaddleOCR(predict_system.TextSystem):
rec: use text recognition or not, if false, only det will be exec. default is True
"""
assert isinstance(img, (np.ndarray, list, str))
+ if cls and not self.use_angle_cls:
+ print('cls should be false when use_angle_cls is false')
+ exit(-1)
+ self.use_angle_cls = cls
if isinstance(img, str):
image_file = img
img, flag = check_and_read_gif(image_file)
@@ -194,6 +253,10 @@ class PaddleOCR(predict_system.TextSystem):
else:
if not isinstance(img, list):
img = [img]
+ if self.use_angle_cls:
+ img, cls_res, elapse = self.text_classifier(img)
+ if not rec:
+ return cls_res
rec_res, elapse = self.text_recognizer(img)
return rec_res
@@ -208,6 +271,9 @@ def main():
ocr_engine = PaddleOCR()
for img_path in image_file_list:
print(img_path)
- result = ocr_engine.ocr(img_path, det=args.det, rec=args.rec)
+ result = ocr_engine.ocr(img_path,
+ det=args.det,
+ rec=args.rec,
+ cls=args.cls)
for line in result:
- print(line)
\ No newline at end of file
+ print(line)
diff --git a/ppocr/data/cls/__init__.py b/ppocr/data/cls/__init__.py
new file mode 100755
index 00000000..abf198b9
--- /dev/null
+++ b/ppocr/data/cls/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ppocr/data/cls/dataset_traversal.py b/ppocr/data/cls/dataset_traversal.py
new file mode 100755
index 00000000..01f8c89c
--- /dev/null
+++ b/ppocr/data/cls/dataset_traversal.py
@@ -0,0 +1,144 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import math
+import random
+import numpy as np
+import cv2
+
+from ppocr.utils.utility import initial_logger
+from ppocr.utils.utility import get_image_file_list
+
+logger = initial_logger()
+
+from ppocr.data.rec.img_tools import resize_norm_img, warp
+from ppocr.data.cls.randaugment import RandAugment
+
+
+def random_crop(img):  # Randomly crop a horizontal slice from a very wide image and return the result.
+ img_h, img_w = img.shape[:2]  # the first two dimensions are read as (height, width)
+ if img_w > img_h * 4:  # only images more than 4x wider than tall enter the crop branch
+ w = random.randint(img_h * 2, img_w)  # crop width, drawn from the inclusive range [2 * img_h, img_w]
+ i = random.randint(0, img_w - w)  # left offset, bounded so that i + w never exceeds img_w
+
+ img = img[:, i:i + w, :]  # keeps every row and channel, columns i .. i + w - 1; NOTE(review): uses w and i, so presumably part of the branch above (indentation not visible here)
+ return img
+
+
+class SimpleReader(object):
+ def __init__(self, params):
+ if params['mode'] != 'train':
+ self.num_workers = 1
+ else:
+ self.num_workers = params['num_workers']
+ if params['mode'] != 'test':
+ self.img_set_dir = params['img_set_dir']
+ self.label_file_path = params['label_file_path']
+ self.use_gpu = params['use_gpu']
+ self.image_shape = params['image_shape']
+ self.mode = params['mode']
+ self.infer_img = params['infer_img']
+ self.use_distort = params['mode'] == 'train' and params['distort']
+ self.randaug = RandAugment()
+ self.label_list = params['label_list']
+ if "distort" in params:
+ self.use_distort = params['distort'] and params['use_gpu']
+ if not params['use_gpu']:
+ logger.info(
+ "Distort operation can only support in GPU.Distort will be set to False."
+ )
+ if params['mode'] == 'train':
+ self.batch_size = params['train_batch_size_per_card']
+ self.drop_last = True
+ else:
+ self.batch_size = params['test_batch_size_per_card']
+ self.drop_last = False
+ self.use_distort = False
+
+ def __call__(self, process_id):
+ if self.mode != 'train':
+ process_id = 0
+
+ def get_device_num():
+ if self.use_gpu:
+ gpus = os.environ.get("CUDA_VISIBLE_DEVICES", 1)
+ gpu_num = len(gpus.split(','))
+ return gpu_num
+ else:
+ cpu_num = os.environ.get("CPU_NUM", 1)
+ return int(cpu_num)
+
+ def sample_iter_reader():
+ if self.mode != 'train' and self.infer_img is not None:
+ image_file_list = get_image_file_list(self.infer_img)
+ for single_img in image_file_list:
+ img = cv2.imread(single_img)
+ if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ norm_img = resize_norm_img(img, self.image_shape)
+
+ norm_img = norm_img[np.newaxis, :]
+ yield norm_img
+ else:
+ with open(self.label_file_path, "rb") as fin:
+ label_infor_list = fin.readlines()
+ img_num = len(label_infor_list)
+ img_id_list = list(range(img_num))
+ random.shuffle(img_id_list)
+ if sys.platform == "win32" and self.num_workers != 1:
+ print("multiprocess is not fully compatible with Windows."
+ "num_workers will be 1.")
+ self.num_workers = 1
+ if self.batch_size * get_device_num(
+ ) * self.num_workers > img_num:
+ raise Exception(
+ "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
+ format(img_num, self.batch_size * get_device_num() *
+ self.num_workers))
+ for img_id in range(process_id, img_num, self.num_workers):
+ label_infor = label_infor_list[img_id_list[img_id]]
+ substr = label_infor.decode('utf-8').strip("\n").split("\t")
+ label = self.label_list.index(substr[1])
+
+ img_path = self.img_set_dir + "/" + substr[0]
+ img = cv2.imread(img_path)
+ if img is None:
+ logger.info("{} does not exist!".format(img_path))
+ continue
+ if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+ if self.use_distort:
+ img = warp(img, 10)
+ img = self.randaug(img)
+ norm_img = resize_norm_img(img, self.image_shape)
+ norm_img = norm_img[np.newaxis, :]
+ yield (norm_img, label)
+
+ def batch_iter_reader():
+ batch_outs = []
+ for outs in sample_iter_reader():
+ batch_outs.append(outs)
+ if len(batch_outs) == self.batch_size:
+ yield batch_outs
+ batch_outs = []
+ if not self.drop_last:
+ if len(batch_outs) != 0:
+ yield batch_outs
+
+ if self.infer_img is None:
+ return batch_iter_reader
+ return sample_iter_reader
diff --git a/ppocr/data/cls/randaugment.py b/ppocr/data/cls/randaugment.py
new file mode 100644
index 00000000..21345c05
--- /dev/null
+++ b/ppocr/data/cls/randaugment.py
@@ -0,0 +1,135 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from PIL import Image, ImageEnhance, ImageOps
+import numpy as np
+import random
+import six
+
+
+class RawRandAugment(object):
+    """Apply a short random sequence of PIL image operations.
+
+    Args:
+        num_layers (int): number of operations `__call__` applies in a row.
+        magnitude (number): strength relative to `max_level` (10); it is
+            scaled into one level per operation and stored in `level_map`.
+        fillcolor: value forwarded as `fillcolor` to `Image.transform` by
+            the shear and translate operations.
+    """
+
+    def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)):
+        self.num_layers = num_layers
+        self.magnitude = magnitude
+        self.max_level = 10
+
+        # Common scale factor: every per-operation level is proportional
+        # to it.
+        strength = self.magnitude / self.max_level
+        self.level_map = {
+            "shearX": 0.3 * strength,
+            "shearY": 0.3 * strength,
+            "translateX": 150.0 / 331 * strength,
+            "translateY": 150.0 / 331 * strength,
+            "rotate": 30 * strength,
+            "color": 0.9 * strength,
+            "posterize": int(4.0 * strength),
+            "solarize": 256.0 * strength,
+            "contrast": 0.9 * strength,
+            "sharpness": 0.9 * strength,
+            "brightness": 0.9 * strength,
+            "autocontrast": 0,
+            "equalize": 0,
+            "invert": 0
+        }
+
+        def random_sign(value):
+            # Multiply by -1 or +1, picked by random.choice.
+            return value * random.choice([-1, 1])
+
+        def affine(img, coeffs, *resample):
+            # Same-size affine transform forwarding `fillcolor`; the
+            # resampling filter is passed positionally only when supplied.
+            return img.transform(
+                img.size, Image.AFFINE, coeffs, *resample,
+                fillcolor=fillcolor)
+
+        def shear_x(img, magnitude):
+            coeffs = (1, random_sign(magnitude), 0, 0, 1, 0)
+            return affine(img, coeffs, Image.BICUBIC)
+
+        def shear_y(img, magnitude):
+            coeffs = (1, 0, 0, random_sign(magnitude), 1, 0)
+            return affine(img, coeffs, Image.BICUBIC)
+
+        def translate_x(img, magnitude):
+            # The level is multiplied by img.size[0] before the sign flip.
+            shift = random_sign(magnitude * img.size[0])
+            return affine(img, (1, 0, shift, 0, 1, 0))
+
+        def translate_y(img, magnitude):
+            # The level is multiplied by img.size[1] before the sign flip.
+            shift = random_sign(magnitude * img.size[1])
+            return affine(img, (1, 0, 0, 0, 1, shift))
+
+        # from https://stackoverflow.com/questions/5252170/
+        # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
+        def rotate(img, magnitude):
+            # The backdrop is a hard-coded (128, 128, 128, 128) image; the
+            # `fillcolor` argument is not used here, and the angle is
+            # applied without a random sign.
+            rotated = img.convert("RGBA").rotate(magnitude)
+            backdrop = Image.new("RGBA", rotated.size, (128, ) * 4)
+            return Image.composite(rotated, backdrop,
+                                   rotated).convert(img.mode)
+
+        def enhance_with(enhancer):
+            # Build an op that enhances by a factor of 1 +/- level.
+            def apply(img, magnitude):
+                return enhancer(img).enhance(1 + random_sign(magnitude))
+
+            return apply
+
+        def without_level(op):
+            # Build an op that ignores the level argument entirely.
+            def apply(img, magnitude):
+                return op(img)
+
+            return apply
+
+        def posterize(img, magnitude):
+            return ImageOps.posterize(img, magnitude)
+
+        def solarize(img, magnitude):
+            return ImageOps.solarize(img, magnitude)
+
+        # Keys mirror `level_map`; each value is called as op(img, level).
+        self.func = {
+            "shearX": shear_x,
+            "shearY": shear_y,
+            "translateX": translate_x,
+            "translateY": translate_y,
+            "rotate": rotate,
+            "color": enhance_with(ImageEnhance.Color),
+            "posterize": posterize,
+            "solarize": solarize,
+            "contrast": enhance_with(ImageEnhance.Contrast),
+            "sharpness": enhance_with(ImageEnhance.Sharpness),
+            "brightness": enhance_with(ImageEnhance.Brightness),
+            "autocontrast": without_level(ImageOps.autocontrast),
+            "equalize": without_level(ImageOps.equalize),
+            "invert": without_level(ImageOps.invert)
+        }
+
+    def __call__(self, img):
+        """Apply `num_layers` operations, each drawn with np.random.choice.
+
+        Args:
+            img: image object accepted by the operations in `self.func`
+                (they call PIL image methods on it).
+
+        Returns:
+            The image returned by the last applied operation.
+        """
+        op_names = list(self.level_map.keys())
+        for _ in range(self.num_layers):
+            chosen = np.random.choice(op_names)
+            img = self.func[chosen](img, self.level_map[chosen])
+        return img
+
+
+class RandAugment(RawRandAugment):
+    """RandAugment wrapper that also accepts non-PIL (array-like) input.
+
+    Constructor arguments are forwarded unchanged to RawRandAugment.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # The two-argument form of super() works on both Python 2 and 3,
+        # so no interpreter-version branching is required.
+        super(RandAugment, self).__init__(*args, **kwargs)
+
+    def __call__(self, img):
+        """Augment `img` and hand the result back as an array.
+
+        Args:
+            img: a PIL image, or any object `np.ascontiguousarray` and
+                `Image.fromarray` can turn into one.
+
+        Returns:
+            The augmented image; when the augmented result is a PIL image
+            it is converted with `np.asarray` before being returned, even
+            if the input was itself a PIL image.
+        """
+        if not isinstance(img, Image.Image):
+            # Image.fromarray is given a contiguous array.
+            img = Image.fromarray(np.ascontiguousarray(img))
+
+        img = super(RandAugment, self).__call__(img)
+
+        if isinstance(img, Image.Image):
+            img = np.asarray(img)
+
+        return img
diff --git a/ppocr/modeling/architectures/cls_model.py b/ppocr/modeling/architectures/cls_model.py
new file mode 100755
index 00000000..ad3ad0e7
--- /dev/null
+++ b/ppocr/modeling/architectures/cls_model.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import fluid
+
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()
+from copy import deepcopy
+
+
+class ClsModel(object):
+    """Wires together the backbone, head and loss of the classifier.
+
+    Args:
+        params (dict): configuration with 'Global', 'Backbone', 'Head' and
+            'Loss' sections. 'Global' must provide 'infer_img' and
+            'image_shape'; after merging with 'Global', each of the other
+            sections must provide 'function', the name handed to
+            `create_module`.
+    """
+
+    def __init__(self, params):
+        super(ClsModel, self).__init__()
+        global_params = params['Global']
+        self.infer_img = global_params['infer_img']
+
+        self.backbone = self._create_component(params, "Backbone")
+        self.head = self._create_component(params, "Head")
+        self.loss = self._create_component(params, "Loss")
+
+        self.image_shape = global_params['image_shape']
+
+    @staticmethod
+    def _create_component(params, section):
+        """Instantiate the module configured under `params[section]`.
+
+        The section is copied first so the caller's config is not mutated,
+        then overlaid with the 'Global' settings (global keys win).
+        """
+        component_params = deepcopy(params[section])
+        component_params.update(params['Global'])
+        factory = create_module(component_params['function'])
+        return factory(params=component_params)
+
+    def create_feed(self, mode):
+        """Declare the network inputs for `mode`.
+
+        Returns:
+            tuple: (image, labels, loader). For mode "train", `labels` is
+            {'label': <label variable>} and `loader` is a DataLoader built
+            over [image, label]; for every other mode both are None.
+        """
+        # Prepend -1 to a copy of the configured shape; the copy keeps
+        # self.image_shape itself untouched.
+        feed_shape = deepcopy(self.image_shape)
+        feed_shape.insert(0, -1)
+        image = fluid.data(name='image', shape=feed_shape, dtype='float32')
+        if mode != "train":
+            return image, None, None
+
+        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=[image, label],
+            capacity=64,
+            use_double_buffer=True,
+            iterable=False)
+        return image, {'label': label}, loader
+
+    def __call__(self, mode):
+        """Build the network for `mode`.
+
+        Returns:
+            "train": (loader, outputs) where outputs holds 'total_loss',
+                'decoded_out', 'label' and 'acc'.
+            "export": [image, predicts].
+            anything else: (loader, predicts).
+        """
+        image, labels, loader = self.create_feed(mode)
+        features = self.backbone(image)
+        predicts = self.head(features, labels, mode)
+
+        if mode == "export":
+            return [image, predicts]
+        if mode != "train":
+            return loader, predicts
+
+        loss = self.loss(predicts, labels)
+        label = labels['label']
+        acc = fluid.layers.accuracy(predicts['predict'], label, k=1)
+        outputs = {
+            'total_loss': loss,
+            'decoded_out': predicts['decoded_out'],
+            'label': label,
+            'acc': acc
+        }
+        return loader, outputs
diff --git a/ppocr/modeling/backbones/det_mobilenet_v3.py b/ppocr/modeling/backbones/det_mobilenet_v3.py
index 87f5dd72..508f2bbf 100755
--- a/ppocr/modeling/backbones/det_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/det_mobilenet_v3.py
@@ -79,6 +79,8 @@ class MobileNetV3():
assert self.scale in supported_scale, \
"supported scale are {} but input scale is {}".format(supported_scale, self.scale)
+ self.disable_se = params.get('disable_se', False)
+
def __call__(self, input):
scale = self.scale
inplanes = self.inplanes
@@ -232,7 +234,7 @@ class MobileNetV3():
num_groups=num_mid_filter,
use_cudnn=False,
name=name + '_depthwise')
- if use_se:
+ if use_se and not self.disable_se:
conv1 = self.se_block(
input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
diff --git a/ppocr/modeling/heads/cls_head.py b/ppocr/modeling/heads/cls_head.py
new file mode 100644
index 00000000..4567adcb
--- /dev/null
+++ b/ppocr/modeling/heads/cls_head.py
@@ -0,0 +1,46 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+
+
+class ClsHead(object):
+    """Classification head: global average pool, one FC layer, softmax.
+
+    Args:
+        params (dict): must contain 'class_dim', used as the FC output size.
+    """
+
+    def __init__(self, params):
+        super(ClsHead, self).__init__()
+        self.class_dim = params['class_dim']
+
+    def __call__(self, inputs, labels=None, mode=None):
+        """Build the head on top of `inputs`.
+
+        Args:
+            inputs: feature variable fed to the global average pooling.
+            labels: accepted but not used by this head.
+            mode: accepted but not used by this head.
+
+        Returns:
+            dict: 'predict' is the softmax of the FC output and
+            'decoded_out' is the argmax of the FC output along axis 1.
+        """
+        pooled = fluid.layers.pool2d(
+            input=inputs, pool_type='avg', global_pooling=True)
+
+        # Uniform init bound: 1 / sqrt(size of the pooled tensor's dim 1).
+        bound = 1.0 / math.sqrt(pooled.shape[1] * 1.0)
+        weight_attr = fluid.param_attr.ParamAttr(
+            name="fc_0.w_0",
+            initializer=fluid.initializer.Uniform(-bound, bound))
+        bias_attr = fluid.param_attr.ParamAttr(name="fc_0.b_0")
+
+        logits = fluid.layers.fc(input=pooled,
+                                 size=self.class_dim,
+                                 param_attr=weight_attr,
+                                 bias_attr=bias_attr)
+
+        probabilities = fluid.layers.softmax(logits, use_cudnn=False)
+        class_index = fluid.layers.argmax(logits, axis=1)
+        return {'predict': probabilities, 'decoded_out': class_index}
diff --git a/ppocr/modeling/heads/det_db_head.py b/ppocr/modeling/heads/det_db_head.py
index 56998044..59b3a160 100644
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@@ -123,6 +123,13 @@ class DBHead(object):
return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))
def __call__(self, conv_features, mode="train"):
+ """
+ Fuse different levels of feature map from backbone in the FPN manner.
+ Args:
+ conv_features(list): feature maps from backbone
+ mode(str): runtime mode, can be "train", "eval" or "test"
+ Return: predicts
+ """
c2, c3, c4, c5 = conv_features
param_attr = fluid.initializer.MSRAInitializer(uniform=False)
in5 = fluid.layers.conv2d(
diff --git a/ppocr/modeling/losses/cls_loss.py b/ppocr/modeling/losses/cls_loss.py
new file mode 100755
index 00000000..c187dce3
--- /dev/null
+++ b/ppocr/modeling/losses/cls_loss.py
@@ -0,0 +1,33 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+
+
+class ClsLoss(object):
+    """Mean cross-entropy loss for the classification model.
+
+    Args:
+        params (dict): accepted for interface parity with the other
+            modules; no key is read from it here.
+    """
+
+    def __init__(self, params):
+        super(ClsLoss, self).__init__()
+        # Kept as an attribute so __call__ and any caller share one
+        # definition of the per-sample loss.
+        self.loss_func = fluid.layers.cross_entropy
+
+    def __call__(self, predicts, labels):
+        """Compute the loss.
+
+        Args:
+            predicts (dict): must contain 'predict', passed as `input` to
+                the loss function without any further activation here.
+                NOTE(review): presumably already softmax probabilities
+                (ClsHead emits softmax under 'predict') -- confirm.
+            labels (dict): must contain 'label'.
+
+        Returns:
+            The mean of the per-sample cross-entropy values.
+        """
+        predict = predicts['predict']
+        label = labels['label']
+        cost = self.loss_func(input=predict, label=label)
+        mean_cost = fluid.layers.mean(cost)
+        return mean_cost
diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py
index f115f12e..0792cde0 100644
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -37,6 +37,7 @@ class DBPostProcess(object):
self.max_candidates = params['max_candidates']
self.unclip_ratio = params['unclip_ratio']
self.min_size = 3
+ self.dilation_kernel = np.array([[1, 1], [1, 1]])
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
'''
@@ -140,8 +141,9 @@ class DBPostProcess(object):
boxes_batch = []
for batch_index in range(pred.shape[0]):
height, width = pred.shape[-2:]
- tmp_boxes, tmp_scores = self.boxes_from_bitmap(
- pred[batch_index], segmentation[batch_index], width, height)
+
+ mask = cv2.dilate(np.array(segmentation[batch_index]).astype(np.uint8), self.dilation_kernel)
+ tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height)
boxes = []
for k in range(len(tmp_boxes)):
diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py
index b4b2021e..97237cfa 100755
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@@ -29,7 +29,9 @@ class CharacterOps(object):
if self.character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
- elif self.character_type == "ch":
+ elif self.character_type in [
+ "ch", 'japan', 'korean', 'french', 'german'
+ ]:
character_dict_path = config['character_dict_path']
add_space = False
if 'use_space_char' in config:
@@ -166,7 +168,7 @@ def cal_predicts_accuracy_srn(char_ops,
cur_label = []
cur_pred = []
for j in range(max_text_len):
- if labels[j + i * max_text_len] != int(char_num-1): #0
+ if labels[j + i * max_text_len] != int(char_num - 1): #0
cur_label.append(labels[j + i * max_text_len][0])
else:
break
@@ -178,7 +180,8 @@ def cal_predicts_accuracy_srn(char_ops,
elif j == len(cur_label) and j == max_text_len:
acc_num += 1
break
- elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num-1):
+ elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(
+ char_num - 1):
acc_num += 1
break
acc = acc_num * 1.0 / img_num
diff --git a/ppocr/utils/french_dict.txt b/ppocr/utils/french_dict.txt
new file mode 100644
index 00000000..c7cd8ec5
--- /dev/null
+++ b/ppocr/utils/french_dict.txt
@@ -0,0 +1,118 @@
+!
+"
+%
+&
+'
+(
+)
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+?
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+]
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+«
+³
+µ
+º
+»
+À
+Á
+Â
+Å
+É
+Ê
+Î
+Ö
+ß
+à
+á
+â
+ä
+å
+æ
+ç
+è
+é
+ê
+ë
+í
+î
+ï
+ñ
+ò
+ó
+ô
+ö
+ø
+ù
+ú
+û
+ü
+
diff --git a/ppocr/utils/german_dict.txt b/ppocr/utils/german_dict.txt
new file mode 100644
index 00000000..30c4d421
--- /dev/null
+++ b/ppocr/utils/german_dict.txt
@@ -0,0 +1,131 @@
+!
+"
+$
+%
+&
+'
+(
+)
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+>
+?
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+]
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+£
+§
+
+²
+´
+µ
+·
+º
+¼
+½
+¿
+À
+Á
+Ä
+Å
+Ç
+É
+Í
+Ï
+Ô
+Ö
+Ø
+Ù
+Ü
+ß
+à
+á
+â
+ã
+ä
+å
+æ
+ç
+è
+é
+ê
+ë
+í
+ï
+ñ
+ò
+ó
+ô
+ö
+ø
+ù
+ú
+û
+ü
+
diff --git a/ppocr/utils/ic15_dict.txt b/ppocr/utils/ic15_dict.txt
index 71043689..6fbd99f4 100644
--- a/ppocr/utils/ic15_dict.txt
+++ b/ppocr/utils/ic15_dict.txt
@@ -34,3 +34,30 @@ w
x
y
z
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+
diff --git a/ppocr/utils/japan_dict.txt b/ppocr/utils/japan_dict.txt
new file mode 100644
index 00000000..ddcc89f7
--- /dev/null
+++ b/ppocr/utils/japan_dict.txt
@@ -0,0 +1,4399 @@
+誰
+が
+一
+番
+に
+着
+く
+か
+私
+は
+分
+り
+ま
+せ
+ん
+。
+多
+の
+動
+物
+人
+間
+よ
+っ
+て
+滅
+ぼ
+さ
+れ
+た
+テ
+ニ
+ス
+部
+員
+で
+す
+エ
+ミ
+幸
+そ
+う
+見
+え
+こ
+事
+実
+を
+心
+留
+め
+お
+い
+下
+彼
+女
+ち
+世
+話
+し
+る
+達
+国
+際
+な
+と
+思
+約
+束
+破
+べ
+き
+あ
+道
+路
+横
+切
+車
+注
+意
+生
+甲
+斐
+父
+外
+へ
+行
+承
+知
+弁
+当
+食
+ょ
+小
+説
+1
+つ
+も
+読
+ど
+ら
+、
+母
+親
+少
+似
+卑
+屈
+奴
+曇
+日
+音
+楽
+好
+本
+ず
+仕
+引
+受
+け
+サ
+ッ
+カ
+ー
+以
+前
+今
+気
+混
+ば
+問
+題
+2
+時
+待
+ボ
+ブ
+友
+だ
+ぞ
+午
+後
+家
+来
+子
+供
+ろ
+申
+告
+何
+夕
+済
+み
+雪
+降
+陰
+口
+言
+的
+年
+馬
+鹿
+ね
+大
+変
+忙
+危
+険
+遅
+刻
+度
+学
+校
+入
+電
+々
+酒
+飲
+む
+顔
+奇
+妙
+聞
+自
+慢
+声
+ク
+ラ
+男
+数
+3
+0
+歴
+史
+試
+験
+計
+画
+反
+対
+づ
+先
+渡
+連
+恐
+羽
+振
+ロ
+ン
+ド
+合
+由
+舞
+靴
+向
+手
+紙
+週
+休
+釣
+ひ
+わ
+?
+頼
+ア
+メ
+リ
+婦
+結
+婚
+猫
+木
+登
+ぶ
+ジ
+ョ
+駅
+方
+歩
+怒
+必
+要
+折
+返
+ケ
+キ
+召
+上
+成
+功
+努
+力
+選
+び
+屋
+坂
+東
+京
+育
+月
+曜
+終
+買
+戦
+争
+起
+目
+覚
+病
+院
+元
+無
+セ
+フ
+阪
+や
+格
+祝
+ゆ
+十
+中
+八
+九
+勘
+定
+我
+ほ
+叫
+耳
+通
+書
+帽
+5
+ル
+朝
+君
+兄
+交
+故
+亡
+単
+純
+列
+止
+老
+全
+新
+忠
+尊
+重
+解
+決
+欲
+ざ
+僕
+浮
+件
+裁
+持
+イ
+ギ
+味
+夢
+ぎ
+続
+ぜ
+直
+接
+考
+頭
+別
+住
+辞
+役
+立
+明
+違
+指
+摘
+勇
+答
+適
+冷
+売
+旅
+疲
+辺
+鄙
+村
+訪
+水
+回
+両
+昨
+映
+空
+太
+陽
+輝
+正
+賛
+町
+案
+内
+助
+会
+次
+延
+期
+チ
+ム
+最
+価
+値
+タ
+シ
+歌
+他
+劣
+勉
+強
+ェ
+喜
+伝
+職
+業
+関
+係
+誉
+犬
+近
+座
+転
+使
+妹
+建
+奥
+損
+野
+球
+緒
+繊
+細
+出
+教
+マ
+駄
+石
+油
+不
+足
+震
+ト
+同
+じ
+ガ
+ツ
+発
+表
+然
+失
+敗
+滞
+在
+バ
+コ
+吸
+平
+和
+泳
+寒
+秋
+社
+台
+短
+死
+情
+報
+民
+政
+府
+作
+帰
+宅
+順
+調
+真
+仮
+命
+用
+箱
+階
+運
+長
+ご
+腕
+放
+乗
+利
+程
+腹
+悪
+念
+怖
+形
+抗
+議
+ゲ
+面
+白
+記
+憶
+姉
+都
+劇
+残
+早
+信
+懸
+ャ
+品
+パ
+初
+開
+理
+誤
+急
+ぐ
+簡
+易
+英
+語
+娘
+寝
+赤
+ゃ
+願
+障
+金
+支
+払
+冒
+論
+ぱ
+確
+ヒ
+産
+火
+散
+守
+有
+名
+医
+者
+毎
+渋
+レ
+ビ
+消
+服
+宿
+署
+齢
+ポ
+突
+げ
+良
+徒
+貧
+戻
+牛
+二
+夫
+脱
+暑
+湖
+深
+普
+段
+謝
+優
+甘
+ソ
+非
+常
+場
+警
+察
+呼
+誘
+惑
+坊
+字
+可
+能
+料
+涙
+落
+量
+妻
+愛
+界
+温
+授
+船
+酔
+万
+仲
+付
+捜
+静
+若
+ダ
+招
+追
+夜
+雨
+述
+山
+獲
+訳
+居
+異
+熱
+息
+点
+主
+質
+始
+花
+飾
+幼
+寂
+興
+プ
+触
+活
+オ
+青
+春
+尋
+盲
+燃
+傷
+科
+晩
+騒
+機
+限
+予
+想
+望
+代
+薬
+効
+「
+風
+共
+去
+ぬ
+」
+孤
+独
+店
+径
+ュ
+態
+銀
+認
+至
+驚
+美
+添
+光
+ピ
+ノ
+伴
+奏
+嬉
+7
+除
+席
+吹
+創
+設
+市
+!
+髪
+悔
+秘
+密
+高
+地
+丸
+悲
+極
+暇
+葉
+速
+走
+三
+企
+天
+茶
+働
+演
+8
+泣
+公
+園
+原
+因
+勝
+標
+進
+ヘ
+郵
+便
+局
+農
+遊
+到
+笑
+冬
+ィ
+参
+加
+版
+暗
+絶
+誕
+6
+歳
+鍵
+絵
+栄
+将
+張
+過
+弾
+ふ
+等
+具
+雇
+賞
+得
+絞
+取
+健
+康
+身
+頂
+客
+迎
+所
+夏
+海
+草
+ヨ
+配
+古
+鳴
+庭
+探
+偶
+眠
+昼
+禁
+煙
+提
+閉
+飛
+魚
+捕
+断
+武
+装
+背
+街
+丘
+ホ
+泊
+快
+求
+怠
+惰
+グ
+欠
+片
+視
+特
+難
+締
+半
+壁
+困
+冗
+談
+族
+神
+戸
+遣
+状
+握
+第
+ザ
+師
+倍
+級
+逆
+為
+化
+恩
+諸
+貸
+卒
+諦
+互
+依
+存
+円
+嫌
+紳
+士
+凍
+誇
+例
+迷
+否
+準
+備
+専
+晴
+満
+邪
+腰
+痛
+菜
+商
+離
+疑
+資
+送
+忘
+暖
+素
+敵
+窓
+色
+写
+途
+文
+防
+識
+4
+側
+叱
+裕
+福
+果
+移
+停
+百
+魔
+性
+郎
+感
+耐
+治
+恋
+敬
+様
+診
+橋
+安
+慰
+貴
+眺
+討
+処
+逃
+符
+許
+狂
+審
+軽
+率
+恵
+規
+則
+猶
+借
+歯
+録
+責
+任
+絡
+爆
+撃
+趣
+替
+芝
+捨
+抜
+費
+ペ
+黙
+床
+個
+裏
+暮
+叔
+ベ
+尽
+迫
+材
+田
+舎
+没
+侮
+辱
+施
+歓
+援
+滑
+恥
+飯
+置
+徹
+廃
+机
+退
+拝
+詰
+ぁ
+蝙
+蝠
+従
+王
+札
+幌
+氏
+随
+縁
+整
+頓
+ぽ
+概
+六
+川
+岸
+博
+館
+図
+慣
+儀
+像
+緻
+昇
+土
+伏
+悩
+敷
+包
+囲
+善
+肉
+担
+偽
+呂
+盛
+噂
+希
+序
+焼
+狭
+掌
+苦
+避
+積
+港
+復
+荷
+御
+嘘
+徳
+ゴ
+再
+粧
+卵
+繰
+習
+畑
+匹
+是
+星
+景
+余
+永
+久
+デ
+盗
+ヤ
+巨
+遠
+械
+愚
+洗
+濯
+珍
+溶
+込
+履
+昔
+千
+泥
+棒
+号
+乏
+偉
+継
+ウ
+崩
+勤
+務
+術
+克
+聴
+権
+惚
+弟
+体
+飼
+軒
+犯
+課
+修
+四
+鮮
+汽
+鳥
+現
+旧
+塔
+冊
+五
+塩
+経
+批
+判
+簿
+棚
+才
+研
+究
+ぷ
+類
+覆
+祈
+往
+妨
+柄
+財
+捧
+衛
+距
+闇
+著
+区
+隣
+相
+比
+頃
+就
+矛
+盾
+広
+掛
+典
+型
+制
+憎
+殺
+モ
+嘆
+雄
+鼓
+負
+右
+窮
+法
+掃
+繕
+篠
+流
+章
+看
+杯
+植
+評
+枚
+叶
+抑
+滴
+斎
+森
+額
+蛮
+ナ
+攻
+雅
+米
+雑
+編
+換
+構
+詳
+帳
+厳
+ワ
+預
+室
+更
+銘
+与
+濃
+臭
+布
+衆
+撮
+舌
+容
+貌
+乳
+喫
+固
+巣
+懇
+奈
+群
+集
+皆
+影
+響
+ネ
+悟
+弱
+ハ
+嗅
+飽
+完
+了
+浴
+昆
+虫
+ヌ
+乱
+描
+俺
+首
+嵐
+給
+低
+派
+衝
+団
+投
+函
+礼
+島
+委
+官
+周
+鋭
+宝
+契
+採
+致
+漏
+翻
+洋
+恒
+保
+証
+筆
+潜
+ォ
+枯
+打
+憩
+護
+尾
+埋
+紹
+介
+城
+谷
+沈
+季
+節
+巻
+倒
+巡
+姿
+踏
+黒
+己
+沿
+ぇ
+懐
+扮
+詩
+労
+左
+底
+占
+差
+架
+壊
+ゼ
+欺
+検
+造
+寄
+庫
+眼
+鏡
+慮
+郊
+購
+営
+駐
+血
+模
+ズ
+収
+越
+板
+癌
+飢
+井
+罵
+忍
+増
+賢
+涼
+荒
+踊
+些
+並
+省
+銃
+州
+症
+麻
+雀
+濡
+般
+展
+覧
+紅
+統
+領
+ぴ
+松
+江
+c
+d
+講
+義
+熟
+扶
+養
+属
+9
+鹸
+遇
+寿
+司
+憂
+乾
+唱
+割
+皿
+拭
+貯
+箇
+殴
+鉛
+狙
+蒸
+雲
+椅
+未
+練
+卓
+ぺ
+淑
+壇
+憲
+末
+沙
+汰
+操
+匙
+抱
+候
+鼻
+ヶ
+蔑
+毛
+勢
+償
+浜
+激
+倹
+製
+宛
+ゅ
+翌
+稿
+鞄
+届
+慌
+扱
+式
+組
+瓶
+渉
+句
+技
+陸
+器
+河
+衰
+納
+律
+罰
+譲
+旨
+補
+傘
+贈
+請
+駆
+腐
+ァ
+線
+丁
+骨
+筋
+伺
+丈
+祖
+孫
+犠
+牲
+遭
+肌
+綺
+麗
+魂
+種
+減
+唯
+婆
+推
+薦
+訓
+曲
+睡
+頑
+s
+f
+勧
+印
+刷
+錠
+励
+胆
+糧
+績
+排
+剣
+岳
+涯
+競
+精
+敏
+衣
+赦
+志
+位
+胸
+堅
+販
+査
+税
+壷
+暴
+露
+益
+敢
+撒
+喧
+嘩
+蹴
+沢
+妊
+娠
+芸
+航
+催
+射
+超
+改
+戒
+n
+a
+泉
+奪
+零
+咲
+隠
+遺
+憾
+漱
+肥
+輩
+房
+寺
+奨
+脚
+汚
+煩
+弥
+怪
+免
+氷
+灯
+総
+ユ
+戚
+掘
+維
+釈
+拾
+凝
+漫
+兵
+痔
+馳
+粋
+微
+訴
+浅
+緩
+崖
+覗
+塞
+虚
+北
+湘
+南
+賭
+腎
+臓
+仰
+仙
+筈
+砂
+糖
+干
+唾
+観
+娯
+臆
+門
+宇
+宙
+複
+毒
+奮
+患
+撲
+控
+液
+貫
+禄
+辛
+郷
+稼
+餓
+痙
+攣
+秀
+澄
+遂
+挨
+拶
+慈
+富
+豪
+溺
+県
+緑
+籠
+刑
+根
+脅
+誌
+訂
+揺
+築
+罪
+喋
+陥
+姫
+髭
+剃
+害
+疎
+銭
+墓
+賦
+押
+穴
+淡
+噛
+賃
+導
+域
+肩
+尻
+伯
+牧
+傾
+基
+又
+咳
+邦
+貨
+豊
+挑
+偏
+溜
+傲
+樹
+含
+滝
+魅
+嫉
+妬
+脇
+謎
+磨
+括
+佐
+猛
+烈
+玄
+吉
+執
+応
+及
+拒
+顎
+鬚
+既
+狐
+浣
+腸
+隅
+拡
+吠
+璧
+ヴ
+顧
+睦
+湯
+幾
+輪
+七
+絹
+湿
+疹
+池
+袋
+灰
+摂
+即
+紛
+刈
+況
+染
+矢
+聖
+塗
+伸
+浪
+岩
+餌
+戴
+鎖
+宣
+測
+工
+被
+象
+痩
+搭
+妥
+協
+汗
+救
+跳
+裂
+林
+檎
+棲
+帝
+潮
+侵
+略
+柔
+票
+蝶
+肯
+筒
+呆
+沼
+厚
+宗
+梨
+軍
+蔵
+較
+羨
+粛
+痢
+愉
+儲
+癒
+鬱
+幹
+掴
+鎮
+縫
+炎
+示
+諾
+寛
+虜
+瀬
+鉄
+祭
+醜
+菓
+項
+岡
+胎
+拠
+択
+網
+拳
+党
+繁
+熊
+爪
+慎
+墜
+穏
+募
+縦
+伊
+藤
+胃
+惜
+芽
+誠
+薄
+嫁
+譜
+寮
+薔
+薇
+賜
+1
+2
+l
+i
+y
+潔
+充
+据
+舟
+遮
+寸
+猿
+・
+抵
+暢
+錆
+脈
+挙
+瞬
+萎
+聡
+埠
+琵
+琶
+黄
+策
+宜
+梅
+各
+匂
+清
+撥
+載
+境
+吐
+怯
+唸
+却
+拍
+端
+吻
+惨
+剤
+甥
+核
+緊
+香
+層
+系
+躍
+嬢
+縛
+酸
+t
+〆
+鱗
+堂
+算
+貢
+献
+威
+監
+督
+針
+襲
+銅
+姪
+幽
+霊
+癖
+綾
+扉
+雹
+崎
+条
+療
+封
+癇
+癪
+揮
+碁
+瓜
+泰
+嘲
+錯
+凡
+碗
+豚
+哀
+児
+童
+虐
+蕩
+刺
+波
+貰
+凪
+炭
+嚢
+索
+圧
+均
+帯
+u
+o
+峠
+西
+騙
+肘
+砕
+黍
+革
+棄
+俳
+秩
+如
+宵
+竜
+姓
+噴
+閑
+幅
+虎
+塀
+堪
+鈴
+双
+照
+淋
+葬
+悠
+蝿
+鳩
+獄
+晒
+j
+仏
+某
+享
+尿
+慶
+裸
+丹
+(
+)
+杖
+逮
+徴
+災
+〔
+〕
+酷
+角
+炉
+僚
+揚
+馴
+珠
+霧
+詞
+潟
+陣
+鍋
+拘
+焦
+h
+k
+蜜
+蜂
+穂
+湾
+弄
+跡
+麓
+蔭
+讐
+弊
+董
+〜
+綴
+ゾ
+膳
+称
+痒
+倉
+怨
+掻
+蓄
+茨
+摩
+厄
+陳
+詫
+贔
+屓
+桃
+赴
+墟
+湧
+逢
+隻
+―
+伎
+潰
+鯔
+鑑
+鯨
+炊
+腑
+獣
+勿
+禎
+沖
+縄
+蕾
+股
+娩
+枝
+殆
+氾
+濫
+乞
+恨
+豆
+禿
+釧
+扇
+誓
+躊
+躇
+徐
+貿
+雷
+鋳
+飴
+洞
+窟
+粗
+鎌
+鈍
+刊
+狼
+煎
+幻
+旗
+狩
+耕
+範
+掲
+源
+漢
+枕
+嬌
+莫
+券
+崇
+隔
+袈
+裟
+里
+暫
+虹
+櫛
+硬
+此
+縮
+m
+兆
+轢
+帆
+這
+央
+俗
+瞼
+頻
+需
+餐
+琴
+羊
+令
+薫
+勃
+朽
+虻
+賑
+刀
+籍
+漂
+煽
+斉
+株
+褒
+膝
+,
+C
+D
+叩
+鶏
+N
+A
+S
+糸
+.
+挟
+
+胡
+椒
+玩
+祉
+"
+0
+—
+併
+蛾
+ゥ
+郡
+`
+'
+・
+9
+6
+8
+3
+-
+拿
+爵
+准
+幕
+5
+~
+副
+鞭
+7
+兼
+:
+á
+ň
+宮
+廷
+磁
+4
+ó
+菌
+卿
+皇
+峰
+%
+貝
+軟
+,
+把
+携
+/
+析
+ž
+盤
+斑
+輸
+託
+隊
+蓋
+『
+』
+彩
+&
+詠
+篇
+騎
+_
+晋
+釜
+尚
+欧
+紀
+管
+渓
+韓
+李
+栽
+培
+尉
+骸
+ă
+ş
+剖
+翼
+亜
+羅
+奉
+畔
+拓
+環
+礁
+枢
+斜
+漕
+艇
+稀
+臣
+勲
+棘
+艦
+盟
+粒
+闘
+å
+戯
+∇
+柵
+醸
+礎
+旬
+聘
+矮
+棟
+碑
+殿
+億
+!
+惧
+抽
+迭
+%
+
+垂
+還
+澤
+輔
+粉
+齊
+秦
+砲
+屯
+織
+胞
+諮
+殊
+媒
+嫡
+綱
+搬
+該
+透
+禽
+弦
+瞭
+坦
+浸
+韻
+竪
+墳
+隷
+撤
+哲
+叙
+é
+庶
+紡
+禍
+肺
+婉
+$
+沃
+鬼
+棋
+揃
+楊
+綿
+訟
+遁
+妄
+玉
+軌
+榴
+蘇
+臨
+疇
+披
+顕
+圏
+Ș
+融
+擦
+Č
+č
+埃
+曖
+昧
+旋
+瞳
+謡
+衡
+槍
+茎
+唐
+轄
+郴
+捉
+覇
+嘉
+陵
+嘴
+蔓
+嘱
+閲
+征
+謄
+胚
+陶
+浦
+勅
+芻
+疾
+昏
+;
+耗
+践
+禅
+襟
+曹
+瞑
+ș
+偵
+酬
+駿
+蔡
+諷
+瑁
+í
+è
+:
+ø
+呈
+笠
+岬
+洛
+聾
+唖
+溝
+堀
+雌
+牝
+仔
+尼
+庁
+穫
+妖
+曽
+=
+=
+嗜
+珊
+瑚
+軸
+#
+紋
+劉
+璿
+胤
+墉
+彫
+盆
+饗
+宴
+挿
+蔽
+脳
+暦
+ä
+õ
+廊
+讃
+ë
+促
+峻
+壌
+訛
+鉱
+姦
+唆
+舗
+迂
+ñ
+弘
+昌
+舶
+箔
+冠
+溢
+鶴
+肛
+脊
+柱
+傑
+智
+彦
+朋
+昪
+靖
+姻
+哨
+尺
+冥
+
+剪
+“
+”
+L
+P
+-
+瀕
+ö
+津
+汐
+泌
+皮
+膚
+肢
+只
+鍮
+斧
+壮
+倫
+幣
+儒
+遷
+殻
+惹
+累
+ß
+珪
+弛
+曝
+浙
+華
+柿
+哺
+ü
+&
+W
+Z
+X
+I
+薪
+E
+M
+ę
+雰
+媚
+艶
+蹄
+拐
+ř
+â
+塊
+箋
+漠
+呪
+Ł
+ą
+ł
+挽
+灌
+漑
+煉
+瓦
+G
+μ
+迅
++
+猥
+褻
+頬
+逐
+廠
+ć
+邸
+疼
+伐
+燥
+凌
+駕
+錐
+尖
+û
+呉
+翔
+憤
+慨
+琥
+珀
+漸
+堆
+ā
+亀
+肖
+T
+R
+à
+枠
+桁
+剰
+匿
+秤
+厩
+褐
+Ž
+đ
+Ä
+趙
+š
+餃
+擁
+脆
+脂
+肪
+漿
+×
+晶
+岐
+遍
+謙
+殉
+弓
+Ü
+昭
+Å
+*
+澎
+擬
+債
+秒
+猟
+歪
+阻
+砦
+凸
+諜
+ı
+…
+腫
+晃
+也
+龍
+燕
+閣
+ê
+眉
+牡
+旺
+ç
+ō
+恣
+疆
+坐
+孵
+搾
+傍
+■
+削
+唇
+釉
+凹
+囚
+魏
+腱
+謀
+ţ
+堤
+#
+笛
+靭
+V
+B
+崗
+О
+с
+т
+р
+о
+в
+Г
+а
+л
+я
+膜
+椎
+帥
+剛
+梢
+俊
+蟹
+腿
+牽
+粘
+葦
+ń
+劾
+祥
+紺
+ヵ
+芳
+須
+賀
+填
+殖
+痺
+浚
+渫
+H
+F
+ī
+匯
+Š
+寡
+閃
+É
+疫
+庇
+而
+頁
+侯
+挺
+畳
+浄
+淘
+杭
+K
+縞
+牙
+循
+髄
+Á
+屑
+朴
+p
+隆
+傭
+紫
+峡
+謬
+ã
+膠
+瘍
+瞞
+鋸
+塁
+鋼
+雛
+弧
+ğ
+桂
+½
+唄
+扁
+α
+酵
+’
+;
+肝
+Ö
+孔
+彙
+φ
+梁
+栖
+妃
+蛹
+勾
+欄
+茂
+漁
+晦
+遼
+寧
+吊
+刃
+彰
+之
+濁
+喪
+僧
+萬
+膣
+那
+蛍
+鍛
+麦
+腺
+ô
+Ó
+λ
+尤
+z
+Δ
+ż
+ò
+℃
+肋
+臍
+丼
+´
+踵
+宏
+朱
+燻
+漬
+霜
++
+巧
+鐘
+冶
+膿
+疱
+寓
+蚊
+匠
+檻
+桟
+洪
+后
+ū
+楕
+垣
+孝
+e
+r
+O
+耽
+©
+鴨
+杉
+烏
+啓
+Ç
+痴
+祀
+贅
+荘
+濾
+ú
+瞰
+U
+埼
+窒
+沸
+騰
+閾
+È
+樽
+→
+陪
+Ş
+酢
+ė
+漆
+喰
+汎
+<
+æ
+乙
+²
+倣
+−
+葛
+墨
+腔
+坑
+緋
+稚
+潤
+侶
+喚
+踪
+穀
+膨
+畜
+陛
+巾
+鉢
+彗
+臼
+杵
+Í
+罹
+狡
+猾
+凱
+塑
+頸
+梱
+矯
+竹
+焙
+窄
+剥
+捗
+憧
+袖
+ð
+榮
+ț
+閥
+窩
+沌
+抄
+遡
+>
+鳳
+凰
+痕
+蛇
+矩
+罠
+詐
+ý
+楼
+庵
+ē
+°
+賊
+ồ
+爬
+柑
+橘
+曾
+郭
+措
+栗
+桐
+粥
+C
+O
+E
+卯
+詮
+忌
+
+倭
+禰
+菖
+蒲
+條
+祓
+幡
+A
+B
+L
+G
+T
+M
+S
+u
+(
+)
+a
+.
+W
+i
+V
+b
+c
+f
+e
+N
+K
+R
+U
+D
+g
+P
+醍
+醐
+F
+Z
+I
+H
+Q
+y
+o
+t
+J
+ヂ
+J
+槙
+嵯
+峨
+畿
+塚
+Y
+X
+淀
+伽
+s
+ヅ
+餅
+蒡
+穣
+ゞ
+絲
+p
+鯖
+n
+琳
+柳
+髷
+閤
+稲
+菊
+巌
+迦
+抹
+曳
+叡
+壺
+苑
+羌
+狗
+ヰ
+醤
+ぉ
+硝
+袴
+倶
+汁
+但
+杮
+葺
+煮
+爺
+夙
+桜
+亭
+ゑ
+苗
+m
+曼
+荼
+簪
+☆
+辻
+鑢
+ゝ
+稗
+蹊
+貼
+獅
+廟
+阿
+陀
+蘭
+妓
+翠
+柚
+賓
+芦
+拉
+麺
+帷
+或
+槐
+屎
+j
+惟
+撫
+瑞
+侍
+巴
+廉
+峯
+菩
+薩
+吽
+弖
+彌
+佛
+耨
+閇
+貞
+闍
+閦
+洲
+妾
+仁
+宕
+媛
+隧
+笥
+葵
+茜
+譚
+渥
+旭
+綬
+霰
+楓
+雁
+朗
+渕
+梓
+巫
+姐
+鉾
+囃
+藩
+藺
+鮎
+粟
+袷
+篤
+杏
+遵
+徽
+宍
+瓊
+堵
+猷
+馨
+與
+麿
+冨
+彷
+徨
+湊
+菅
+按
+渠
+龗
+鞍
+采
+琢
+枳
+詣
+祇
+稙
+祐
+毅
+冲
+坡
+阯
+堯
+庄
+掾
+牟
+豫
+尹
+弉
+牌
+鑒
+夷
+俘
+喬
+暁
+允
+亮
+緯
+繋
+偈
+誡
+諡
+瑠
+璃
+弼
+岑
+亥
+郁
+媞
+磯
+佳
+翁
+蹟
+揆
+槻
+嗣
+恭
+熈
+畝
+噌
+燈
+脩
+佩
+閻
+壱
+逸
+眷
+誼
+籌
+芋
+鰯
+璽
+旛
+鑰
+摺
+鉤
+淫
+祠
+凉
+牒
+款
+蟄
+丞
+鋒
+檗
+帖
+菟
+荻
+邨
+厨
+佑
+乃
+鷺
+屏
+柴
+於
+箒
+祷
+蓮
+鵜
+丑
+寅
+碓
+渦
+蔚
+鰻
+姥
+毘
+閏
+涌
+庸
+樂
+祚
+邵
+虞
+邇
+悦
+栃
+怡
+斯
+榎
+厭
+爾
+圓
+應
+吏
+并
+堰
+奄
+掩
+壕
+稔
+焔
+w
+猴
+@
+薗
+諏
+窯
+甚
+麹
+竈
+无
+穢
+窠
+廻
+寇
+鈞
+菴
+鍍
+珉
+慕
+詢
+肇
+羲
+莽
+襖
+鴎
+錦
+紗
+胴
+輿
+玲
+畷
+窪
+徂
+徠
+對
+桶
+螺
+鈿
+麝
+巳
+卸
+寵
+狛
+裳
+剋
+喩
+樋
+噺
+藍
+婢
+梵
+樫
+鷲
+嶽
+憐
+宰
+塾
+蔬
+涅
+槃
+址
+耆
+穎
+糠
+鰭
+俣
+咒
+鼠
+裘
+筯
+繍
+宸
+翰
+魁
+隈
+匡
+熙
+翫
+畠
+瓢
+壽
+卉
+筐
+僑
+蝦
+蹉
+k
+v
+跋
+釐
+堕
+h
+r
+d
+哩
+l
+樓
+霞
+韶
+碩
+皓
+臥
+鷹
+淵
+篭
+收
+桑
+誅
+國
+竄
+煕
+苔
+晏
+韋
+芥
+墾
+闔
+梆
+拵
+舅
+鎧
+蛙
+播
+楯
+廓
+暹
+惠
+瑜
+鑁
+舘
+恂
+衞
+嶋
+駒
+箏
+悼
+橿
+梶
+箸
+烹
+喝
+稽
+餡
+鰹
+樺
+㈱
+兜
+竃
+炒
+盒
+茅
+萱
+嶺
+藉
+苅
+坤
+闥
+懲
+湛
+藁
+衙
+饉
+戈
+桓
+衫
+聚
+潅
+藷
+糟
+妍
+竿
+絃
+罷
+擾
+疏
+鈔
+銕
+亟
+瀧
+勒
+躰
+佶
+錬
+慧
+檀
+聨
+頴
+亘
+尭
+愿
+贋
+證
+撰
+附
+阜
+毫
+漉
+惣
+蘂
+爐
+賎
+祢
+刹
+叉
+饅
+茲
+菱
+筮
+澳
+纂
+楚
+辰
+詔
+遐
+蟻
+吾
+萩
+鞠
+謹
+叢
+伍
+卜
+吃
+桔
+梗
+砧
+敦
+仇
+宥
+飫
+粂
+廿
+鼎
+逕
+嬪
+箭
+恤
+杣
+舖
+汲
+竟
+邃
+糾
+邑
+哇
+〈
+〉
+圀
+盡
+儼
+椋
+籃
+芹
+滋
+蛤
+淳
+駝
+猪
+沂
+稜
+莵
+藏
+經
+筍
+茗
+侠
+凶
+蓆
+紐
+蕎
+魯
+朔
+澗
+藻
+甫
+琮
+鬘
+欣
+欽
+笙
+舜
+闕
+煇
+鈎
+騨
+蒔
+鰐
+埵
+幢
+鑽
+嵌
+楷
+榛
+錍
+鈷
+笈
+鐸
+磬
+碧
+熨
+斗
+翅
+襴
+鑚
+鵄
+吟
+垢
+掟
+卦
+筑
+茄
+葱
+竴
+廼
+玖
+珂
+跏
+蝉
+誄
+串
+沓
+游
+蕃
+蕪
+鍬
+粮
+諭
+盃
+葩
+迪
+圭
+廬
+諶
+德
+祕
+裃
+荊
+洒
+蟷
+螂
+腋
+袍
+髮
+禮
+趺
+堺
+嘗
+甞
+帛
+蝕
+芿
+讀
+褌
+坪
+簒
+鋤
+硯
+翺
+棺
+胝
+篩
+磐
+隋
+諫
+亨
+旦
+孚
+叟
+曉
+盈
+澪
+懿
+爲
+琦
+愔
+圃
+濱
+奘
+諺
+藪
+註
+蜘
+蛛
+鞏
+篷
+閨
+裡
+糊
+賁
+跨
+劫
+壬
+絁
+釘
+譬
+聯
+傳
+芒
+體
+髻
+悉
+荏
+綸
+柏
+珣
+撹
+芬
+裔
+焚
+廂
+饌
+嵩
+簾
+匣
+禊
+籤
+奠
+鯉
+幟
+脛
+巷
+楳
+胖
+庚
+浩
+諒
+溥
+丙
+楠
+冑
+班
+學
+麞
+緬
+肱
+砥
+縢
+耶
+舂
+靈
+砌
+樗
+暉
+蛸
+鞆
+芙
+蓉
+雙
+鴻
+臚
+褄
+濠
+奢
+槌
+紘
+框
+蓑
+甑
+忽
+淆
+艮
+樵
+竭
+羯
+牢
+櫃
+鸞
+拙
+椿
+榊
+肴
+萠
+綜
+鮭
+笹
+苞
+硫
+奸
+徭
+躯
+戟
+襷
+閘
+櫓
+嘯
+臂
+實
+椏
+潴
+藐
+麒
+麟
+烝
+杜
+籐
+槇
+曰
+筰
+懺
+縣
+褥
+輯
+蚕
+斬
+庖
+謌
+璞
+屍
+團
+哉
+畏
+塵
+什
+鳶
+鴉
+濤
+縒
+趾
+櫻
+麩
+曠
+愍
+彊
+驕
+姶
+兎
+鴫
+竺
+僊
+雫
+彭
+灘
+餝
+棗
+蔀
+侑
+弗
+婬
+牘
+訶
+衍
+錫
+惺
+熹
+顛
+呑
+粕
+楞
+咀
+詛
+釋
+瑋
+曄
+筧
+誾
+徧
+虔
+蒐
+酋
+會
+頌
+齋
+誦
+戎
+袿
+繹
+榱
+酥
+碕
+汪
+奔
+曙
+鶯
+囀
+裾
+楮
+歎
+嬬
+婿
+升
+晧
+娼
+祟
+楢
+蓬
+杢
+篁
+柯
+弐
+几
+渤
+憙
+蜀
+芭
+蕉
+恕
+谿
+樟
+訢
+蒋
+鉦
+鍾
+馗
+鞘
+殷
+臈
+檄
+滿
+憑
+埴
+劔
+寶
+鐵
+姨
+耀
+僭
+襄
+疋
+蘆
+靺
+鞨
+悌
+仍
+枡
+鱈
+籬
+芯
+酉
+姜
+陞
+睿
+逗
+頚
+迹
+掬
+巒
+槽
+滸
+魄
+錘
+饋
+椙
+彬
+狄
+躬
+瀋
+奎
+悍
+總
+瑛
+禧
+廣
+塘
+蓼
+兌
+碾
+桝
+瞿
+醒
+苧
+嶂
+韮
+薙
+皺
+莞
+膏
+贄
+咋
+啄
+鎚
+汀
+鏃
+龕
+衷
+諱
+駈
+笄
+酌
+觀
+礙
+杓
+决
+覲
+甕
+栴
+絅
+晟
+銑
+珈
+琲
+膩
+愷
+蕭
+戮
+租
+戔
+嗚
+盞
+鵞
+軾
+昉
+爽
+宋
+匝
+瑳
+逝
+蕨
+欅
+黌
+蒼
+鎗
+惇
+其
+攘
+杲
+斥
+傅
+鞁
+毬
+璋
+賈
+蹲
+踞
+黛
+鯛
+鉋
+姞
+葡
+萄
+訥
+輌
+閬
+鬯
+靜
+瑩
+孁
+洹
+闡
+盧
+猩
+岫
+套
+巖
+篳
+篥
+舩
+覺
+沅
+衒
+凞
+祺
+袱
+托
+蟇
+巽
+藹
+狸
+衾
+ぢ
+蘊
+顗
+鮒
+遥
+邊
+箆
+簀
+雍
+筌
+漣
+筅
+鈦
+夾
+紵
+梧
+賣
+凋
+弔
+霖
+劭
+餉
+ぃ
+篋
+諚
+朕
+茸
+栂
+佃
+柘
+蔦
+鍔
+逍
+綏
+碇
+逓
+鄭
+鏑
+簺
+棹
+卍
+痘
+闢
+籟
+饂
+飩
+澱
+汝
+邉
+儛
+暾
+屠
+祁
+砺
+俵
+蒙
+藝
+熾
+洽
+榜
+莱
+璵
+蕊
+髙
+鄰
+z
+穆
+姚
+忻
+竝
+苡
+諟
+媓
+嫄
+忯
+鐙
+撞
+綽
+璨
+鑼
+苫
+煌
+皋
+當
+捺
+邁
+瞻
+舍
+[
+]
+糜
+輦
+啼
+捻
+襠
+涛
+瀾
+娑
+諧
+毀
+簫
+溪
+煤
+賠
+奕
+蜷
+雉
+咫
+暲
+艘
+拏
+筏
+塙
+蜊
+隼
+纏
+叛
+彈
+枇
+杷
+柊
+畢
+逼
+桧
+鴛
+鴦
+蝋
+燭
+箪
+豹
+鋲
+蛭
+囉
+羂
+羈
+逞
+單
+蛎
+萍
+糞
+站
+騏
+鮫
+昂
+袒
+且
+鎬
+戊
+瓔
+珞
+俸
+檜
+萌
+萊
+俔
+潭
+鵬
+翡
+柾
+亦
+玅
+箕
+咸
+獏
+瞋
+聊
+礬
+孟
+氈
+銚
+葭
+橇
+籾
+澂
+匁
+嬾
+淇
+薮
+愈
+茹
+揖
+僮
+渾
+蜻
+蛉
+羹
+酪
+洸
+嶠
+癡
+畺
+謫
+琉
+瀑
+湫
+賤
+摸
+濟
+淄
+伶
+聲
+莬
+禖
+韜
+彝
+珎
+賄
+賂
+亙
+彎
+椀
+丿
+舒
+仗
+佚
+估
+侏
+侘
+俯
+偃
+偕
+偐
+傀
+儡
+遜
+儺
+兀
+冤
+菫
+刎
+畸
+剽
+窃
+辨
+號
+匏
+厠
+吝
+嗇
+咄
+哭
+唳
+嗔
+嚆
+譯
+乘
+圜
+埒
+壹
+夥
+夬
+夭
+妲
+沆
+娟
+媽
+嫗
+岷
+帚
+幄
+幔
+幇
+淨
+繼
+徑
+忿
+恬
+懽
+戌
+截
+拇
+挂
+掖
+掣
+揉
+揶
+揄
+搦
+攝
+斟
+旁
+旡
+旻
+昵
+暈
+朏
+朧
+杁
+杞
+枅
+矧
+梟
+梔
+梛
+桴
+桾
+椥
+楫
+椹
+楡
+楪
+槿
+檐
+檣
+檸
+檬
+櫟
+殯
+麾
+沐
+沽
+涵
+淤
+滄
+滕
+滌
+澁
+眞
+瀟
+灑
+炮
+烙
+煬
+燔
+犂
+狷
+猊
+祗
+瑾
+瑪
+瑙
+甍
+瘡
+瘧
+盂
+鉉
+睨
+矍
+鑠
+矜
+碌
+碣
+磔
+礒
+礫
+禹
+稠
+稱
+笏
+笞
+筥
+筬
+箜
+篌
+筝
+箙
+篆
+籀
+篝
+簧
+粳
+糯
+糺
+絖
+絽
+綟
+縅
+繦
+緥
+縹
+繧
+繝
+纐
+纈
+纛
+罔
+罧
+羇
+聰
+肄
+膀
+胱
+膵
+膾
+臘
+舳
+范
+鷄
+苻
+苴
+擔
+莪
+蒟
+蒻
+薨
+薛
+茘
+蠣
+蛟
+蜆
+蜃
+雖
+蟠
+蠢
+衵
+衽
+袙
+袰
+裙
+裹
+褂
+裲
+褪
+褶
+襞
+襦
+袢
+襪
+誥
+誣
+諌
+謚
+謗
+譛
+譴
+讒
+豐
+貪
+賽
+贖
+扈
+跪
+踐
+躑
+躅
+躙
+躪
+軋
+軻
+輜
+辟
+檮
+邂
+逅
+邀
+邯
+鄲
+郢
+鄂
+醪
+醵
+釿
+銜
+鋏
+鋺
+錵
+鍼
+灸
+鎰
+鎹
+鐔
+鐃
+鈸
+鐇
+鑷
+鑿
+閔
+閼
+崛
+阮
+陬
+雊
+霍
+靫
+靱
+乎
+顆
+餘
+饒
+騁
+驛
+驢
+髢
+鬢
+鬨
+鮑
+鯱
+鰒
+鰰
+鱧
+鳰
+鴟
+鶉
+鵺
+鷙
+鸚
+鵡
+麁
+黠
+鼈
+齟
+齬
+棠
+遙
+瑤
+銈
+禔
+禛
+鈐
+儇
+匲
+媄
+尪
+巀
+辥
+忉
+掄
+枓
+栻
+梲
+檥
+滹
+沱
+潙
+炷
+猨
+璜
+穜
+竽
+筇
+翛
+薭
+螣
+/
+豅
+辦
+鉇
+鍑
+鑊
+鼉
+磧
+寔
+拈
+轍
+泯
+諍
+?
+錣
+爼
+纒
+鑵
+櫨
+酎
+泡
+俄
+燗
+鞋
+鵲
+茵
+缶
+紬
+絣
+衿
+鴈
+盥
+凛
+燎
+袞
+淹
+瀉
+聟
+嫐
+俤
+薊
+衢
+醗
+斂
+懌
+袁
+渟
+杼
+鱒
+瀞
+鐚
+苛
+陌
+侈
+旌
+筵
+泗
+槊
+稷
+鐐
+頒
+斤
+勺
+嶼
+篦
+埔
+假
+墺
+刪
+于
+鯰
+穗
+渚
+崑
+轟
+皐
+關
+晁
+迢
+崋
+榕
+楨
+菘
+呰
+蒿
+憬
+雋
+珥
+羆
+弌
+墻
+鮪
+陂
+裴
+顯
+鐡
+臺
+煥
+稻
+肆
+遯
+鹽
+暘
+栲
+洩
+抓
+覈
+豎
+禦
+
diff --git a/ppocr/utils/korean_dict.txt b/ppocr/utils/korean_dict.txt
new file mode 100644
index 00000000..0edec5fe
--- /dev/null
+++ b/ppocr/utils/korean_dict.txt
@@ -0,0 +1,3636 @@
+저
+자
+명
+:
+신
+효
+필
+<
+국
+문
+초
+록
+2
+5
+한
+어
+관
+계
+구
+의
+통
+사
+와
+미
+조
+-
+합
+법
+적
+접
+근
+본
+논
+은
+형
+성
+일
+종
+으
+로
+오
+래
+전
+부
+터
+되
+온
+인
+특
+을
+살
+피
+고
+다
+시
+이
+를
+정
+보
+기
+반
+머
+리
+중
+심
+하
+여
+가
+상
+호
+작
+용
+는
+모
+안
+에
+서
+련
+된
+러
+현
+들
+술
+해
+것
+목
+표
+삼
+론
+과
+두
+함
+께
+복
+면
+더
+나
+아
+화
+황
+까
+지
+요
+측
+므
+재
+느
+른
+및
+포
+괄
+할
+수
+있
+잘
+착
+장
+뒤
+식
+절
+차
+위
+범
+주
+그
+유
+6
+3
+동
+격
+설
+징
+찰
+존
+9
+라
+분
+류
+양
+였
+출
+발
+개
+념
+공
+백
+대
+귀
+등
+펴
+략
+연
+도
+울
+핀
+많
+영
+역
+니
+제
+능
+내
+만
+충
+첨
+점
+핵
+'
+않
+높
+체
+낮
+섬
+약
+드
+난
+또
+순
+진
+언
+타
+소
+편
+르
+데
+7
+별
+립
+야
+외
+밀
+맺
+방
+속
+행
+배
+경
+건
+려
+운
+원
+따
+후
+규
+짓
+바
+탕
+우
+선
+달
+활
+질
+채
+택
+임
+단
+히
+벗
+될
+색
+았
+간
+극
+루
+세
+파
+악
+게
+1
+말
+었
+집
+생
+입
+밝
+혀
+졌
+맥
+락
+쪽
+왔
+검
+토
+던
+확
+새
+란
+음
+치
+마
+못
+했
+맞
+춘
+며
+급
+거
+석
+남
+8
+누
+든
+완
+갖
+추
+앞
+쓰
+익
+섭
+홍
+빈
+같
+눈
+{
+0
+런
+낸
+열
+람
+네
+떤
+렵
+때
+닌
+}
+학
+당
+혼
+준
+즉
+불
+없
+취
+비
+강
+변
+결
+렇
+겨
+키
+무
+받
+4
+항
+흔
+처
+직
+뿌
+엄
+축
+휘
+담
+컴
+퓨
+향
+몇
+둔
+박
+병
+참
+잡
+율
+금
+긴
+태
+각
+값
+렬
+예
++
+|
+[
+]
+큰
+갈
+칙
+됨
+산
+매
+크
+증
+막
+뿐
+럼
+청
+층
+롯
+랜
+떻
+독
+력
+응
+감
+틀
+롭
+낼
+최
+희
+돈
+겹
+친
+쉽
+삭
+킨
+놓
+실
+"
+폭
+넓
+료
+허
+메
+교
+*
+ㄴ
+붙
+스
+싸
+환
+찬
+=
+흐
+름
+물
+켰
+뀌
+삽
+#
+첫
+번
+째
+억
+너
+멀
+떨
+져
+밑
+줄
+냥
+움
+볼
+둘
+깊
+탈
+낳
+왜
+벽
+족
+책
+읽
+겠
+찾
+큼
+투
+곳
+판
+끼
+철
+쉬
+칭
+;
+견
+빠
+섯
+린
+습
+흥
+객
+묘
+꼴
+쉼
+쓸
+끝
+올
+령
+풀
+?
+몰
+냐
+년
+권
+씩
+길
+밖
+알
+떠
+옆
+슷
+룬
+윤
+_
+랑
+났
+침
+먹
+찌
+꺼
+곰
+죽
+풍
+탄
+냄
+듯
+엇
+꾼
+회
+트
+날
+빼
+닐
+승
+맏
+딸
+버
+>
+켜
+덕
+총
+꾸
+ㄹ
+혹
+김
+균
+밥
+폐
+쇄
+평
+깝
+쉘
+옛
+\
+품
+ㄸ
+얻
+돌
+셨
+킬
+득
+뜻
+갔
+봉
+넘
+뺏
+민
+워
+렸
+써
+림
+찍
+척
+잃
+답
+앗
+널
+송
+혜
+얼
+천
+셈
+녀
+골
+옮
+겼
+씨
+놀
+좌
+쳐
+좁
+님
+옷
+멋
+업
+월
+디
+늘
+창
+닭
+랐
+봄
+손
+왼
+코
+끌
+잉
+펄
+뛰
+낚
+對
+象
+化
+훈
+퍽
+쌍
+몸
+쯤
+걸
+!
+쓴
+샀
+노
+좋
+컬
+쥐
+쫓
+혔
+잠
+깐
+좀
+깨
+웠
+군
+찔
+렀
+딕
+암
+룰
+맛
+카
+훨
+씬
+꼭
+럽
+촘
+광
+눌
+뒷
+팔
+망
+꺾
+먼
+뀐
+짐
+넣
+짜
+킴
+슴
+슨
+걷
+뉜
+`
+숙
+글
+例
+同
+名
+異
+人
+럿
+퍼
+뜨
+험
+북
+끄
+짝
+칼
+닮
+짧
+쁜
+앉
+춥
+픈
+밉
+프
+둥
+싫
+애
+힌
+깎
+융
+앤
+똑
+깥
+껴
+싼
+잊
+낡
+봐
+욱
+케
+커
+곤
+낌
+헐
+긋
+테
+&
+윈
+닥
+슬
+셋
+맨
+럴
+흡
+홀
+잖
+힘
+닫
+뮤
+션
+칠
+쉐
+량
+획
+혁
+협
+웨
+샹
+즘
+쏟
+쟁
+컨
+띠
+례
+플
+농
+낙
+탐
+육
+뇌
+팽
+궁
+늦
+춰
+탁
+패
+긍
+텔
+레
+젼
+뉴
+高
+빨
+퇴
+맡
+컫
+욕
+곽
+염
+~
+팩
+베
+곧
+職
+뚜
+렷
+닦
+겪
+냉
+헌
+죄
+쳤
+젊
+엘
+냈
+맑
+쿠
+푸
+믿
+뎨
+웬
+멸
+츠
+끊
+윌
+릴
+밟
+브
+삶
+끔
+률
+깃
+듦
+딘
+램
+펀
+웅
+훗
+콜
+촉
+즈
+벨
+꾀
+궤
+펜
+쿨
+뢰
+톤
+륙
+젝
+젠
+딪
+묵
+됐
+곡
+빚
+템
+父
+系
+權
+혈
+첩
+압
+괴
+숭
+뽑
+숨
+벼
+즐
+쾌
+륜
+三
+從
+之
+道
+七
+去
+惡
+잔
+쉴
+낱
+흉
+낀
+얽
+납
+볍
+헤
+촌
+뻗
+%
+뭐
+홉
+떼
+뻔
+쨌
+걱
+쌓
+튼
+썩
+덮
+굴
+엮
+곁
+델
+쯧
+갑
+괜
+찮
+땅
+랫
+얌
+왠
+껏
+녕
+쑥
+섞
+렴
+풋
+뗀
+벌
+얘
+닉
+횟
+클
+컸
+밤
+싶
+겉
+푼
+꼈
+릇
+쩍
+녁
+쩌
+멈
+눕
+겁
+듣
+낭
+얇
+꿈
+틴
+엷
+젓
+귄
+굉
+옳
+몹
+뚫
+떡
+죠
+훌
+륭
+앓
+팬
+티
+액
+묻
+흘
+텃
+밭
+핏
+엔
+쇠
+페
+댔
+톱
+깍
+땠
+땐
+툭
+멍
+붉
+빛
+띤
+쭐
+댄
+숱
+샤
+툰
+줍
+윽
+딱
+솔
+뭔
+뜬
+덥
+덜
+뜩
+줌
+떳
+십
+팼
+쌀
+꼬
+듬
+꼽
+쁘
+꿔
+몫
+쁨
+엽
+셔
+헛
+꽤
+툴
+숲
+덤
+엿
+쏘
+낄
+팠
+色
+톨
+릭
+랄
+섹
+훑
+띄
+돼
+봤
+홧
+끗
+룻
+到
+達
+度
+推
+論
+變
+革
+樸
+根
+低
+作
+爲
+個
+原
+點
+밈
+賢
+明
+둑
+偏
+見
+者
+룩
+文
+質
+心
+身
+富
+利
+華
+美
+僞
+巧
+困
+惑
+飾
+無
+極
+仁
+萬
+物
+짚
+草
+犬
+不
+而
+不
+魏
+晋
+時
+代
+왕
+王
+弼
+開
+券
+常
+差
+別
+相
+一
+般
+窮
+稱
+大
+言
+辭
+當
+體
+實
+德
+上
+日
+證
+市
+씌
+老
+子
+秦
+漢
+源
+流
+生
+沒
+年
+宇
+宙
+著
+假
+託
+集
+積
+빗
+透
+徹
+前
+中
+期
+司
+馬
+遷
+史
+記
+韓
+非
+列
+傳
+學
+問
+經
+書
+諸
+百
+家
+儒
+思
+想
+武
+帝
+董
+仲
+舒
+朝
+國
+敎
+的
+官
+典
+訓
+枯
+風
+始
+皇
+갱
+焚
+坑
+紀
+獻
+先
+濟
+南
+伏
+故
+老
+新
+今
+舊
+古
+尙
+텍
+룹
+뉘
+易
+五
+專
+門
+墨
+守
+數
+融
+鄭
+玄
+章
+建
+初
+白
+虎
+觀
+議
+奏
+通
+義
+誥
+周
+禮
+儀
+禮
+春
+秋
+鞏
+羊
+穀
+梁
+佐
+氏
+論
+語
+班
+固
+筍
+悅
+凞
+衡
+太
+談
+憤
+滿
+公
+自
+序
+宣
+室
+令
+天
+星
+歷
+卜
+祝
+丞
+曆
+揚
+何
+黃
+元
+封
+泰
+山
+禪
+地
+治
+平
+閣
+딜
+河
+洛
+虞
+夏
+死
+西
+方
+關
+잇
+操
+縱
+發
+千
+歲
+海
+內
+紬
+君
+士
+載
+修
+事
+業
+淡
+六
+陰
+陽
+刑
+致
+廬
+歸
+法
+省
+下
+本
+四
+季
+多
+面
+臣
+夫
+婦
+長
+幼
+꿀
+節
+儉
+形
+善
+俗
+主
+旨
+功
+述
+点
+短
+卓
+說
+굳
+然
+久
+合
+虛
+聖
+텅
+因
+行
+端
+寬
+正
+肖
+是
+政
+渾
+冥
+統
+循
+消
+綱
+龍
+陝
+城
+縣
+楊
+祖
+來
+蹟
+郎
+小
+聞
+石
+遺
+抽
+出
+룡
+李
+龍
+禍
+匈
+奴
+宮
+옥
+갇
+廣
+卷
+찢
+腸
+땀
+젖
+끓
+任
+安
+悲
+境
+詩
+簡
+略
+屈
+離
+騷
+左
+丘
+意
+鬱
+結
+惟
+逝
+涇
+壺
+遂
+表
+理
+혐
+世
+再
+興
+徑
+川
+溪
+谷
+禽
+獸
+木
+牝
+牡
+雌
+雄
+樂
+和
+잣
+指
+散
+侯
+奔
+走
+里
+照
+夕
+ㄷ
+웃
+纂
+弑
+孝
+롤
+빙
+轉
+寫
+版
+註
+釋
+戰
+術
+脚
+맹
+唐
+解
+貞
+索
+隱
+張
+北
+宋
+遽
+뻐
+刊
+校
+訂
+耳
+伯
+뼈
+車
+流
+哲
+愚
+俠
+氣
+得
+雲
+尹
+喜
+萊
+用
+宗
+段
+干
+住
+骸
+앙
+膠
+仰
+傅
+淸
+淨
+口
+譯
+聃
+欄
+外
+交
+所
+在
+鄕
+曲
+膽
+函
+後
+邊
+韶
+銘
+曾
+陳
+敍
+倫
+몽
+蒙
+申
+害
+京
+궐
+闕
+沛
+捌
+志
+廟
+녹
+읍
+鹿
+邑
+江
+펼
+擔
+刻
+疑
+梁
+玉
+繩
+讀
+雜
+念
+孫
+왈
+諡
+曰
+字
+選
+楚
+桓
+덧
+幽
+尼
+曼
+귓
+福
+哀
+齒
+敬
+案
+與
+判
+二
+藝
+畢
+沅
+駒
+禦
+寇
+商
+弟
+嚴
+憺
+音
+澹
+蟬
+欌
+遊
+性
+魯
+叔
+랍
+貴
+辯
+舌
+칫
+執
+峻
+烈
+近
+閻
+若
+據
+昭
+續
+葬
+巷
+黨
+食
+곱
+喪
+孔
+十
+有
+葉
+適
+識
+寓
+崔
+東
+壁
+洙
+泗
+考
+信
+錄
+戴
+朱
+핑
+尊
+崇
+堯
+舜
+設
+類
+驕
+浴
+態
+淫
+盛
+我
+引
+存
+眞
+路
+庫
+굽
+欲
+禹
+立
+篇
+神
+仙
+應
+注
+哮
+景
+吳
+誅
+殺
+資
+鑑
+威
+定
+壽
+箱
+養
+쳇
+퀴
+씻
+私
+贍
+足
+移
+各
+博
+句
+韻
+陶
+冶
+탠
+核
+連
+智
+壯
+荀
+呂
+管
+愼
+策
+鬼
+喩
+末
+乾
+괘
+卦
+告
+界
+藩
+屛
+器
+第
+莫
+終
+也
+比
+庇
+役
+可
+線
+造
+츰
+切
+部
+偈
+頌
+벳
+要
+誦
+曜
+끈
+읊
+씀
+劫
+뾰
+틈
+妄
+챙
+뛸
+샘
+늪
+솟
+늙
+쭙
+苦
+솜
+삐
+꽃
+흩
+맙
+붓
+픔
+빌
+겸
+돋
+뽐
+팁
+돕
+흙
+랴
+坐
+뱀
+뿔
+숫
+댐
+읜
+짊
+깔
+듭
+ㄱ
+엉
+붕
+넌
+貪
+瞋
+痔
+脫
+밴
+엎
+큽
+덩
+읠
+姓
+階
+級
+힐
+콩
+묶
+훔
+肉
+넷
+뇨
+갚
+흑
+꽁
+휴
+껌
+씹
+뱉
+랬
+九
+涅
+槃
+入
+廷
+空
+惺
+具
+以
+둠
+求
+菩
+衆
+果
+벅
+짖
+센
+꼼
+똥
+뜸
+믐
+뜯
+털
+낯
+넬
+ㅎ
+늑
+캐
+큐
+렌
+텐
+쿵
+흠
+핌
+탓
+턱
+뚤
+멕
+켈
+졸
+쪼
+ㅂ
+앳
+탬
+즙
+휩
+폴
+뭉
+뚱
+빅
+슈
+셀
+둬
+캉
+튜
+ㅅ
+뭇
+얗
+핍
+썼
+場
+뀔
+숴
+像
+띨
+科
+屬
+種
+괸
+롱
+띈
+횡
+킹
+웰
+닷
+얕
+탱
+팡
+꿨
+펌
+헨
+콰
+링
+벤
+콘
+빔
+둡
+뚝
+헬
+콥
+펠
+쏠
+잦
+탑
+멩
+튀
+뽀
+돔
+꽝
+돗
+빽
+펭
+ㅇ
+짹
+렁
+옴
+껍
+옇
+윙
+햇
+닿
+얀
+흰
+윗
+굶
+둣
+깰
+맴
+뺨
+컷
+탔
+렐
+덟
+팥
+맘
+썰
+샌
+닝
+갯
+쩔
+캬
+춤
+릉
+싱
+캔
+깡
+킷
+뎠
+랭
+릎
+꽉
+첸
+췬
+랩
+옹
+뛴
+쐐
+믹
+찝
+댓
+걀
+쌘
+쉰
+갓
+틱
+폈
+냘
+랗
+늬
+빤
+톰
+맣
+/
+촬
+럭
+깬
+깜
+튕
+틋
+떴
+藻
+類
+잎
+셉
+싹
+캤
+훼
+틔
+놨
+얹
+젯
+캄
+師
+迦
+葉
+쯔
+붐
+僧
+茶
+弓
+醫
+팀
+臨
+曹
+洞
+겐
+昧
+魔
+旋
+씽
+柱
+趙
+州
+껄
+촛
+臥
+딴
+呵
+笑
+護
+位
+ㅌ
+漸
+認
+都
+寺
+딛
+콤
+렉
+副
+聰
+持
+阿
+蜀
+佛
+育
+受
+蘊
+慧
+갠
+잿
+렝
+女
+뗑
+慈
+앎
+휼
+겅
+됩
+닙
+힙
+짠
+덴
+블
+맷
+重
+옵
+멜
+봅
+겔
+ㅈ
+칩
+렘
+뵈
+삯
+몬
+暑
+싣
+찜
+퉁
+겟
+놋
+創
+컹
+렛
+花
+紅
+엡
+巢
+能
+꼐
+롬
+팍
+섰
+봇
+툼
+폼
+슥
+팎
+舟
+돛
+닻
+뗏
+엣
+칸
+知
+延
+批
+評
+理
+賞
+享
+뤄
+味
+浦
+筆
+漫
+쌩
+엠
+쇼
+흄
+뮈
+왓
+審
+分
+過
+間
+렙
+틸
+뭘
+뮐
+얏
+밋
+헉
+밧
+콧
+듸
+뿜
+앵
+쨍
+쭉
+誤
+덱
+愛
+샅
+밍
+눔
+룸
+엥
+폄
+꿰
+룐
+냇
+쑤
+릿
+圖
+盆
+勢
+坊
+民
+局
+承
+喆
+橋
+土
+保
+水
+濯
+멱
+獵
+頭
+踏
+깅
+李
+岸
+强
+占
+排
+뺀
+渠
+껑
+暗
+力
+銀
+鑛
+鐘
+樓
+共
+涌
+則
+精
+秩
+樣
+式
+聲
+畏
+脈
+絡
+찡
+뜰
+픽
+엌
+誠
+母
+胎
+其
+盤
+伴
+侶
+加
+工
+反
+車
+洋
+輪
+廻
+禾
+乘
+動
+땡
+볕
+캠
+귈
+넉
+感
+視
+覺
+댁
+늠
+戶
+棟
+뷰
+費
+얄
+廳
+往
+倍
+格
+斜
+젤
+客
+顚
+倒
+此
+彼
+步
+릅
+낫
+未
+靴
+샐
+핸
+켤
+줘
+톡
+맬
+넨
+巫
+슭
+兀
+瓦
+骨
+斯
+盟
+劃
+麗
+쿄
+뭍
+辰
+成
+族
+塞
+赤
+峰
+녔
+昔
+波
+角
+杯
+製
+꽂
+헝
+겊
+솥
+銅
+鏡
+줏
+鳥
+社
+陵
+處
+텡
+堆
+秘
+悖
+兒
+罕
+짙
+꿩
+쥬
+酒
+俱
+뭄
+홱
+靑
+鷹
+앴
+뽈
+튿
+卍
+騫
+域
+樺
+漁
+쟉
+八
+寶
+雙
+紋
+싯
+쩐
+욤
+丹
+뒬
+槍
+츨
+뱅
+泡
+疹
+哨
+눠
+톈
+샴
+캘
+쏜
+셰
+켯
+毛
+ㅓ
+斷
+層
+푹
+숀
+멧
+鰐
+梨
+늄
+遍
+超
+턴
+옐
+쿼
+랙
+球
+슘
+뷔
+퐁
+윅
+벙
+멘
+産
+줬
+콕
+팅
+잽
+닛
+쌉
+텁
+헙
+乎
+옭
+派
+띌
+꾹
+遠
+챌
+썽
+씁
+훤
+칵
+곬
+딩
+團
+連
+삿
+갸
+잭
+뗄
+쥔
+光
+庭
+漆
+옻
+닯
+寄
+回
+羽
+狀
+複
+燁
+樗
+樹
+땔
+綠
+雖
+危
+最
+好
+啼
+影
+侵
+綠
+衣
+濕
+夢
+賣
+臨
+魚
+月
+軒
+菜
+妊
+雪
+深
+夜
+愁
+귤
+펑
+柑
+橘
+亞
+金
+쌌
+橄
+攬
+欖
+薺
+멎
+腋
+媒
+鹽
+藏
+油
+쐬
+쪄
+桑
+童
+奇
+짇
+뽕
+供
+犧
+섣
+냅
+굵
+찧
+蓮
+詵
+巖
+液
+藥
+盧
+命
+賦
+髮
+香
+囊
+燕
+楓
+歌
+謠
+永
+金
+澤
+霜
+뫼
+勸
+뻑
+굿
+雀
+配
+糖
+松
+障
+幹
+궂
+홈
+꿋
+꺽
+雅
+苕
+云
+矣
+憂
+維
+傷
+如
+웁
+칡
+凌
+女
+紫
+墜
+瘀
+血
+乳
+蔡
+絹
+蠶
+繭
+紙
+蘭
+亭
+竹
+麻
+房
+友
+謝
+箋
+燈
+堂
+薛
+濤
+杜
+甫
+苔
+楮
+蘚
+植
+넋
+錦
+썹
+病
+빳
+阪
+組
+柳
+쬐
+又
+會
+놈
+밸
+홋
+島
+岡
+덫
+폰
+놔
+췄
+찐
+켓
+켄
+텄
+野
+村
+뻘
+쌈
+큘
+쨋
+콱
+座
+쥘
+田
+登
+井
+兵
+鬪
+멤
+黑
+넸
+由
+쳔
+軍
+情
+뿍
+댕
+技
+쩡
+貫
+ㅋ
+탤
+偶
+앰
+뷸
+핫
+郞
+店
+햄
+牛
+찼
+넛
+宅
+便
+急
+渡
+播
+磨
+齋
+藤
+忠
+次
+긁
+林
+晴
+띔
+낵
+吉
+祥
+짭
+짤
+隆
+勝
+茂
+務
+펫
+森
+良
+靖
+팸
+玲
+헹
+굼
+쉭
+륵
+쏙
+磁
+火
+印
+핥
+볐
+뎌
+現
+顯
+딤
+궈
+켠
+恨
+늉
+캇
+롸
+쎄
+헴
+誕
+탯
+夷
+낟
+殷
+슐
+燧
+農
+頊
+괭
+빻
+墟
+湯
+傑
+后
+稷
+戎
+越
+晉
+翟
+셜
+엊
+誌
+利
+賓
+盡
+把
+習
+全
+於
+챠
+뱍
+즌
+셍
+園
+츄
+墳
+엑
+雇
+岳
+퓬
+蕓
+촨
+뻬
+虹
+豫
+蔬
+杭
+蘇
+桂
+林
+秀
+璃
+臺
+潭
+烈
+輸
+特
+區
+鳳
+榮
+池
+魯
+蓮
+溫
+泉
+슝
+膨
+湖
+墾
+丁
+恒
+췌
+進
+옌
+텨
+냔
+ㅊ
+팜
+提
+羅
+弘
+益
+輯
+鄒
+牟
+奄
+넜
+랏
+留
+樂
+뼘
+曉
+잤
+諍
+薩
+柏
+逐
+鹿
+惠
+施
+꿴
+댈
+弱
+隨
+뱃
+汎
+兼
+支
+離
+損
+깻
+뭣
+鵲
+醯
+診
+臟
+뭡
+紂
+己
+抱
+烙
+樓
+쿡
+卿
+竅
+箕
+微
+祭
+康
+桀
+右
+땜
+逆
+滑
+釐
+攻
+煬
+辨
+拇
+枝
+目
+刺
+繡
+律
+律
+姑
+磬
+呂
+曠
+蔘
+輿
+衛
+靈
+堅
+居
+畸
+鳧
+脛
+鶴
+앨
+켐
+品
+少
+六
+孤
+齊
+首
+雷
+懸
+財
+貨
+눴
+챈
+參
+鰍
+臾
+盜
+拓
+麗
+縷
+躬
+穆
+調
+放
+至
+泊
+伐
+慾
+素
+朴
+樽
+珪
+璋
+쁠
+赫
+胥
+腹
+醴
+屋
+閭
+壬
+罰
+逢
+諫
+靈
+劣
+伍
+暈
+戮
+勇
+脣
+竭
+寒
+亡
+鄲
+薄
+圍
+起
+淵
+斗
+斛
+璽
+候
+爵
+恩
+斧
+鉞
+示
+絶
+乃
+止
+珠
+芋
+瑟
+琴
+僥
+匠
+拙
+妙
+容
+央
+栗
+陸
+畜
+轅
+盧
+炎
+曦
+跡
+짱
+좽
+沼
+莊
+彿
+舍
+塔
+婆
+摩
+벵
+若
+密
+蜜
+펙
+群
+剛
+趣
+改
+盂
+蘭
+鎭
+卽
+屍
+눗
+컵
+緣
+謙
+姚
+祇
+坵
+秤
+胡
+忍
+鈍
+梵
+뇩
+먁
+等
+直
+幻
+捨
+男
+願
+陸
+默
+寂
+甘
+露
+抄
+他
+肇
+菴
+뵙
+閔
+累
+皆
+奉
+講
+邪
+$
+芳
+듀
+갬
+맵
+뎀
+値
+稼
+價
+輕
+際
+갭
+網
+靜
+依
+互
+癖
+鈴
+蕉
+俳
+滅
+件
+퀘
+話
+皮
+電
+荷
+活
+降
+台
+佈
+彌
+陀
+疏
+唯
+攝
+燮
+跋
+親
+普
+叉
+難
+堤
+順
+儼
+澄
+苑
+昌
+院
+奈
+孺
+蘆
+絲
+茶
+趨
+伊
+列
+災
+厄
+英
+運
+歐
+參
+岩
+倉
+攘
+幕
+府
+潑
+殖
+猩
+條
+約
+諭
+沖
+峽
+休
+培
+艦
+馨
+防
+督
+弁
+桎
+梏
+征
+峙
+쵸
+兆
+梓
+朋
+隣
+搗
+嘗
+薪
+栗
+遼
+半
+沿
+灣
+立
+圈
+瓜
+恐
+熱
+醉
+綸
+答
+豪
+紳
+岐
+菫
+津
+袁
+凱
+純
+鐵
+洲
+企
+針
+隊
+瀋
+暘
+總
+領
+亥
+緖
+丸
+助
+敗
+猥
+獨
+望
+隷
+厦
+澳
+澎
+制
+祺
+瑞
+萍
+毅
+閥
+打
+破
+졍
+웹
+뙤
+튄
+쾰
+쏭
+뤼
+짰
+뭏
+看
+譜
+갛
+첼
+벡
+똘
+뺄
+잴
+잰
+偉
+勳
+寃
+掌
+布
+接
+亨
+甑
+姜
+淳
+報
+彬
+鼎
+奎
+倫
+訣
+吐
+蕃
+帽
+殿
+遡
+橡
+還
+領
+綽
+顔
+譚
+稽
+瑪
+壇
+彛
+꿇
+숯
+ㅆ
+녘
+來
+裕
+唱
+媚
+繪
+畵
+崖
+羅
+服
+料
+圓
+煌
+冠
+ㅣ
+船
+傾
+耕
+伎
+샬
+妖
+閃
+쩨
+몄
+맸
+晶
+ㅜ
+矢
+쓱
+髓
+뺑
+鷄
+揭
+巨
+龜
+햐
+딧
+拜
+겡
+眼
+緯
+契
+鮮
+卑
+落
+蒿
+准
+黎
+댑
+깟
+빕
+툇
+춧
+뼉
+킵
+깼
+숟
+뭅
+낏
+섶
+뱁
+돝
+杖
+왱
+삵
+갉
+烏
+飛
+梨
+뒹
+쇳
+홰
+짢
+擧
+兩
+뺐
+펐
+쩜
+홑
+윳
+允
+좇
+쇤
+룽
+챘
+흣
+裔
+엾
+뒀
+갗
+묽
+넙
+꼿
+뻤
+꿍
+컥
+뎅
+겋
+뢸
+쏴
+쭈
+쾅
+혓
+겻
+쫀
+뗐
+蝕
+臆
+荇
+∼
+쾡
+얍
+곶
+닳
+꿎
+켕
+캥
+탉
+곯
+짬
+뻣
+믈
+빡
+겄
+갤
+횃
+卒
+륨
+껐
+캡
+肥
+빴
+훅
+材
+翁
+뗍
+枰
+慣
+틉
+켭
+탭
+끽
+웜
+넝
+賊
+均
+米
+稀
+炭
+빵
+찹
+胚
+芽
+멥
+볶
+”
+곪
+酸
+沙
+麥
+궜
+貧
+怡
+찻
+肝
+豆
+壓
+疫
+午
+郡
+拾
+療
+滯
+痛
+菊
+症
+崩
+蔓
+葛
+粉
+救
+荒
+떫
+灰
+茵
+癌
+毒
+基
+脂
+授
+機
+滋
+補
+腎
+汗
+疼
+暈
+飮
+랒
+桔
+梗
+肺
+咽
+喉
+痺
+拘
+杞
+’
+菌
+燐
+板
+埴
+壤
+甲
+椒
+썬
+徐
+帶
+咳
+粘
+軟
+裂
+片
+援
+洛
+卵
+抗
+腫
+瘍
+粥
+伸
+將
+趾
+孟
+茹
+瀝
+튤
+苞
+蒲
+쫙
+番
+蠻
+倭
+擘
+煎
+苛
+劑
+符
+檀
+禁
+忌
+蒜
+必
+須
+量
+薑
+咸
+早
+隋
+챗
+棗
+떰
+枾
+飢
+餓
+滄
+옅
+檎
+捿
+秉
+垢
+溶
+整
+焦
+脾
+擒
+栢
+鋼
+潤
+稗
+耐
+晩
+燥
+游
+燔
+珍
+蝶
+裙
+刀
+借
+料
+煮
+胞
+那
+쫄
+佃
+濁
+輻
+貝
+쥴
+丑
+灸
+脯
+脩
+熟
+輓
+鴨
+逵
+凉
+胃
+瘡
+蟲
+髥
+쫑
+蒸
+糞
+屎
+볏
+덖
+豚
+猪
+쌔
+蜂
+餘
+豊
+寅
+獵
+牌
+使
+停
+碍
+狗
+塚
+吠
+飯
+숍
+錢
+雨
+追
+慕
+碑
+폿
+뵐
+쪘
+핼
+깁
+밌
+쩝
+떱
+넥
+짼
+씸
+겆
+휙
+깽
+뜀
+숩
+끙
+젭
+됴
+팝
+앱
+딨
+걔
+꺄
+눅
+쒔
+戀
+吏
+녜
+旱
+뺌
+샜
+꽥
+뻥
+걘
+떵
+뀄
+왁
+菽
+댜
+訊
+戟
+置
+睡
+삘
+샛
+낍
+才
+낑
+퀸
+꼍
+쟤
+待
+寸
+뎃
+浮
+沈
+쑨
+塵
+奮
+惡
+쨀
+떽
+쟈
+貸
+씰
+쒀
+좍
+휭
+뱄
+얜
+썸
+텀
+껀
+곗
+휠
+숄
+괌
+퉜
+꿉
+벚
+샷
+뷴
+웸
+킥
+슛
+챔
+뤘
+셸
+팻
+텝
+퀵
+콸
+뮬
+튈
+윔
+젬
+뮌
+욜
+갰
+휑
+퀭
+퉈
+헷
+탰
+랠
+븐
+퓰
+픕
+끕
+삔
+띵
+뀝
+헥
+휜
+룃
+셌
+흽
+챕
+땝
+톳
+쟀
+띕
+졀
+쨉
+뱐
+윱
+햅
+띱
+꾜
+궝
+늅
+붇
+곕
+횝
+푭
+샙
+벱
+닢
+뀜
+솝
+뜁
+쿤
+듐
+펩
+旗
+手
+患
+凡
+膜
+失
+型
+優
+尿
+襄
+限
+婚
+股
+臼
+細
+織
+卵
+尿
+늡
+^
+헀
+á
+ň
+ó
+ž
+“
+ç
+ü
+í
+é
+ã
+튠
+ä
+ć
+ă
+ş
+땄
+넹
+ö
+Š
+ě
+ñ
+퀀
+å
+ř
+ý
+캅
+∇
+è
+퀼
+쳄
+헵
+ê
+ō
+ø
+뢴
+î
+쩄
+롹
+옙
+Č
+č
+샨
+Ș
+쾨
+듈
+벰
+ș
+팰
+셴
+쳉
+â
+욘
+ë
+퓸
+É
+먀
+쪾
+
+Ö
+팟
+禅
+퀄
+ß
+ę
+Ł
+ź
+ą
+ł
+Α
+û
+ā
+à
+튬
+Ž
+đ
+浅
+克
+Ä
+š
+넴
+×
+뉩
+쐈
+Ü
+Å
+ì
+왑
+힉
+휄
+ı
+ţ
+웡
+İ
+О
+с
+т
+р
+о
+в
+Г
+а
+л
+я
+샵
+ė
+ń
+Á
+딥
+ī
+ğ
+힝
+½
+Ç
+φ
+ż
+ô
+Ó
+λ
+웍
+Δ
+ò
+ū
+캣
+嶋
+淑
+α
+ニ
+カ
+ラ
+グ
+ア
+ン
+©
+챤
+ï
+ú
+Ş
+→
+죤
+æ
+펨
+²
+õ
+뇽
+쎈
+°
+펍
+Í
+콴
+ð
+첵
+Î
+넵
+ē
+쿰
+「
+」
+
diff --git a/setup.py b/setup.py
index 7141f170..2cea853d 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''},
include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
- version='0.0.3',
+ version='1.0.0',
install_requires=requirements,
license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
diff --git a/tools/eval.py b/tools/eval.py
index 22185911..aff5fc71 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -45,10 +45,12 @@ from ppocr.utils.save_load import init_model
from eval_utils.eval_det_utils import eval_det_run
from eval_utils.eval_rec_utils import test_rec_benchmark
from eval_utils.eval_rec_utils import eval_rec_run
+from eval_utils.eval_cls_utils import eval_cls_run
def main():
- startup_prog, eval_program, place, config, train_alg_type = program.preprocess()
+ startup_prog, eval_program, place, config, train_alg_type = program.preprocess(
+ )
eval_build_outputs = program.build(
config, eval_program, startup_prog, mode='test')
eval_fetch_name_list = eval_build_outputs[1]
@@ -67,6 +69,14 @@ def main():
'fetch_varname_list':eval_fetch_varname_list}
metrics = eval_det_run(exe, config, eval_info_dict, "eval")
logger.info("Eval result: {}".format(metrics))
+ elif train_alg_type == 'cls':
+ eval_reader = reader_main(config=config, mode="eval")
+ eval_info_dict = {'program': eval_program, \
+ 'reader': eval_reader, \
+ 'fetch_name_list': eval_fetch_name_list, \
+ 'fetch_varname_list': eval_fetch_varname_list}
+ metrics = eval_cls_run(exe, eval_info_dict)
+ logger.info("Eval result: {}".format(metrics))
else:
reader_type = config['Global']['reader_yml']
if "benchmark" not in reader_type:
diff --git a/tools/eval_utils/eval_cls_utils.py b/tools/eval_utils/eval_cls_utils.py
new file mode 100644
index 00000000..9c9b2667
--- /dev/null
+++ b/tools/eval_utils/eval_cls_utils.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+__all__ = ['eval_cls_run']
+
+import logging
+
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+logger = logging.getLogger(__name__)
+
+
+def eval_cls_run(exe, eval_info_dict):
+    """
+    Run the evaluation program over the whole eval reader and report accuracy.
+
+    Args:
+        exe: executor whose ``run`` method executes the evaluation program.
+        eval_info_dict(dict): provides 'reader' (a callable returning an
+            iterable of batches, each batch a sequence of (image, label)
+            pairs), 'program' and 'fetch_varname_list'.
+    Returns:
+        dict: 'avg_acc', 'total_acc_num' and 'total_sample_num'. 'avg_acc'
+            is 0.0 when the reader yields no samples.
+    """
+    total_sample_num = 0
+    total_acc_num = 0
+
+    for data in eval_info_dict['reader']():
+        img_list = [sample[0] for sample in data]
+        label_list = [sample[1] for sample in data]
+
+        img_list = np.concatenate(img_list, axis=0)
+        outs = exe.run(eval_info_dict['program'],
+                       feed={'image': img_list},
+                       fetch_list=eval_info_dict['fetch_varname_list'],
+                       return_numpy=False)
+        # The 1-D fetched output is the one compared against the labels;
+        # use outs[0] when outs[1] turns out not to be 1-D.
+        pred_labels = np.array(outs[1])
+        if len(pred_labels.shape) != 1:
+            pred_labels = np.array(outs[0])
+        _, acc_num = cal_cls_acc(pred_labels, label_list)
+        total_acc_num += acc_num
+        total_sample_num += len(label_list)
+
+    # Guard the division: an empty reader must not raise ZeroDivisionError.
+    if total_sample_num > 0:
+        avg_acc = total_acc_num * 1.0 / total_sample_num
+    else:
+        avg_acc = 0.0
+    metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num,
+               "total_sample_num": total_sample_num}
+    return metrics
+
+
+def cal_cls_acc(preds, labels):
+    """
+    Count how many predictions equal their label.
+
+    Args:
+        preds: sequence of predicted labels.
+        labels: sequence of ground-truth labels, paired with preds by position.
+    Returns:
+        tuple: (accuracy, number of correct predictions). The accuracy is
+            0.0 for an empty ``preds`` instead of raising ZeroDivisionError.
+    """
+    acc_num = sum(1 for pred, label in zip(preds, labels) if pred == label)
+    pred_num = len(preds)
+    if pred_num == 0:
+        return 0.0, acc_num
+    return acc_num / pred_num, acc_num
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
new file mode 100755
index 00000000..3c14011a
--- /dev/null
+++ b/tools/infer/predict_cls.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+
+import tools.infer.utility as utility
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
+import cv2
+import copy
+import numpy as np
+import math
+import time
+from paddle import fluid
+
+
+class TextClassifier(object):
+    """Text-direction classifier that flips crops predicted as upside-down.
+
+    Wraps the predictor built by ``utility.create_predictor(args, mode="cls")``.
+    Calling an instance classifies a list of cropped text images and rotates
+    every crop whose predicted label contains '180' with a score above
+    ``cls_thresh``.
+    """
+
+    def __init__(self, args):
+        """Build the predictor and cache the classifier settings.
+
+        Args:
+            args: parsed command-line namespace; reads ``cls_image_shape``
+                (a "C,H,W" string), ``cls_batch_num``, ``label_list``,
+                ``use_zero_copy_run`` and ``cls_thresh``.
+        """
+        self.predictor, self.input_tensor, self.output_tensors = \
+            utility.create_predictor(args, mode="cls")
+        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
+        # Use the classifier's own batch size. Reading ``rec_batch_num`` here
+        # left the ``--cls_batch_num`` option without any effect. Namespaces
+        # that do not define ``cls_batch_num`` keep the previous behaviour.
+        batch_num = getattr(args, 'cls_batch_num', None)
+        if batch_num is None:
+            batch_num = args.rec_batch_num
+        self.cls_batch_num = batch_num
+        self.label_list = args.label_list
+        self.use_zero_copy_run = args.use_zero_copy_run
+        self.cls_thresh = args.cls_thresh
+
+    def resize_norm_img(self, img):
+        """Resize ``img`` to the model height, normalize it and right-pad it.
+
+        The aspect ratio is kept unless the resized width would exceed the
+        model width, in which case the width is clamped. Pixel values are
+        mapped from [0, 255] to [-1, 1].
+
+        Args:
+            img: image array whose first two axes are height and width.
+        Returns:
+            numpy.ndarray: float32 array of shape ``cls_image_shape`` with
+                zeros to the right of the resized image.
+        """
+        imgC, imgH, imgW = self.cls_image_shape
+        h = img.shape[0]
+        w = img.shape[1]
+        ratio = w / float(h)
+        if math.ceil(imgH * ratio) > imgW:
+            resized_w = imgW
+        else:
+            resized_w = int(math.ceil(imgH * ratio))
+        resized_image = cv2.resize(img, (resized_w, imgH))
+        resized_image = resized_image.astype('float32')
+        if self.cls_image_shape[0] == 1:
+            # Single-channel model: add the channel axis in front.
+            resized_image = resized_image / 255
+            resized_image = resized_image[np.newaxis, :]
+        else:
+            # Move the channel axis first (HWC -> CHW).
+            resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        padding_im[:, :, 0:resized_w] = resized_image
+        return padding_im
+
+    def __call__(self, img_list):
+        """Classify every image and rotate the ones predicted as flipped.
+
+        Args:
+            img_list: list of image arrays; the input list is deep-copied
+                and never modified.
+        Returns:
+            tuple: (images with the rotation applied, list of
+                ``[label, score]`` in input order, accumulated prediction
+                time in seconds).
+        """
+        img_list = copy.deepcopy(img_list)
+        img_num = len(img_list)
+        # Calculate the aspect ratio of all text bars
+        width_list = []
+        for img in img_list:
+            width_list.append(img.shape[1] / float(img.shape[0]))
+        # Sorting can speed up the cls process
+        indices = np.argsort(np.array(width_list))
+
+        # One independent placeholder per image (no shared inner list).
+        cls_res = [['', 0.0] for _ in range(img_num)]
+        batch_num = self.cls_batch_num
+        predict_time = 0
+        for beg_img_no in range(0, img_num, batch_num):
+            end_img_no = min(img_num, beg_img_no + batch_num)
+            norm_img_batch = []
+            for ino in range(beg_img_no, end_img_no):
+                norm_img = self.resize_norm_img(img_list[indices[ino]])
+                norm_img = norm_img[np.newaxis, :]
+                norm_img_batch.append(norm_img)
+            norm_img_batch = np.concatenate(norm_img_batch)
+            norm_img_batch = norm_img_batch.copy()
+            starttime = time.time()
+
+            if self.use_zero_copy_run:
+                self.input_tensor.copy_from_cpu(norm_img_batch)
+                self.predictor.zero_copy_run()
+            else:
+                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
+                self.predictor.run([norm_img_batch])
+
+            prob_out = self.output_tensors[0].copy_to_cpu()
+            label_out = self.output_tensors[1].copy_to_cpu()
+            # The labels are the 1-D output; swap when the order is reversed.
+            if len(label_out.shape) != 1:
+                prob_out, label_out = label_out, prob_out
+
+            elapse = time.time() - starttime
+            predict_time += elapse
+            for rno in range(len(label_out)):
+                label_idx = label_out[rno]
+                score = prob_out[rno][label_idx]
+                label = self.label_list[label_idx]
+                # Map the position in the sorted batch back to input order.
+                src_idx = indices[beg_img_no + rno]
+                cls_res[src_idx] = [label, score]
+                if '180' in label and score > self.cls_thresh:
+                    img_list[src_idx] = cv2.rotate(img_list[src_idx],
+                                                   cv2.ROTATE_180)
+        return img_list, cls_res, predict_time
+
+
+def main(args):
+    """Classify the direction of every readable image under ``args.image_dir``.
+
+    Prints the ``[label, score]`` prediction for each image that could be
+    loaded, followed by the total prediction time. Exits with status 1 when
+    the classifier raises.
+
+    Args:
+        args: parsed command-line namespace (see ``utility.parse_args``).
+    """
+    image_file_list = get_image_file_list(args.image_dir)
+    text_classifier = TextClassifier(args)
+    valid_image_file_list = []
+    img_list = []
+    # Process every image that was found; slicing the list to its first ten
+    # entries would drop the remaining files without any notice.
+    for image_file in image_file_list:
+        img, flag = check_and_read_gif(image_file)
+        if not flag:
+            img = cv2.imread(image_file)
+        if img is None:
+            logger.info("error in loading image:{}".format(image_file))
+            continue
+        valid_image_file_list.append(image_file)
+        img_list.append(img)
+    try:
+        img_list, cls_res, predict_time = text_classifier(img_list)
+    except Exception as e:
+        print(e)
+        # Report the failure to the caller through a non-zero exit status.
+        sys.exit(1)
+    for ino in range(len(img_list)):
+        print("Predicts of %s:%s" % (valid_image_file_list[ino], cls_res[ino]))
+    print("Total predict time for %d images:%.3f" %
+          (len(img_list), predict_time))
+
+
+if __name__ == "__main__":
+ main(utility.parse_args())
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index ff5d53e9..29c4d7e8 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -13,16 +13,19 @@
# limitations under the License.
import os
import sys
+
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
import tools.infer.utility as utility
from ppocr.utils.utility import initial_logger
+
logger = initial_logger()
import cv2
import tools.infer.predict_det as predict_det
import tools.infer.predict_rec as predict_rec
+import tools.infer.predict_cls as predict_cls
import copy
import numpy as np
import math
@@ -37,6 +40,9 @@ class TextSystem(object):
def __init__(self, args):
self.text_detector = predict_det.TextDetector(args)
self.text_recognizer = predict_rec.TextRecognizer(args)
+ self.use_angle_cls = args.use_angle_cls
+ if self.use_angle_cls:
+ self.text_classifier = predict_cls.TextClassifier(args)
def get_rotate_crop_image(self, img, points):
'''
@@ -91,6 +97,11 @@ class TextSystem(object):
tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
img_crop_list.append(img_crop)
+ if self.use_angle_cls:
+ img_crop_list, angle_list, elapse = self.text_classifier(
+ img_crop_list)
+ print("cls num : {}, elapse : {}".format(
+ len(img_crop_list), elapse))
rec_res, elapse = self.text_recognizer(img_crop_list)
print("rec_res num : {}, elapse : {}".format(len(rec_res), elapse))
# self.print_draw_crop_rec_res(img_crop_list, rec_res)
@@ -110,8 +121,8 @@ def sorted_boxes(dt_boxes):
_boxes = list(sorted_boxes)
for i in range(num_boxes - 1):
- if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \
- (_boxes[i + 1][0][0] < _boxes[i][0][0]):
+ if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
+ (_boxes[i + 1][0][0] < _boxes[i][0][0]):
tmp = _boxes[i]
_boxes[i] = _boxes[i + 1]
_boxes[i + 1] = tmp
@@ -122,6 +133,7 @@ def main(args):
image_file_list = get_image_file_list(args.image_dir)
text_sys = TextSystem(args)
is_visualize = True
+ font_path = args.vis_font_path
for image_file in image_file_list:
img, flag = check_and_read_gif(image_file)
if not flag:
@@ -149,7 +161,7 @@ def main(args):
scores = [rec_res[i][1] for i in range(len(rec_res))]
draw_img = draw_ocr(
- image, boxes, txts, scores, drop_score=drop_score)
+ image, boxes, txts, scores, drop_score=drop_score, font_path=font_path)
draw_img_save = "./inference_results/"
if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 3e1f07b8..45d7b737 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -15,6 +15,7 @@
import argparse
import os, sys
from ppocr.utils.utility import initial_logger
+
logger = initial_logger()
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
@@ -31,53 +32,68 @@ def parse_args():
return v.lower() in ("true", "t", "1")
parser = argparse.ArgumentParser()
- #params for prediction engine
+ # params for prediction engine
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--gpu_mem", type=int, default=8000)
- #params for text detector
+ # params for text detector
parser.add_argument("--image_dir", type=str)
parser.add_argument("--det_algorithm", type=str, default='DB')
parser.add_argument("--det_model_dir", type=str)
parser.add_argument("--det_max_side_len", type=float, default=960)
- #DB parmas
+ # DB params
parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
- parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)
+ parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
- #EAST parmas
+ # EAST params
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
- #SAST parmas
+ # SAST params
parser.add_argument("--det_sast_score_thresh", type=float, default=0.5)
parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2)
parser.add_argument("--det_sast_polygon", type=bool, default=False)
- #params for text recognizer
+ # params for text recognizer
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
- parser.add_argument("--rec_batch_num", type=int, default=30)
+ parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument(
"--rec_char_dict_path",
type=str,
default="./ppocr/utils/ppocr_keys_v1.txt")
- parser.add_argument("--use_space_char", type=bool, default=True)
- parser.add_argument("--enable_mkldnn", type=bool, default=False)
- parser.add_argument("--use_zero_copy_run", type=bool, default=False)
+ parser.add_argument("--use_space_char", type=str2bool, default=True)
+ parser.add_argument(
+ "--vis_font_path",
+ type=str,
+ default="./doc/simfang.ttf")
+
+ # params for text classifier
+ parser.add_argument("--use_angle_cls", type=str2bool, default=False)
+ parser.add_argument("--cls_model_dir", type=str)
+ parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
+ parser.add_argument("--label_list", type=list, default=['0', '180'])
+ parser.add_argument("--cls_batch_num", type=int, default=30)
+ parser.add_argument("--cls_thresh", type=float, default=0.9)
+
+ parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
+ parser.add_argument("--use_zero_copy_run", type=str2bool, default=False)
return parser.parse_args()
def create_predictor(args, mode):
if mode == "det":
model_dir = args.det_model_dir
+ elif mode == 'cls':
+ model_dir = args.cls_model_dir
else:
model_dir = args.rec_model_dir
@@ -105,7 +121,7 @@ def create_predictor(args, mode):
config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()
- #config.enable_memory_optim()
+ # config.enable_memory_optim()
config.disable_glog_info()
if args.use_zero_copy_run:
@@ -187,7 +203,7 @@ def draw_ocr(image,
return image
-def draw_ocr_box_txt(image, boxes, txts):
+def draw_ocr_box_txt(image, boxes, txts, font_path="./doc/simfang.ttf"):
h, w = image.height, image.width
img_left = image.copy()
img_right = Image.new('RGB', (w, h), (255, 255, 255))
@@ -214,7 +230,7 @@ def draw_ocr_box_txt(image, boxes, txts):
if box_height > 2 * box_width:
font_size = max(int(box_width * 0.9), 10)
font = ImageFont.truetype(
- "./doc/simfang.ttf", font_size, encoding="utf-8")
+ font_path, font_size, encoding="utf-8")
cur_y = box[0][1]
for c in txt:
char_size = font.getsize(c)
@@ -224,7 +240,7 @@ def draw_ocr_box_txt(image, boxes, txts):
else:
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(
- "./doc/simfang.ttf", font_size, encoding="utf-8")
+ font_path, font_size, encoding="utf-8")
draw_right.text(
[box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
img_left = Image.blend(image, img_left, 0.5)
diff --git a/tools/infer_cls.py b/tools/infer_cls.py
new file mode 100755
index 00000000..aebdc076
--- /dev/null
+++ b/tools/infer_cls.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import os
+import sys
+
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword as an environment variable unless already set.
+
+    Values are converted with ``str``; variables that are already present in
+    the environment keep their current value.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags should be
+# set before `import paddle`. Otherwise, it would
+# not take any effect.
+set_paddle_flags(
+ FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
+)
+
+import tools.program as program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import get_image_file_list
+
+
+def main():
+ """
+ Run the classification model on the images named by Global.infer_img,
+ log the fetched scores and labels, and export an inference model to
+ ./output.
+
+ Reads the module-level FLAGS (config path and overrides) parsed in the
+ __main__ guard; takes no arguments and returns nothing.
+ """
+ config = program.load_config(FLAGS.config)
+ program.merge_config(FLAGS.opt)
+ logger.info(config)
+
+ # check if set use_gpu=True in paddlepaddle cpu version
+ use_gpu = config['Global']['use_gpu']
+ # check_gpu(use_gpu)
+
+ place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+
+ # Build the network named by Architecture.function in test mode; only its
+ # second return value (a dict of output variables) is used below.
+ rec_model = create_module(config['Architecture']['function'])(params=config)
+ startup_prog = fluid.Program()
+ eval_prog = fluid.Program()
+ with fluid.program_guard(eval_prog, startup_prog):
+ with fluid.unique_name.guard():
+ _, outputs = rec_model(mode="test")
+ fetch_name_list = list(outputs.keys())
+ fetch_varname_list = [outputs[v].name for v in fetch_name_list]
+ eval_prog = eval_prog.clone(for_test=True)
+ exe.run(startup_prog)
+
+ # NOTE(review): presumably loads the weights selected by the config into
+ # eval_prog - confirm against ppocr.utils.save_load.init_model.
+ init_model(config, eval_prog, exe)
+
+ # NOTE(review): one reader item is consumed per file in infer_list, so the
+ # reader is presumably expected to yield images in that same order - confirm.
+ blobs = reader_main(config, 'test')()
+ infer_img = config['Global']['infer_img']
+ infer_list = get_image_file_list(infer_img)
+ max_img_num = len(infer_list)
+ # An empty list is only logged; the loop below then does not run.
+ if len(infer_list) == 0:
+ logger.info("Can not find img in infer_img dir.")
+ for i in range(max_img_num):
+ logger.info("infer_img:%s" % infer_list[i])
+ img = next(blobs)
+ predict = exe.run(program=eval_prog,
+ feed={"image": img},
+ fetch_list=fetch_varname_list,
+ return_numpy=False)
+ scores = np.array(predict[0])
+ label = np.array(predict[1])
+ # Swap the two fetched outputs when the second one is not 1-D, so that
+ # label takes predict[0] and scores takes predict[1].
+ if len(label.shape) != 1:
+ label, scores = scores, label
+ logger.info('\t scores: {}'.format(scores))
+ logger.info('\t label: {}'.format(label))
+ # save for inference model
+ # Every output variable of the network becomes a target of the export.
+ target_var = []
+ for key, values in outputs.items():
+ target_var.append(values)
+
+ fluid.io.save_inference_model(
+ "./output",
+ feeded_var_names=['image'],
+ target_vars=target_var,
+ executor=exe,
+ main_program=eval_prog,
+ model_filename="model",
+ params_filename="params")
+
+
+if __name__ == '__main__':
+ parser = program.ArgsParser()
+ FLAGS = parser.parse_args()
+ main()
diff --git a/tools/program.py b/tools/program.py
index be133ac2..2ef203f4 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -30,6 +30,7 @@ import time
from ppocr.utils.stats import TrainingStats
from eval_utils.eval_det_utils import eval_det_run
from eval_utils.eval_rec_utils import eval_rec_run
+from eval_utils.eval_cls_utils import eval_cls_run
from ppocr.utils.save_load import save_model
import numpy as np
from ppocr.utils.character import cal_predicts_accuracy, cal_predicts_accuracy_srn, CharacterOps
@@ -203,6 +204,15 @@ def build(config, main_prog, startup_prog, mode):
def build_export(config, main_prog, startup_prog):
"""
+ Build input and output for exporting a checkpoints model to an inference model
+ Args:
+ config(dict): config
+ main_prog(): main program
+ startup_prog(): startup program
+ Returns:
+ feeded_var_names(list[str]): var names of input for exported inference model
+ target_vars(list[Variable]): output vars for exported inference model
+ fetches_var_name: dict of checkpoints model outputs(included loss and measures)
"""
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
@@ -240,7 +250,14 @@ def create_multi_devices_program(program, loss_var_name, for_quant=False):
return compile_program
-def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
+def train_eval_det_run(config,
+ exe,
+ train_info_dict,
+ eval_info_dict,
+ is_pruning=False):
+ '''
+ main program of evaluation for detection
+ '''
train_batch_id = 0
log_smooth_window = config['Global']['log_smooth_window']
epoch_num = config['Global']['epoch_num']
@@ -296,7 +313,14 @@ def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
best_batch_id = train_batch_id
best_epoch = epoch
save_path = save_model_dir + "/best_accuracy"
- save_model(train_info_dict['train_program'], save_path)
+ if is_pruning:
+ import paddleslim as slim
+ slim.prune.save_model(
+ exe, train_info_dict['train_program'],
+ save_path)
+ else:
+ save_model(train_info_dict['train_program'],
+ save_path)
strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
train_batch_id, metrics, best_eval_hmean, best_epoch,
best_batch_id)
@@ -307,14 +331,27 @@ def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
train_loader.reset()
if epoch == 0 and save_epoch_step == 1:
save_path = save_model_dir + "/iter_epoch_0"
- save_model(train_info_dict['train_program'], save_path)
+ if is_pruning:
+ import paddleslim as slim
+ slim.prune.save_model(exe, train_info_dict['train_program'],
+ save_path)
+ else:
+ save_model(train_info_dict['train_program'], save_path)
if epoch > 0 and epoch % save_epoch_step == 0:
save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
- save_model(train_info_dict['train_program'], save_path)
+ if is_pruning:
+ import paddleslim as slim
+ slim.prune.save_model(exe, train_info_dict['train_program'],
+ save_path)
+ else:
+ save_model(train_info_dict['train_program'], save_path)
return
def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
+ '''
+ main program of evaluation for recognition
+ '''
train_batch_id = 0
log_smooth_window = config['Global']['log_smooth_window']
epoch_num = config['Global']['epoch_num']
@@ -409,7 +446,89 @@ def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
return
+def train_eval_cls_run(config, exe, train_info_dict, eval_info_dict):
+ '''
+ Training loop with periodic evaluation for the classification ('cls') model.
+
+ Reads log_smooth_window, epoch_num, print_batch_step, eval_batch_step,
+ save_epoch_step and save_model_dir from config['Global'].
+ Reads 'reader', 'compile_program', 'fetch_varname_list', 'fetch_name_list',
+ 'model_average' and 'train_program' from train_info_dict.
+ eval_info_dict is only forwarded to eval_cls_run.
+ Saves the model to <save_model_dir>/best_accuracy whenever the evaluated
+ 'avg_acc' exceeds the best value seen so far, and to
+ <save_model_dir>/iter_epoch_<n> according to save_epoch_step.
+ Returns nothing.
+ '''
+ train_batch_id = 0
+ log_smooth_window = config['Global']['log_smooth_window']
+ epoch_num = config['Global']['epoch_num']
+ print_batch_step = config['Global']['print_batch_step']
+ eval_batch_step = config['Global']['eval_batch_step']
+ start_eval_step = 0
+ # eval_batch_step may be given as a list [start_step, interval]; unpack it.
+ if type(eval_batch_step) == list and len(eval_batch_step) >= 2:
+ start_eval_step = eval_batch_step[0]
+ eval_batch_step = eval_batch_step[1]
+ logger.info(
+ "During the training process, after the {}th iteration, an evaluation is run every {} iterations".
+ format(start_eval_step, eval_batch_step))
+ save_epoch_step = config['Global']['save_epoch_step']
+ save_model_dir = config['Global']['save_model_dir']
+ # Make sure the output directory exists before any model is saved.
+ if not os.path.exists(save_model_dir):
+ os.makedirs(save_model_dir)
+ # Tracks 'loss' and 'acc' for the training log.
+ train_stats = TrainingStats(log_smooth_window, ['loss', 'acc'])
+ # -1 so that the first evaluation result always becomes the best one
+ # (assumes avg_acc is never negative - TODO confirm).
+ best_eval_acc = -1
+ best_batch_id = 0
+ best_epoch = 0
+ train_loader = train_info_dict['reader']
+ for epoch in range(epoch_num):
+ train_loader.start()
+ try:
+ # Runs until the loader raises EOFException (handled below).
+ while True:
+ t1 = time.time()
+ train_outs = exe.run(
+ program=train_info_dict['compile_program'],
+ fetch_list=train_info_dict['fetch_varname_list'],
+ return_numpy=False)
+ # Map each fetch name to its position in train_outs.
+ fetch_map = dict(
+ zip(train_info_dict['fetch_name_list'],
+ range(len(train_outs))))
+
+ loss = np.mean(np.array(train_outs[fetch_map['total_loss']]))
+ lr = np.mean(np.array(train_outs[fetch_map['lr']]))
+ acc = np.mean(np.array(train_outs[fetch_map['acc']]))
+
+ t2 = time.time()
+ train_batch_elapse = t2 - t1
+ stats = {'loss': loss, 'acc': acc}
+ train_stats.update(stats)
+ # NOTE(review): this logging condition is offset by start_eval_step,
+ # while the evaluation condition below is not, although the startup
+ # message describes start_eval_step as the evaluation offset -
+ # confirm the two conditions are not swapped.
+ if train_batch_id > start_eval_step and (train_batch_id - start_eval_step) \
+ % print_batch_step == 0:
+ logs = train_stats.log()
+ strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
+ epoch, train_batch_id, lr, logs, train_batch_elapse)
+ logger.info(strs)
+
+ # Periodic evaluation every eval_batch_step iterations.
+ if train_batch_id > 0 and\
+ train_batch_id % eval_batch_step == 0:
+ model_average = train_info_dict['model_average']
+ # NOTE(review): apply() is called without a visible restore in
+ # this function - confirm training continues with the intended
+ # parameters.
+ if model_average != None:
+ model_average.apply(exe)
+ metrics = eval_cls_run(exe, eval_info_dict)
+ eval_acc = metrics['avg_acc']
+ eval_sample_num = metrics['total_sample_num']
+ # Keep the checkpoint with the highest evaluation accuracy.
+ if eval_acc > best_eval_acc:
+ best_eval_acc = eval_acc
+ best_batch_id = train_batch_id
+ best_epoch = epoch
+ save_path = save_model_dir + "/best_accuracy"
+ save_model(train_info_dict['train_program'], save_path)
+ strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, eval_sample_num:{}'.format(
+ train_batch_id, eval_acc, best_eval_acc, best_epoch,
+ best_batch_id, eval_sample_num)
+ logger.info(strs)
+ train_batch_id += 1
+
+ # The loader signals that its data is exhausted; reset it for the next start().
+ except fluid.core.EOFException:
+ train_loader.reset()
+ # Epoch 0 is only saved when a snapshot is requested for every epoch.
+ if epoch == 0 and save_epoch_step == 1:
+ save_path = save_model_dir + "/iter_epoch_0"
+ save_model(train_info_dict['train_program'], save_path)
+ # Later epochs are saved whenever the epoch index is a multiple of save_epoch_step.
+ if epoch > 0 and epoch % save_epoch_step == 0:
+ save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
+ save_model(train_info_dict['train_program'], save_path)
+ return
+
+
def preprocess():
+ # load config from yml file
FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
@@ -419,9 +538,10 @@ def preprocess():
use_gpu = config['Global']['use_gpu']
check_gpu(use_gpu)
+ # check whether the set algorithm belongs to the supported algorithm list
alg = config['Global']['algorithm']
assert alg in [
- 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN'
+ 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'CLS'
]
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']:
config['Global']['char_ops'] = CharacterOps(config['Global'])
@@ -432,7 +552,9 @@ def preprocess():
if alg in ['EAST', 'DB', 'SAST']:
train_alg_type = 'det'
- else:
+ elif alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']:
train_alg_type = 'rec'
+ else:
+ train_alg_type = 'cls'
return startup_program, train_program, place, config, train_alg_type
diff --git a/tools/train.py b/tools/train.py
index 300705e0..cf0171b3 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -46,6 +46,7 @@ from paddle.fluid.contrib.model_stat import summary
def main():
+ # build train program
train_build_outputs = program.build(
config, train_program, startup_program, mode='train')
train_loader = train_build_outputs[0]
@@ -54,6 +55,7 @@ def main():
train_opt_loss_name = train_build_outputs[3]
model_average = train_build_outputs[-1]
+ # build eval program
eval_program = fluid.Program()
eval_build_outputs = program.build(
config, eval_program, startup_program, mode='eval')
@@ -61,9 +63,11 @@ def main():
eval_fetch_varname_list = eval_build_outputs[2]
eval_program = eval_program.clone(for_test=True)
+ # initialize train reader
train_reader = reader_main(config=config, mode="train")
train_loader.set_sample_list_generator(train_reader, places=place)
+ # initialize eval reader
eval_reader = reader_main(config=config, mode="eval")
exe = fluid.Executor(place)
@@ -75,7 +79,8 @@ def main():
# dump mode structure
if config['Global']['debug']:
- if train_alg_type == 'rec' and 'attention' in config['Global']['loss_type']:
+ if train_alg_type == 'rec' and 'attention' in config['Global'][
+ 'loss_type']:
logger.warning('Does not suport dump attention...')
else:
summary(train_program)
@@ -96,8 +101,10 @@ def main():
if train_alg_type == 'det':
program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
- else:
+ elif train_alg_type == 'rec':
program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
+ else:
+ program.train_eval_cls_run(config, exe, train_info_dict, eval_info_dict)
def test_reader():
@@ -119,6 +126,7 @@ def test_reader():
if __name__ == '__main__':
- startup_program, train_program, place, config, train_alg_type = program.preprocess()
+ startup_program, train_program, place, config, train_alg_type = program.preprocess(
+ )
main()
# test_reader()
diff --git a/train_data/gen_label.py b/train_data/gen_label.py
new file mode 100644
index 00000000..552f279f
--- /dev/null
+++ b/train_data/gen_label.py
@@ -0,0 +1,74 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import os
+import argparse
+
+
+def gen_rec_label(input_path, out_label):
+    """Convert a comma-separated recognition annotation file to a label file.
+
+    Every non-blank line of `input_path` is split at its first comma into an
+    image path and a text. All spaces and double quotes are removed, and the
+    pair is written to `out_label` as `image_path<TAB>text`.
+
+    Args:
+        input_path: path of the annotation file to read.
+        out_label: path of the label file to write (truncated if it exists).
+
+    Raises:
+        IndexError: if a non-blank line contains no comma.
+    """
+    with open(out_label, 'w') as out_file:
+        with open(input_path, 'r') as f:
+            for line in f:
+                # Spaces are dropped everywhere, including inside the text.
+                cleaned = line.strip('\n').replace(" ", "")
+                if not cleaned:
+                    # Blank lines carry no annotation; skip them instead of
+                    # failing on the missing text field.
+                    continue
+                # Split at the first comma only, so commas that belong to the
+                # text itself are kept rather than cutting the text short.
+                fields = cleaned.split(',', 1)
+                img_path, label = fields[0], fields[1]
+                label = label.replace("\"", "")
+                out_file.write(img_path + '\t' + label + '\n')
+
+
+def gen_det_label(root_path, input_dir, out_label):
+    """Merge per-image detection annotation files into a single label file.
+
+    For every file in `input_dir` one line is written to `out_label`:
+    `<image path><TAB><str() of a list of annotation dicts>`. The image path
+    is `root_path` joined with the annotation file name minus its first 3 and
+    last 4 characters, plus ".jpg". Each dict has a "transcription" (the last
+    comma-separated field) and "points" (the leading fields grouped in pairs,
+    kept as strings).
+
+    Args:
+        root_path: directory of the images, used as prefix of the image path.
+        input_dir: directory holding one annotation file per image.
+        out_label: path of the label file to write (truncated if it exists).
+    """
+    with open(out_label, 'w') as out_file:
+        # Sorted so the output order does not depend on the file system.
+        for label_file in sorted(os.listdir(input_dir)):
+            # os.path.join supplies the separator when root_path has no
+            # trailing slash (e.g. the default ".").
+            img_path = os.path.join(root_path, label_file[3:-4] + ".jpg")
+            label = []
+            with open(os.path.join(input_dir, label_file), 'r') as f:
+                for line in f:
+                    # Remove a UTF-8 byte-order mark in either form: the raw
+                    # three bytes or the decoded U+FEFF character.
+                    line = line.strip("\n\r").replace("\xef\xbb\xbf", "")
+                    line = line.replace("\ufeff", "")
+                    if not line:
+                        # Blank lines would otherwise add an empty annotation.
+                        continue
+                    tmp = line.split(',')
+                    # NOTE(review): the last two fields are excluded from the
+                    # coordinates, which presumes two trailing non-coordinate
+                    # fields - confirm against the annotation format in use.
+                    points = tmp[:-2]
+                    s = [points[i:i + 2] for i in range(0, len(points), 2)]
+                    label.append({"transcription": tmp[-1], "points": s})
+            out_file.write(img_path + '\t' + str(label) + '\n')
+
+
+if __name__ == "__main__":
+    # Command-line entry point: convert annotation files to label files for
+    # recognition (--mode rec) or detection (--mode det).
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--mode',
+        type=str,
+        default="rec",
+        help='Generate rec_label or det_label, can be set rec or det')
+    parser.add_argument(
+        '--root_path',
+        type=str,
+        default=".",
+        help='The root directory of images.Only takes effect when mode=det ')
+    parser.add_argument(
+        '--input_path',
+        type=str,
+        default=".",
+        help='Input_label or input path to be converted')
+    parser.add_argument(
+        '--output_label',
+        type=str,
+        default="out_label.txt",
+        help='Output file name')
+
+    args = parser.parse_args()
+    if args.mode == "rec":
+        print("Generate rec label")
+        gen_rec_label(args.input_path, args.output_label)
+    elif args.mode == "det":
+        print("Generate det label")
+        gen_det_label(args.root_path, args.input_path, args.output_label)
+    else:
+        # Report an unsupported mode instead of silently doing nothing.
+        parser.error("unsupported --mode '{}', expected 'rec' or 'det'".format(
+            args.mode))