Add polygon score param to config (#2646)

* Add polygon score param to config
2021-04-27 10:32:33 +08:00 · 2021-04-27 10:32:33 +08:00 · aab10b4faf
parent 099957c907
commit aab10b4faf
10 changed files with 45 additions and 23 deletions
--- a/README_ch.md
+++ b/README_ch.md
@ -8,7 +8,6 @@ PaddleOCR同时支持动态图与静态图两种编程范式
 - 静态图版本：develop分支

 **近期更新**
- 【预告】 PaddleOCR研发团队对最新发版内容技术深入解读，4月13日晚上19:00，[直播地址](https://live.bilibili.com/21689802)
 - 2021.4.8 release 2.1版本，新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源，[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
 - 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题，总数162个，每周一都会更新，欢迎大家持续关注。
 - 2021.1.21 更新多语言识别模型，目前支持语种超过27种，包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等，后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
--- a/deploy/cpp_infer/include/config.h
+++ b/deploy/cpp_infer/include/config.h
@ -49,6 +49,8 @@ public:

    this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]);

+    this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"]));
+
    this->det_model_dir.assign(config_map_["det_model_dir"]);

    this->rec_model_dir.assign(config_map_["rec_model_dir"]);
@ -86,6 +88,8 @@ public:

  double det_db_unclip_ratio = 2.0;

+  bool use_polygon_score = false;
+
  std::string det_model_dir;

  std::string rec_model_dir;
--- a/deploy/cpp_infer/include/ocr_det.h
+++ b/deploy/cpp_infer/include/ocr_det.h
@ -44,7 +44,8 @@ public:
                      const bool &use_mkldnn, const int &max_side_len,
                      const double &det_db_thresh,
                      const double &det_db_box_thresh,
-                      const double &det_db_unclip_ratio, const bool &visualize,
+                      const double &det_db_unclip_ratio,
+                      const bool &use_polygon_score, const bool &visualize,
                      const bool &use_tensorrt, const bool &use_fp16) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
@ -57,6 +58,7 @@ public:
    this->det_db_thresh_ = det_db_thresh;
    this->det_db_box_thresh_ = det_db_box_thresh;
    this->det_db_unclip_ratio_ = det_db_unclip_ratio;
+    this->use_polygon_score_ = use_polygon_score;

    this->visualize_ = visualize;
    this->use_tensorrt_ = use_tensorrt;
@ -85,6 +87,7 @@ private:
  double det_db_thresh_ = 0.3;
  double det_db_box_thresh_ = 0.5;
  double det_db_unclip_ratio_ = 2.0;
+  bool use_polygon_score_ = false;

  bool visualize_ = true;
  bool use_tensorrt_ = false;
--- a/deploy/cpp_infer/include/postprocess_op.h
+++ b/deploy/cpp_infer/include/postprocess_op.h
@ -55,7 +55,8 @@ public:

  std::vector<std::vector<std::vector<int>>>
  BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
-                  const float &box_thresh, const float &det_db_unclip_ratio);
+                  const float &box_thresh, const float &det_db_unclip_ratio,
+                  const bool &use_polygon_score);

  std::vector<std::vector<std::vector<int>>>
  FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@ -183,7 +183,7 @@ cmake .. \
 make -j
 ```

-`OPENCV_DIR`为opencv编译安装的地址；`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹)；`CUDA_LIB_DIR`为cuda库文件地址，在docker中；为`/usr/local/cuda/lib64`；`CUDNN_LIB_DIR`为cudnn库文件地址，在docker中为`/usr/lib/x86_64-linux-gnu/`。
+`OPENCV_DIR`为opencv编译安装的地址；`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹)；`CUDA_LIB_DIR`为cuda库文件地址，在docker中为`/usr/local/cuda/lib64`；`CUDNN_LIB_DIR`为cudnn库文件地址，在docker中为`/usr/lib/x86_64-linux-gnu/`。


 * 编译完成之后，会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
@ -211,6 +211,7 @@ max_side_len  960 # 输入图像长宽大于960时，等比例缩放图像，使
 det_db_thresh  0.3 # 用于过滤DB预测的二值化图像，设置为0.-0.3对结果影响不明显
 det_db_box_thresh  0.5 # DB后处理过滤box的阈值，如果检测存在漏框情况，可酌情减小
 det_db_unclip_ratio  1.6 # 表示文本框的紧致程度，越小则文本框更靠近文本
+use_polygon_score 1 # 是否使用多边形框计算bbox score，0表示使用矩形框计算。矩形框计算速度更快，多边形框对弯曲文本区域计算更准确。
 det_model_dir  ./inference/det_db # 检测模型inference model地址

 # cls config
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@ -217,6 +217,7 @@ max_side_len  960 #  Limit the maximum image height and width to 960
 det_db_thresh  0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result
 det_db_box_thresh  0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
 det_db_unclip_ratio  1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
+use_polygon_score 1 # Whether to use polygon box to calculate bbox score, 0 means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.
 det_model_dir  ./inference/det_db # Address of detection inference model

 # cls config
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@ -59,7 +59,8 @@ int main(int argc, char **argv) {
                 config.gpu_mem, config.cpu_math_library_num_threads,
                 config.use_mkldnn, config.max_side_len, config.det_db_thresh,
                 config.det_db_box_thresh, config.det_db_unclip_ratio,
-                 config.visualize, config.use_tensorrt, config.use_fp16);
+                 config.use_polygon_score, config.visualize,
+                 config.use_tensorrt, config.use_fp16);

  Classifier *cls = nullptr;
  if (config.use_angle_cls == true) {
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@ -109,9 +109,9 @@ void DBDetector::Run(cv::Mat &img,
  cv::Mat dilation_map;
  cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
  cv::dilate(bit_map, dilation_map, dila_ele);
-  boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map,
-                                          this->det_db_box_thresh_,
-                                          this->det_db_unclip_ratio_);
+  boxes = post_processor_.BoxesFromBitmap(
+      pred_map, dilation_map, this->det_db_box_thresh_,
+      this->det_db_unclip_ratio_, this->use_polygon_score_);

  boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);

--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@ -160,26 +160,34 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
 }

 float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
-		                  cv::Mat pred){
+                                     cv::Mat pred) {
  int width = pred.cols;
  int height = pred.rows;
  std::vector<float> box_x;
  std::vector<float> box_y;
-  for(int i=0; i<contour.size(); ++i){
+  for (int i = 0; i < contour.size(); ++i) {
    box_x.push_back(contour[i].x);
    box_y.push_back(contour[i].y);
  }

-  int xmin = clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, width - 1);
-  int xmax = clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, width - 1);
-  int ymin = clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, height - 1);
-  int ymax = clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, height - 1);
+  int xmin =
+      clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0,
+            width - 1);
+  int xmax =
+      clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0,
+            width - 1);
+  int ymin =
+      clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0,
+            height - 1);
+  int ymax =
+      clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0,
+            height - 1);

  cv::Mat mask;
  mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);

  cv::Point rook_point[contour.size()];
-  for(int i=0; i<contour.size(); ++i){
+  for (int i = 0; i < contour.size(); ++i) {
    rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
  }
  const cv::Point *ppt[1] = {rook_point};
@ -187,7 +195,8 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
  cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));

  cv::Mat croppedImg;
-  pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg);
+  pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
+      .copyTo(croppedImg);
  float score = cv::mean(croppedImg, mask)[0];
  return score;
 }
@ -230,10 +239,9 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
  return score;
 }

-std::vector<std::vector<std::vector<int>>>
-PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
-                               const float &box_thresh,
-                               const float &det_db_unclip_ratio) {
+std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
+    const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
+    const float &det_db_unclip_ratio, const bool &use_polygon_score) {
  const int min_size = 3;
  const int max_candidates = 1000;

@ -267,9 +275,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
    }

    float score;
-    score = BoxScoreFast(array, pred);
-    /* compute using polygon*/ 
-    // score = PolygonScoreAcc(contours[_i], pred);
+    if (use_polygon_score)
+      /* compute using polygon*/
+      score = PolygonScoreAcc(contours[_i], pred);
+    else
+      score = BoxScoreFast(array, pred);
+
    if (score < box_thresh)
      continue;

--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@ -10,6 +10,7 @@ max_side_len  960
 det_db_thresh  0.3
 det_db_box_thresh  0.5
 det_db_unclip_ratio  1.6
+use_polygon_score 1
 det_model_dir  ./inference/ch_ppocr_mobile_v2.0_det_infer/

 # cls config