Add polygon score param to config (#2646)
* Add polygon score param to config
This commit is contained in:
parent
099957c907
commit
aab10b4faf
|
@ -8,7 +8,6 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 静态图版本:develop分支
|
||||
|
||||
**近期更新**
|
||||
- 【预告】 PaddleOCR研发团队对最新发版内容技术深入解读,4月13日晚上19:00,[直播地址](https://live.bilibili.com/21689802)
|
||||
- 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
|
||||
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
|
|
|
@ -49,6 +49,8 @@ public:
|
|||
|
||||
this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]);
|
||||
|
||||
this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"]));
|
||||
|
||||
this->det_model_dir.assign(config_map_["det_model_dir"]);
|
||||
|
||||
this->rec_model_dir.assign(config_map_["rec_model_dir"]);
|
||||
|
@ -86,6 +88,8 @@ public:
|
|||
|
||||
double det_db_unclip_ratio = 2.0;
|
||||
|
||||
bool use_polygon_score = false;
|
||||
|
||||
std::string det_model_dir;
|
||||
|
||||
std::string rec_model_dir;
|
||||
|
|
|
@ -44,7 +44,8 @@ public:
|
|||
const bool &use_mkldnn, const int &max_side_len,
|
||||
const double &det_db_thresh,
|
||||
const double &det_db_box_thresh,
|
||||
const double &det_db_unclip_ratio, const bool &visualize,
|
||||
const double &det_db_unclip_ratio,
|
||||
const bool &use_polygon_score, const bool &visualize,
|
||||
const bool &use_tensorrt, const bool &use_fp16) {
|
||||
this->use_gpu_ = use_gpu;
|
||||
this->gpu_id_ = gpu_id;
|
||||
|
@ -57,6 +58,7 @@ public:
|
|||
this->det_db_thresh_ = det_db_thresh;
|
||||
this->det_db_box_thresh_ = det_db_box_thresh;
|
||||
this->det_db_unclip_ratio_ = det_db_unclip_ratio;
|
||||
this->use_polygon_score_ = use_polygon_score;
|
||||
|
||||
this->visualize_ = visualize;
|
||||
this->use_tensorrt_ = use_tensorrt;
|
||||
|
@ -85,6 +87,7 @@ private:
|
|||
double det_db_thresh_ = 0.3;
|
||||
double det_db_box_thresh_ = 0.5;
|
||||
double det_db_unclip_ratio_ = 2.0;
|
||||
bool use_polygon_score_ = false;
|
||||
|
||||
bool visualize_ = true;
|
||||
bool use_tensorrt_ = false;
|
||||
|
|
|
@ -55,7 +55,8 @@ public:
|
|||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
|
||||
const float &box_thresh, const float &det_db_unclip_ratio);
|
||||
const float &box_thresh, const float &det_db_unclip_ratio,
|
||||
const bool &use_polygon_score);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
|
||||
|
|
|
@ -183,7 +183,7 @@ cmake .. \
|
|||
make -j
|
||||
```
|
||||
|
||||
`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中;为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
|
||||
`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
|
||||
|
||||
|
||||
* 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
|
||||
|
@ -211,6 +211,7 @@ max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使
|
|||
det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.0-0.3对结果影响不明显
|
||||
det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小
|
||||
det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本
|
||||
use_polygon_score 1 # 是否使用多边形框计算bbox score,0表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。
|
||||
det_model_dir ./inference/det_db # 检测模型inference model地址
|
||||
|
||||
# cls config
|
||||
|
|
|
@ -217,6 +217,7 @@ max_side_len 960 # Limit the maximum image height and width to 960
|
|||
det_db_thresh 0.3 # Used to filter the binarized image of DB prediction; setting it anywhere in 0.0-0.3 has no obvious effect on the result
|
||||
det_db_box_thresh 0.5 # DB post-processing threshold for filtering boxes; if text boxes are being missed, it can be reduced as appropriate
|
||||
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
|
||||
use_polygon_score 1 # Whether to use the polygon box to calculate the bbox score; 0 means use the rectangular box. The rectangular box is faster to compute, while the polygon box is more accurate for curved text regions.
|
||||
det_model_dir ./inference/det_db # Address of detection inference model
|
||||
|
||||
# cls config
|
||||
|
|
|
@ -59,7 +59,8 @@ int main(int argc, char **argv) {
|
|||
config.gpu_mem, config.cpu_math_library_num_threads,
|
||||
config.use_mkldnn, config.max_side_len, config.det_db_thresh,
|
||||
config.det_db_box_thresh, config.det_db_unclip_ratio,
|
||||
config.visualize, config.use_tensorrt, config.use_fp16);
|
||||
config.use_polygon_score, config.visualize,
|
||||
config.use_tensorrt, config.use_fp16);
|
||||
|
||||
Classifier *cls = nullptr;
|
||||
if (config.use_angle_cls == true) {
|
||||
|
|
|
@ -109,9 +109,9 @@ void DBDetector::Run(cv::Mat &img,
|
|||
cv::Mat dilation_map;
|
||||
cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
|
||||
cv::dilate(bit_map, dilation_map, dila_ele);
|
||||
boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map,
|
||||
this->det_db_box_thresh_,
|
||||
this->det_db_unclip_ratio_);
|
||||
boxes = post_processor_.BoxesFromBitmap(
|
||||
pred_map, dilation_map, this->det_db_box_thresh_,
|
||||
this->det_db_unclip_ratio_, this->use_polygon_score_);
|
||||
|
||||
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
|
||||
|
||||
|
|
|
@ -160,26 +160,34 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
|
|||
}
|
||||
|
||||
float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
|
||||
cv::Mat pred){
|
||||
cv::Mat pred) {
|
||||
int width = pred.cols;
|
||||
int height = pred.rows;
|
||||
std::vector<float> box_x;
|
||||
std::vector<float> box_y;
|
||||
for(int i=0; i<contour.size(); ++i){
|
||||
for (int i = 0; i < contour.size(); ++i) {
|
||||
box_x.push_back(contour[i].x);
|
||||
box_y.push_back(contour[i].y);
|
||||
}
|
||||
|
||||
int xmin = clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, width - 1);
|
||||
int xmax = clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, width - 1);
|
||||
int ymin = clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, height - 1);
|
||||
int ymax = clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, height - 1);
|
||||
int xmin =
|
||||
clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0,
|
||||
width - 1);
|
||||
int xmax =
|
||||
clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0,
|
||||
width - 1);
|
||||
int ymin =
|
||||
clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0,
|
||||
height - 1);
|
||||
int ymax =
|
||||
clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0,
|
||||
height - 1);
|
||||
|
||||
cv::Mat mask;
|
||||
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
|
||||
|
||||
cv::Point rook_point[contour.size()];
|
||||
for(int i=0; i<contour.size(); ++i){
|
||||
for (int i = 0; i < contour.size(); ++i) {
|
||||
rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
|
||||
}
|
||||
const cv::Point *ppt[1] = {rook_point};
|
||||
|
@ -187,7 +195,8 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
|
|||
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
|
||||
|
||||
cv::Mat croppedImg;
|
||||
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg);
|
||||
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
|
||||
.copyTo(croppedImg);
|
||||
float score = cv::mean(croppedImg, mask)[0];
|
||||
return score;
|
||||
}
|
||||
|
@ -230,10 +239,9 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
|
|||
return score;
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
|
||||
const float &box_thresh,
|
||||
const float &det_db_unclip_ratio) {
|
||||
std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
|
||||
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
|
||||
const float &det_db_unclip_ratio, const bool &use_polygon_score) {
|
||||
const int min_size = 3;
|
||||
const int max_candidates = 1000;
|
||||
|
||||
|
@ -267,9 +275,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
|
|||
}
|
||||
|
||||
float score;
|
||||
score = BoxScoreFast(array, pred);
|
||||
/* compute using polygon*/
|
||||
// score = PolygonScoreAcc(contours[_i], pred);
|
||||
if (use_polygon_score)
|
||||
/* compute using polygon*/
|
||||
score = PolygonScoreAcc(contours[_i], pred);
|
||||
else
|
||||
score = BoxScoreFast(array, pred);
|
||||
|
||||
if (score < box_thresh)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ max_side_len 960
|
|||
det_db_thresh 0.3
|
||||
det_db_box_thresh 0.5
|
||||
det_db_unclip_ratio 1.6
|
||||
use_polygon_score 1
|
||||
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
|
||||
|
||||
# cls config
|
||||
|
|
Loading…
Reference in New Issue