merge upstream
This commit is contained in:
commit
f896d5afa4
|
@ -44,6 +44,9 @@ public:
|
|||
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
|
||||
return std::distance(first, std::max_element(first, last));
|
||||
}
|
||||
|
||||
static void GetAllFiles(const char *dir_name,
|
||||
std::vector<std::string> &all_inputs);
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
|
@ -27,9 +27,12 @@
|
|||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <include/config.h>
|
||||
#include <include/ocr_det.h>
|
||||
#include <include/ocr_rec.h>
|
||||
#include <include/utility.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
@ -47,13 +50,8 @@ int main(int argc, char **argv) {
|
|||
config.PrintConfigInfo();
|
||||
|
||||
std::string img_path(argv[2]);
|
||||
|
||||
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
|
||||
|
||||
if (!srcimg.data) {
|
||||
std::cerr << "[ERROR] image read failed! image path: " << img_path << "\n";
|
||||
exit(1);
|
||||
}
|
||||
std::vector<std::string> all_img_names;
|
||||
Utility::GetAllFiles((char *)img_path.c_str(), all_img_names);
|
||||
|
||||
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
|
||||
config.gpu_mem, config.cpu_math_library_num_threads,
|
||||
|
@ -76,18 +74,30 @@ int main(int argc, char **argv) {
|
|||
config.use_tensorrt, config.use_fp16);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
det.Run(srcimg, boxes);
|
||||
|
||||
rec.Run(boxes, srcimg, cls);
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||
std::cout << "Cost "
|
||||
<< double(duration.count()) *
|
||||
std::chrono::microseconds::period::num /
|
||||
std::chrono::microseconds::period::den
|
||||
<< "s" << std::endl;
|
||||
for (auto img_dir : all_img_names) {
|
||||
LOG(INFO) << "The predict img: " << img_dir;
|
||||
|
||||
cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
|
||||
if (!srcimg.data) {
|
||||
std::cerr << "[ERROR] image read failed! image path: " << img_path
|
||||
<< "\n";
|
||||
exit(1);
|
||||
}
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
|
||||
det.Run(srcimg, boxes);
|
||||
|
||||
rec.Run(boxes, srcimg, cls);
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||
std::cout << "Cost "
|
||||
<< double(duration.count()) *
|
||||
std::chrono::microseconds::period::num /
|
||||
std::chrono::microseconds::period::den
|
||||
<< "s" << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) {
|
|||
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
|
||||
: paddle_infer::Config::Precision::kFloat32,
|
||||
false, false);
|
||||
std::map<std::string, std::vector<int>> min_input_shape = {
|
||||
{"x", {1, 3, 50, 50}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 20, 20}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 10, 10}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 10, 10}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 20, 20}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 20, 20}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 20, 20}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 20, 20}},
|
||||
{"elementwise_add_7", {1, 56, 2, 2}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 2, 2}}};
|
||||
std::map<std::string, std::vector<int>> max_input_shape = {
|
||||
{"x", {1, 3, this->max_side_len_, this->max_side_len_}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 400, 400}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 200, 200}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 200, 200}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 400, 400}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 400, 400}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 400, 400}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 400, 400}},
|
||||
{"elementwise_add_7", {1, 56, 400, 400}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 400, 400}}};
|
||||
std::map<std::string, std::vector<int>> opt_input_shape = {
|
||||
{"x", {1, 3, 640, 640}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 160, 160}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 80, 80}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 80, 80}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 160, 160}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 160, 160}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 160, 160}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 160, 160}},
|
||||
{"elementwise_add_7", {1, 56, 40, 40}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 40, 40}}};
|
||||
|
||||
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
|
||||
opt_input_shape);
|
||||
}
|
||||
} else {
|
||||
config.DisableGpu();
|
||||
|
@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
|
|||
config.SwitchIrOptim(true);
|
||||
|
||||
config.EnableMemoryOptim();
|
||||
config.DisableGlogInfo();
|
||||
// config.DisableGlogInfo();
|
||||
|
||||
this->predictor_ = CreatePredictor(config);
|
||||
}
|
||||
|
|
|
@ -25,8 +25,9 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
|
|||
|
||||
std::cout << "The predicted text is :" << std::endl;
|
||||
int index = 0;
|
||||
for (int i = boxes.size() - 1; i >= 0; i--) {
|
||||
for (int i = 0; i < boxes.size(); i++) {
|
||||
crop_img = GetRotateCropImage(srcimg, boxes[i]);
|
||||
|
||||
if (cls != nullptr) {
|
||||
crop_img = cls->Run(crop_img);
|
||||
}
|
||||
|
@ -105,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
|
|||
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
|
||||
: paddle_infer::Config::Precision::kFloat32,
|
||||
false, false);
|
||||
std::map<std::string, std::vector<int>> min_input_shape = {
|
||||
{"x", {1, 3, 32, 10}}};
|
||||
std::map<std::string, std::vector<int>> max_input_shape = {
|
||||
{"x", {1, 3, 32, 2000}}};
|
||||
std::map<std::string, std::vector<int>> opt_input_shape = {
|
||||
{"x", {1, 3, 32, 320}}};
|
||||
|
||||
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
|
||||
opt_input_shape);
|
||||
}
|
||||
} else {
|
||||
config.DisableGpu();
|
||||
|
|
|
@ -77,19 +77,13 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
|
|||
|
||||
int resize_h = int(float(h) * ratio);
|
||||
int resize_w = int(float(w) * ratio);
|
||||
|
||||
|
||||
resize_h = max(int(round(float(resize_h) / 32) * 32), 32);
|
||||
resize_w = max(int(round(float(resize_w) / 32) * 32), 32);
|
||||
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
|
||||
ratio_h = float(resize_h) / float(h);
|
||||
ratio_w = float(resize_w) / float(w);
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(640, 640));
|
||||
ratio_h = float(640) / float(h);
|
||||
ratio_w = float(640) / float(w);
|
||||
}
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
|
||||
ratio_h = float(resize_h) / float(h);
|
||||
ratio_w = float(resize_w) / float(w);
|
||||
}
|
||||
|
||||
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
|
||||
|
@ -108,23 +102,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
|
|||
resize_w = imgW;
|
||||
else
|
||||
resize_w = int(ceilf(imgH * ratio));
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
|
||||
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
|
||||
{127, 127, 127});
|
||||
} else {
|
||||
int k = int(img.cols * 32 / img.rows);
|
||||
if (k >= 100) {
|
||||
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
|
||||
cv::BORDER_CONSTANT, {127, 127, 127});
|
||||
}
|
||||
}
|
||||
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
|
||||
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
|
||||
{127, 127, 127});
|
||||
}
|
||||
|
||||
void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
|
@ -142,15 +125,11 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
|
|||
else
|
||||
resize_w = int(ceilf(imgH * ratio));
|
||||
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
if (resize_w < imgW) {
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
|
||||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
|
||||
}
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
if (resize_w < imgW) {
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
|
||||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,12 +12,14 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <dirent.h>
|
||||
#include <include/utility.h>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <vector>
|
||||
|
||||
#include <include/utility.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
std::vector<std::string> Utility::ReadDict(const std::string &path) {
|
||||
|
@ -57,4 +59,37 @@ void Utility::VisualizeBboxes(
|
|||
<< std::endl;
|
||||
}
|
||||
|
||||
// list all files under a directory
|
||||
void Utility::GetAllFiles(const char *dir_name,
|
||||
std::vector<std::string> &all_inputs) {
|
||||
if (NULL == dir_name) {
|
||||
std::cout << " dir_name is null ! " << std::endl;
|
||||
return;
|
||||
}
|
||||
struct stat s;
|
||||
lstat(dir_name, &s);
|
||||
if (!S_ISDIR(s.st_mode)) {
|
||||
std::cout << "dir_name is not a valid directory !" << std::endl;
|
||||
all_inputs.push_back(dir_name);
|
||||
return;
|
||||
} else {
|
||||
struct dirent *filename; // return value for readdir()
|
||||
DIR *dir; // return value for opendir()
|
||||
dir = opendir(dir_name);
|
||||
if (NULL == dir) {
|
||||
std::cout << "Can not open dir " << dir_name << std::endl;
|
||||
return;
|
||||
}
|
||||
std::cout << "Successfully opened the dir !" << std::endl;
|
||||
while ((filename = readdir(dir)) != NULL) {
|
||||
if (strcmp(filename->d_name, ".") == 0 ||
|
||||
strcmp(filename->d_name, "..") == 0)
|
||||
continue;
|
||||
// img_dir + std::string("/") + all_inputs[0];
|
||||
all_inputs.push_back(dir_name + std::string("/") +
|
||||
std::string(filename->d_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace PaddleOCR
|
|
@ -12,9 +12,10 @@ cmake .. \
|
|||
-DWITH_MKL=ON \
|
||||
-DWITH_GPU=OFF \
|
||||
-DWITH_STATIC_LIB=OFF \
|
||||
-DUSE_TENSORRT=OFF \
|
||||
-DWITH_TENSORRT=OFF \
|
||||
-DOPENCV_DIR=${OPENCV_DIR} \
|
||||
-DCUDNN_LIB=${CUDNN_LIB_DIR} \
|
||||
-DCUDA_LIB=${CUDA_LIB_DIR} \
|
||||
-DTENSORRT_DIR=${TENSORRT_DIR} \
|
||||
|
||||
make -j
|
||||
|
|
|
@ -20,10 +20,10 @@ cls_thresh 0.9
|
|||
|
||||
# rec config
|
||||
rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
|
||||
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
|
||||
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
|
||||
|
||||
# show the detection results
|
||||
visualize 1
|
||||
visualize 0
|
||||
|
||||
# use_tensorrt
|
||||
use_tensorrt 0
|
||||
|
|
|
@ -6,7 +6,7 @@ paddle-lite is a lightweight inference engine for PaddlePaddle. It provides effi
|
|||
|
||||
## 1. Preparation
|
||||
|
||||
### 运行准备
|
||||
### Preparation environment
|
||||
|
||||
- Computer (for Compiling Paddle Lite)
|
||||
- Mobile phone (arm7 or arm8)
|
||||
|
@ -87,8 +87,8 @@ The following table also provides a series of models that can be deployed on mob
|
|||
|
||||
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|
||||
|---|---|---|---|---|---|---|
|
||||
|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
|
||||
|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
|
||||
|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
|
||||
|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
|
||||
|
||||
If you directly use the model in the above table for deployment, you can skip the following steps and directly read [Section 2.2](#2.2 Run optimized model on Phone).
|
||||
|
||||
|
|
|
@ -103,14 +103,14 @@ python3 generate_multi_language_configs.py -l it \
|
|||
| german_mobile_v2.0_rec | ppocr/utils/dict/german_dict.txt | Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
|
||||
| korean_mobile_v2.0_rec | ppocr/utils/dict/korean_dict.txt | Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
|
||||
| japan_mobile_v2.0_rec | ppocr/utils/dict/japan_dict.txt | Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
|
||||
| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
|
||||
| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
|
||||
| te_mobile_v2.0_rec | ppocr/utils/dict/te_dict.txt | Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
|
||||
| ka_mobile_v2.0_rec | ppocr/utils/dict/ka_dict.txt | Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
|
||||
| ta_mobile_v2.0_rec | ppocr/utils/dict/ta_dict.txt | Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
|
||||
| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
|
||||
For more supported languages, please refer to : [Multi-language model](./multi_languages_en.md)
|
||||
|
||||
|
|
BIN
doc/joinus.PNG
BIN
doc/joinus.PNG
Binary file not shown.
Before Width: | Height: | Size: 102 KiB After Width: | Height: | Size: 100 KiB |
|
@ -21,6 +21,9 @@ import json
|
|||
from PIL import Image, ImageDraw, ImageFont
|
||||
import math
|
||||
from paddle import inference
|
||||
import time
|
||||
from ppocr.utils.logging import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def str2bool(v):
|
||||
|
@ -40,6 +43,7 @@ inference_args_list = [
|
|||
['total_process_num', int, 1],
|
||||
['process_id', int, 0],
|
||||
['gpu_mem', int, 500],
|
||||
['cpu_threads', int, 10],
|
||||
# params for text detector
|
||||
['image_dir', str, None],
|
||||
['det_algorithm', str, 'DB'],
|
||||
|
@ -128,19 +132,97 @@ def create_predictor(args, mode, logger):
|
|||
config.enable_use_gpu(args.gpu_mem, 0)
|
||||
if args.use_tensorrt:
|
||||
config.enable_tensorrt_engine(
|
||||
precision_mode=inference.PrecisionType.Half
|
||||
if args.use_fp16 else inference.PrecisionType.Float32,
|
||||
max_batch_size=args.max_batch_size)
|
||||
precision_mode=inference.PrecisionType.Float32,
|
||||
max_batch_size=args.max_batch_size,
|
||||
min_subgraph_size=3) # skip the minmum trt subgraph
|
||||
if mode == "det" and "mobile" in model_file_path:
|
||||
min_input_shape = {
|
||||
"x": [1, 3, 50, 50],
|
||||
"conv2d_92.tmp_0": [1, 96, 20, 20],
|
||||
"conv2d_91.tmp_0": [1, 96, 10, 10],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
|
||||
"elementwise_add_7": [1, 56, 2, 2],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 2, 2]
|
||||
}
|
||||
max_input_shape = {
|
||||
"x": [1, 3, 2000, 2000],
|
||||
"conv2d_92.tmp_0": [1, 96, 400, 400],
|
||||
"conv2d_91.tmp_0": [1, 96, 200, 200],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400],
|
||||
"elementwise_add_7": [1, 56, 400, 400],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 400, 400]
|
||||
}
|
||||
opt_input_shape = {
|
||||
"x": [1, 3, 640, 640],
|
||||
"conv2d_92.tmp_0": [1, 96, 160, 160],
|
||||
"conv2d_91.tmp_0": [1, 96, 80, 80],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
|
||||
"elementwise_add_7": [1, 56, 40, 40],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
|
||||
}
|
||||
if mode == "det" and "server" in model_file_path:
|
||||
min_input_shape = {
|
||||
"x": [1, 3, 50, 50],
|
||||
"conv2d_59.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
|
||||
}
|
||||
max_input_shape = {
|
||||
"x": [1, 3, 2000, 2000],
|
||||
"conv2d_59.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
|
||||
}
|
||||
opt_input_shape = {
|
||||
"x": [1, 3, 640, 640],
|
||||
"conv2d_59.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
|
||||
}
|
||||
elif mode == "rec":
|
||||
min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
|
||||
max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
|
||||
opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
|
||||
elif mode == "cls":
|
||||
min_input_shape = {"x": [args.rec_batch_num, 3, 48, 10]}
|
||||
max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
|
||||
opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
|
||||
else:
|
||||
min_input_shape = {"x": [1, 3, 10, 10]}
|
||||
max_input_shape = {"x": [1, 3, 1000, 1000]}
|
||||
opt_input_shape = {"x": [1, 3, 500, 500]}
|
||||
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
|
||||
opt_input_shape)
|
||||
|
||||
else:
|
||||
config.disable_gpu()
|
||||
config.set_cpu_math_library_num_threads(6)
|
||||
if hasattr(args, "cpu_threads"):
|
||||
config.set_cpu_math_library_num_threads(args.cpu_threads)
|
||||
else:
|
||||
config.set_cpu_math_library_num_threads(
|
||||
10) # default cpu threads as 10
|
||||
if args.enable_mkldnn:
|
||||
# cache 10 different shapes for mkldnn to avoid memory leak
|
||||
config.set_mkldnn_cache_capacity(10)
|
||||
config.enable_mkldnn()
|
||||
# TODO LDOUBLEV: fix mkldnn bug when batch_size > 1
|
||||
# config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
|
||||
args.rec_batch_num = 1
|
||||
|
||||
# enable memory optim
|
||||
config.enable_memory_optim()
|
||||
|
@ -203,7 +285,7 @@ def draw_ocr(image,
|
|||
txts=None,
|
||||
scores=None,
|
||||
drop_score=0.5,
|
||||
font_path="./doc/simfang.ttf"):
|
||||
font_path="./doc/fonts/simfang.ttf"):
|
||||
"""
|
||||
Visualize the results of OCR detection and recognition
|
||||
args:
|
||||
|
@ -411,22 +493,4 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_img = "./doc/test_v2"
|
||||
predict_txt = "./doc/predict.txt"
|
||||
f = open(predict_txt, 'r')
|
||||
data = f.readlines()
|
||||
img_path, anno = data[0].strip().split('\t')
|
||||
img_name = os.path.basename(img_path)
|
||||
img_path = os.path.join(test_img, img_name)
|
||||
image = Image.open(img_path)
|
||||
|
||||
data = json.loads(anno)
|
||||
boxes, txts, scores = [], [], []
|
||||
for dic in data:
|
||||
boxes.append(dic['points'])
|
||||
txts.append(dic['transcription'])
|
||||
scores.append(round(dic['scores'], 3))
|
||||
|
||||
new_img = draw_ocr(image, boxes, txts, scores)
|
||||
|
||||
cv2.imwrite(img_name, new_img)
|
||||
pass
|
||||
|
|
Loading…
Reference in New Issue