Merge branch 'dygraph' into bm_dyg
This commit is contained in:
commit
b796c91649
|
@ -92,7 +92,7 @@ class WindowMixin(object):
|
|||
class MainWindow(QMainWindow, WindowMixin):
|
||||
FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3))
|
||||
|
||||
def __init__(self, lang="ch", defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
|
||||
def __init__(self, lang="ch", gpu=False, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None):
|
||||
super(MainWindow, self).__init__()
|
||||
self.setWindowTitle(__appname__)
|
||||
|
||||
|
@ -108,7 +108,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
getStr = lambda strId: self.stringBundle.getString(strId)
|
||||
|
||||
self.defaultSaveDir = defaultSaveDir
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
|
||||
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=gpu, lang=lang)
|
||||
|
||||
if os.path.exists('./data/paddle.png'):
|
||||
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
|
||||
|
@ -1239,6 +1239,8 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
|
||||
def loadFile(self, filePath=None):
|
||||
"""Load the specified file, or the last opened file if None."""
|
||||
if self.dirty:
|
||||
self.mayContinue()
|
||||
self.resetState()
|
||||
self.canvas.setEnabled(False)
|
||||
if filePath is None:
|
||||
|
@ -2037,6 +2039,8 @@ def read(filename, default=None):
|
|||
except:
|
||||
return default
|
||||
|
||||
def str2bool(v):
    """Convert a command-line flag value to a bool.

    Used as an ``argparse`` ``type=`` converter. String input is compared
    case-insensitively: ``"true"``, ``"t"`` and ``"1"`` yield True, every
    other string yields False.

    Args:
        v: The raw value, normally a ``str`` taken from the command line.
            A value that is already a ``bool`` is returned unchanged.

    Returns:
        bool: The parsed flag value.
    """
    # A bool has no .lower(); pass it through instead of raising
    # AttributeError when the function is called with a real boolean.
    if isinstance(v, bool):
        return v
    return v.lower() in ("true", "t", "1")
|
||||
|
||||
def get_main_app(argv=[]):
|
||||
"""
|
||||
|
@ -2048,13 +2052,14 @@ def get_main_app(argv=[]):
|
|||
app.setWindowIcon(newIcon("app"))
|
||||
# Tzutalin 201705+: Accept extra arguments to change predefined class file
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument("--lang", default='en', nargs="?")
|
||||
argparser.add_argument("--lang", type=str, default='en', nargs="?")
|
||||
argparser.add_argument("--gpu", type=str2bool, default=False, nargs="?")
|
||||
argparser.add_argument("--predefined_classes_file",
|
||||
default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"),
|
||||
nargs="?")
|
||||
args = argparser.parse_args(argv[1:])
|
||||
# Usage : labelImg.py image predefClassFile saveDir
|
||||
win = MainWindow(lang=args.lang,
|
||||
win = MainWindow(lang=args.lang, gpu=args.gpu,
|
||||
defaultPrefdefClassFile=args.predefined_classes_file)
|
||||
win.show()
|
||||
return app, win
|
||||
|
|
|
@ -44,6 +44,9 @@ public:
|
|||
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
|
||||
return std::distance(first, std::max_element(first, last));
|
||||
}
|
||||
|
||||
static void GetAllFiles(const char *dir_name,
|
||||
std::vector<std::string> &all_inputs);
|
||||
};
|
||||
|
||||
} // namespace PaddleOCR
|
|
@ -77,7 +77,7 @@ opencv3/
|
|||
|
||||
#### 1.2.1 直接下载安装
|
||||
|
||||
* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
|
||||
* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
|
||||
|
||||
* 下载之后使用下面的方法解压。
|
||||
|
||||
|
@ -89,10 +89,11 @@ tar -xf paddle_inference.tgz
|
|||
|
||||
#### 1.2.2 预测库源码编译
|
||||
* 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。
|
||||
* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
|
||||
* 可以参考[Paddle预测库安装编译说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi) 的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
|
||||
|
||||
```shell
|
||||
git clone https://github.com/PaddlePaddle/Paddle.git
|
||||
git checkout release/2.1
|
||||
```
|
||||
|
||||
* 进入Paddle目录后,编译方法如下。
|
||||
|
@ -115,7 +116,7 @@ make -j
|
|||
make inference_lib_dist
|
||||
```
|
||||
|
||||
更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)。
|
||||
更多编译参数选项介绍可以参考[文档说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi)。
|
||||
|
||||
|
||||
* 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。
|
||||
|
@ -140,11 +141,11 @@ build/paddle_inference_install_dir/
|
|||
```
|
||||
inference/
|
||||
|-- det_db
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdimodel
|
||||
| |--inference.pdiparams
|
||||
| |--inference.pdmodel
|
||||
|-- rec_rcnn
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdiparams
|
||||
| |--inference.pdmodel
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -78,8 +78,7 @@ opencv3/
|
|||
|
||||
#### 1.2.1 Direct download and installation
|
||||
|
||||
* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the
|
||||
[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website.
|
||||
[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html). You can view and select the appropriate version of the inference library on the official website.
|
||||
|
||||
|
||||
* After downloading, use the following method to uncompress.
|
||||
|
@ -97,9 +96,10 @@ Finally you can see the following files in the folder of `paddle_inference/`.
|
|||
|
||||
```shell
|
||||
git clone https://github.com/PaddlePaddle/Paddle.git
|
||||
git checkout release/2.1
|
||||
```
|
||||
|
||||
* After entering the Paddle directory, the compilation method is as follows.
|
||||
* After entering the Paddle directory, the commands to compile the paddle inference library are as follows.
|
||||
|
||||
```shell
|
||||
rm -rf build
|
||||
|
@ -119,7 +119,7 @@ make -j
|
|||
make inference_lib_dist
|
||||
```
|
||||
|
||||
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html).
|
||||
For more compilation parameter options, please refer to the [document](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi).
|
||||
|
||||
|
||||
* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
|
||||
|
@ -144,11 +144,11 @@ Among them, `paddle` is the Paddle library required for C++ prediction later, an
|
|||
```
|
||||
inference/
|
||||
|-- det_db
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdimodel
|
||||
| |--inference.pdiparams
|
||||
| |--inference.pdmodel
|
||||
|-- rec_rcnn
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdparams
|
||||
| |--inference.pdiparams
|
||||
| |--inference.pdmodel
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -27,9 +27,12 @@
|
|||
#include <fstream>
|
||||
#include <numeric>
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <include/config.h>
|
||||
#include <include/ocr_det.h>
|
||||
#include <include/ocr_rec.h>
|
||||
#include <include/utility.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
@ -47,13 +50,8 @@ int main(int argc, char **argv) {
|
|||
config.PrintConfigInfo();
|
||||
|
||||
std::string img_path(argv[2]);
|
||||
|
||||
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
|
||||
|
||||
if (!srcimg.data) {
|
||||
std::cerr << "[ERROR] image read failed! image path: " << img_path << "\n";
|
||||
exit(1);
|
||||
}
|
||||
std::vector<std::string> all_img_names;
|
||||
Utility::GetAllFiles((char *)img_path.c_str(), all_img_names);
|
||||
|
||||
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
|
||||
config.gpu_mem, config.cpu_math_library_num_threads,
|
||||
|
@ -76,18 +74,30 @@ int main(int argc, char **argv) {
|
|||
config.use_tensorrt, config.use_fp16);
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
det.Run(srcimg, boxes);
|
||||
|
||||
rec.Run(boxes, srcimg, cls);
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||
std::cout << "Cost "
|
||||
<< double(duration.count()) *
|
||||
std::chrono::microseconds::period::num /
|
||||
std::chrono::microseconds::period::den
|
||||
<< "s" << std::endl;
|
||||
for (auto img_dir : all_img_names) {
|
||||
LOG(INFO) << "The predict img: " << img_dir;
|
||||
|
||||
cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
|
||||
if (!srcimg.data) {
|
||||
std::cerr << "[ERROR] image read failed! image path: " << img_path
|
||||
<< "\n";
|
||||
exit(1);
|
||||
}
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
|
||||
det.Run(srcimg, boxes);
|
||||
|
||||
rec.Run(boxes, srcimg, cls);
|
||||
auto end = std::chrono::system_clock::now();
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
|
||||
std::cout << "Cost "
|
||||
<< double(duration.count()) *
|
||||
std::chrono::microseconds::period::num /
|
||||
std::chrono::microseconds::period::den
|
||||
<< "s" << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) {
|
|||
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
|
||||
: paddle_infer::Config::Precision::kFloat32,
|
||||
false, false);
|
||||
std::map<std::string, std::vector<int>> min_input_shape = {
|
||||
{"x", {1, 3, 50, 50}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 20, 20}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 10, 10}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 10, 10}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 20, 20}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 20, 20}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 20, 20}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 20, 20}},
|
||||
{"elementwise_add_7", {1, 56, 2, 2}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 2, 2}}};
|
||||
std::map<std::string, std::vector<int>> max_input_shape = {
|
||||
{"x", {1, 3, this->max_side_len_, this->max_side_len_}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 400, 400}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 200, 200}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 200, 200}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 400, 400}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 400, 400}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 400, 400}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 400, 400}},
|
||||
{"elementwise_add_7", {1, 56, 400, 400}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 400, 400}}};
|
||||
std::map<std::string, std::vector<int>> opt_input_shape = {
|
||||
{"x", {1, 3, 640, 640}},
|
||||
{"conv2d_92.tmp_0", {1, 96, 160, 160}},
|
||||
{"conv2d_91.tmp_0", {1, 96, 80, 80}},
|
||||
{"nearest_interp_v2_1.tmp_0", {1, 96, 80, 80}},
|
||||
{"nearest_interp_v2_2.tmp_0", {1, 96, 160, 160}},
|
||||
{"nearest_interp_v2_3.tmp_0", {1, 24, 160, 160}},
|
||||
{"nearest_interp_v2_4.tmp_0", {1, 24, 160, 160}},
|
||||
{"nearest_interp_v2_5.tmp_0", {1, 24, 160, 160}},
|
||||
{"elementwise_add_7", {1, 56, 40, 40}},
|
||||
{"nearest_interp_v2_0.tmp_0", {1, 96, 40, 40}}};
|
||||
|
||||
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
|
||||
opt_input_shape);
|
||||
}
|
||||
} else {
|
||||
config.DisableGpu();
|
||||
|
@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
|
|||
config.SwitchIrOptim(true);
|
||||
|
||||
config.EnableMemoryOptim();
|
||||
config.DisableGlogInfo();
|
||||
// config.DisableGlogInfo();
|
||||
|
||||
this->predictor_ = CreatePredictor(config);
|
||||
}
|
||||
|
|
|
@ -25,8 +25,9 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
|
|||
|
||||
std::cout << "The predicted text is :" << std::endl;
|
||||
int index = 0;
|
||||
for (int i = boxes.size() - 1; i >= 0; i--) {
|
||||
for (int i = 0; i < boxes.size(); i++) {
|
||||
crop_img = GetRotateCropImage(srcimg, boxes[i]);
|
||||
|
||||
if (cls != nullptr) {
|
||||
crop_img = cls->Run(crop_img);
|
||||
}
|
||||
|
@ -105,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
|
|||
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
|
||||
: paddle_infer::Config::Precision::kFloat32,
|
||||
false, false);
|
||||
std::map<std::string, std::vector<int>> min_input_shape = {
|
||||
{"x", {1, 3, 32, 10}}};
|
||||
std::map<std::string, std::vector<int>> max_input_shape = {
|
||||
{"x", {1, 3, 32, 2000}}};
|
||||
std::map<std::string, std::vector<int>> opt_input_shape = {
|
||||
{"x", {1, 3, 32, 320}}};
|
||||
|
||||
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
|
||||
opt_input_shape);
|
||||
}
|
||||
} else {
|
||||
config.DisableGpu();
|
||||
|
|
|
@ -77,19 +77,13 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
|
|||
|
||||
int resize_h = int(float(h) * ratio);
|
||||
int resize_w = int(float(w) * ratio);
|
||||
|
||||
|
||||
resize_h = max(int(round(float(resize_h) / 32) * 32), 32);
|
||||
resize_w = max(int(round(float(resize_w) / 32) * 32), 32);
|
||||
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
|
||||
ratio_h = float(resize_h) / float(h);
|
||||
ratio_w = float(resize_w) / float(w);
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(640, 640));
|
||||
ratio_h = float(640) / float(h);
|
||||
ratio_w = float(640) / float(w);
|
||||
}
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
|
||||
ratio_h = float(resize_h) / float(h);
|
||||
ratio_w = float(resize_w) / float(w);
|
||||
}
|
||||
|
||||
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
|
||||
|
@ -108,23 +102,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
|
|||
resize_w = imgW;
|
||||
else
|
||||
resize_w = int(ceilf(imgH * ratio));
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
|
||||
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
|
||||
{127, 127, 127});
|
||||
} else {
|
||||
int k = int(img.cols * 32 / img.rows);
|
||||
if (k >= 100) {
|
||||
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
|
||||
cv::BORDER_CONSTANT, {127, 127, 127});
|
||||
}
|
||||
}
|
||||
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
|
||||
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
|
||||
{127, 127, 127});
|
||||
}
|
||||
|
||||
void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
|
||||
|
@ -142,15 +125,11 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
|
|||
else
|
||||
resize_w = int(ceilf(imgH * ratio));
|
||||
|
||||
if (!use_tensorrt) {
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
if (resize_w < imgW) {
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
|
||||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
|
||||
}
|
||||
} else {
|
||||
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
|
||||
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
|
||||
cv::INTER_LINEAR);
|
||||
if (resize_w < imgW) {
|
||||
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
|
||||
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,12 +12,14 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <dirent.h>
|
||||
#include <include/utility.h>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <vector>
|
||||
|
||||
#include <include/utility.h>
|
||||
|
||||
namespace PaddleOCR {
|
||||
|
||||
std::vector<std::string> Utility::ReadDict(const std::string &path) {
|
||||
|
@ -57,4 +59,37 @@ void Utility::VisualizeBboxes(
|
|||
<< std::endl;
|
||||
}
|
||||
|
||||
// list all files under a directory
|
||||
// Collect the input paths designated by `dir_name` into `all_inputs`.
//
// - If `dir_name` is a directory, every entry in it except "." and ".." is
//   appended as "<dir_name>/<entry>". The listing is not recursive and the
//   entries are not filtered by type or extension.
// - If `dir_name` exists but is not a directory, `dir_name` itself is
//   appended, so a single file path can be passed in as well.
// - If `dir_name` is NULL, cannot be stat'ed or cannot be opened, a message
//   is printed and `all_inputs` is left untouched.
void Utility::GetAllFiles(const char *dir_name,
                          std::vector<std::string> &all_inputs) {
  if (NULL == dir_name) {
    std::cout << " dir_name is null ! " << std::endl;
    return;
  }

  struct stat s;
  // Check the result of lstat(): on failure (e.g. the path does not exist)
  // `s` is left uninitialised and must not be inspected.
  if (lstat(dir_name, &s) != 0) {
    std::cout << "Can not stat " << dir_name << std::endl;
    return;
  }

  if (!S_ISDIR(s.st_mode)) {
    std::cout << "dir_name is not a valid directory !" << std::endl;
    all_inputs.push_back(dir_name);
    return;
  }

  DIR *dir = opendir(dir_name);
  if (NULL == dir) {
    std::cout << "Can not open dir " << dir_name << std::endl;
    return;
  }
  std::cout << "Successfully opened the dir !" << std::endl;

  struct dirent *entry; // return value for readdir()
  while ((entry = readdir(dir)) != NULL) {
    const std::string entry_name(entry->d_name);
    // Skip the self and parent directory entries.
    if (entry_name == "." || entry_name == "..")
      continue;
    all_inputs.push_back(dir_name + std::string("/") + entry_name);
  }

  // Release the directory stream; it was previously leaked on every call.
  closedir(dir);
}
|
||||
|
||||
} // namespace PaddleOCR
|
|
@ -12,9 +12,10 @@ cmake .. \
|
|||
-DWITH_MKL=ON \
|
||||
-DWITH_GPU=OFF \
|
||||
-DWITH_STATIC_LIB=OFF \
|
||||
-DUSE_TENSORRT=OFF \
|
||||
-DWITH_TENSORRT=OFF \
|
||||
-DOPENCV_DIR=${OPENCV_DIR} \
|
||||
-DCUDNN_LIB=${CUDNN_LIB_DIR} \
|
||||
-DCUDA_LIB=${CUDA_LIB_DIR} \
|
||||
-DTENSORRT_DIR=${TENSORRT_DIR} \
|
||||
|
||||
make -j
|
||||
|
|
|
@ -20,10 +20,10 @@ cls_thresh 0.9
|
|||
|
||||
# rec config
|
||||
rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
|
||||
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
|
||||
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
|
||||
|
||||
# show the detection results
|
||||
visualize 1
|
||||
visualize 0
|
||||
|
||||
# use_tensorrt
|
||||
use_tensorrt 0
|
||||
|
|
|
@ -6,7 +6,7 @@ paddle-lite is a lightweight inference engine for PaddlePaddle. It provides effi
|
|||
|
||||
## 1. Preparation
|
||||
|
||||
### 运行准备
|
||||
### Preparation environment
|
||||
|
||||
- Computer (for Compiling Paddle Lite)
|
||||
- Mobile phone (arm7 or arm8)
|
||||
|
@ -87,8 +87,8 @@ The following table also provides a series of models that can be deployed on mob
|
|||
|
||||
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|
||||
|---|---|---|---|---|---|---|
|
||||
|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
|
||||
|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
|
||||
|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
|
||||
|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
|
||||
|
||||
If you directly use the model in the above table for deployment, you can skip the following steps and directly read [Section 2.2](#2.2 Run optimized model on Phone).
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ im_show.save('result.jpg')
|
|||
from paddleocr import PaddleOCR, draw_ocr
|
||||
ocr = PaddleOCR() # need to run only once to download and load model into memory
|
||||
img_path = 'PaddleOCR/doc/imgs/11.jpg'
|
||||
result = ocr.ocr(img_path)
|
||||
result = ocr.ocr(img_path,cls=False)
|
||||
for line in result:
|
||||
print(line)
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
# Distributed training
|
||||
|
||||
## Introduction
|
||||
|
||||
The high performance of distributed training is one of the core advantages of PaddlePaddle. In classification tasks, distributed training can achieve an almost linear speedup ratio. OCR training tasks generally need massive amounts of training data. For example, the ppocrv2.0 recognition model is trained on a dataset of about 18 million (1800W) samples, which is very time-consuming on a single machine. Therefore, distributed training is used in PaddleOCR to speed up the training task. For more information about distributed training, please refer to the [distributed training quick start tutorial](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/parameter_server/ps_quick_start.html).
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Training with single machine
|
||||
|
||||
Take recognition as an example. After the data is prepared locally, start the training task with the interface of `paddle.distributed.launch`. The start command is as follows:
|
||||
|
||||
```shell
|
||||
python3 -m paddle.distributed.launch \
|
||||
--log_dir=./log/ \
|
||||
--gpus '0,1,2,3,4,5,6,7' \
|
||||
tools/train.py \
|
||||
-c configs/rec/rec_mv3_none_bilstm_ctc.yml
|
||||
```
|
||||
|
||||
### Training with multi machine
|
||||
|
||||
Compared with training on a single machine, training on multiple machines only needs the additional parameter `--ips` in the start command. It represents the IP list of the machines used for distributed training, with the IPs of different machines separated by commas. The start command is as follows:
|
||||
|
||||
```shell
|
||||
ip_list="192.168.0.1,192.168.0.2"
|
||||
python3 -m paddle.distributed.launch \
|
||||
--log_dir=./log/ \
|
||||
--ips="${ip_list}" \
|
||||
--gpus="0,1,2,3,4,5,6,7" \
|
||||
tools/train.py \
|
||||
-c configs/rec/rec_mv3_none_bilstm_ctc.yml
|
||||
```
|
||||
|
||||
**Notice:**
|
||||
* The IP addresses of different machines need to be separated by commas, which can be queried through `ifconfig` or `ipconfig`.
|
||||
* Password-free login must be configured between the machines, and each machine must be able to `ping` the others directly; otherwise communication cannot be established between them.
|
||||
* The code, data and start command on the different machines must be completely consistent, and the start command must then be run on all machines. The first machine in the `ip_list` is set to `trainer0`, and so on.
|
||||
|
||||
|
||||
## Performance comparison
|
||||
|
||||
* Based on a public recognition dataset of about 260,000 (26W) samples (LSVT, rctw, mtwi), with training on one 8-card P40 machine and on two 8-card P40 machines, the final time consumption is as follows.
|
||||
|
||||
| Model | Config file | Number of machines | Number of GPUs per machine | Training time | Recognition acc | Speedup ratio |
|
||||
| :-------: | :------------: | :----------------: | :----------------------------: | :------------------: | :--------------: | :-----------: |
|
||||
| CRNN | configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml | 1 | 8 | 60h | 66.7% | - |
|
||||
| CRNN | configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml | 2 | 8 | 40h | 67.0% | 150% |
|
||||
|
||||
It can be seen that the training time is shortened from 60h to 40h, the speedup ratio can reach 150% (60h / 40h), and the efficiency is 75% (60h / (40h * 2)).
|
|
@ -103,14 +103,14 @@ python3 generate_multi_language_configs.py -l it \
|
|||
| german_mobile_v2.0_rec | ppocr/utils/dict/german_dict.txt | Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
|
||||
| korean_mobile_v2.0_rec | ppocr/utils/dict/korean_dict.txt | Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
|
||||
| japan_mobile_v2.0_rec | ppocr/utils/dict/japan_dict.txt | Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
|
||||
| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
|
||||
| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
|
||||
| te_mobile_v2.0_rec | ppocr/utils/dict/te_dict.txt | Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
|
||||
| ka_mobile_v2.0_rec | ppocr/utils/dict/ka_dict.txt | Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
|
||||
| ta_mobile_v2.0_rec | ppocr/utils/dict/ta_dict.txt | Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
|
||||
| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
|
||||
|
||||
For more supported languages, please refer to : [Multi-language model](./multi_languages_en.md)
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ Visualization of results
|
|||
from paddleocr import PaddleOCR,draw_ocr
|
||||
ocr = PaddleOCR(lang='en') # need to run only once to download and load model into memory
|
||||
img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
|
||||
result = ocr.ocr(img_path)
|
||||
result = ocr.ocr(img_path, cls=False)
|
||||
for line in result:
|
||||
print(line)
|
||||
|
||||
|
|
BIN
doc/joinus.PNG
BIN
doc/joinus.PNG
Binary file not shown.
Before Width: | Height: | Size: 102 KiB After Width: | Height: | Size: 102 KiB |
122
paddleocr.py
122
paddleocr.py
|
@ -30,7 +30,7 @@ from ppocr.utils.logging import get_logger
|
|||
|
||||
logger = get_logger()
|
||||
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
|
||||
from tools.infer.utility import draw_ocr
|
||||
from tools.infer.utility import draw_ocr, init_args, str2bool
|
||||
|
||||
__all__ = ['PaddleOCR']
|
||||
|
||||
|
@ -167,106 +167,24 @@ def maybe_download(model_storage_directory, url):
|
|||
os.remove(tmp_path)
|
||||
|
||||
|
||||
def parse_args(mMain=True, add_help=True):
|
||||
def parse_args(mMain=True):
|
||||
import argparse
|
||||
parser = init_args()
|
||||
parser.add_help = mMain
|
||||
parser.add_argument("--lang", type=str, default='ch')
|
||||
parser.add_argument("--det", type=str2bool, default=True)
|
||||
parser.add_argument("--rec", type=str2bool, default=True)
|
||||
|
||||
def str2bool(v):
|
||||
return v.lower() in ("true", "t", "1")
|
||||
|
||||
for action in parser._actions:
|
||||
if action.dest == 'rec_char_dict_path':
|
||||
action.default = None
|
||||
if mMain:
|
||||
parser = argparse.ArgumentParser(add_help=add_help)
|
||||
# params for prediction engine
|
||||
parser.add_argument("--use_gpu", type=str2bool, default=True)
|
||||
parser.add_argument("--ir_optim", type=str2bool, default=True)
|
||||
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
|
||||
parser.add_argument("--gpu_mem", type=int, default=8000)
|
||||
|
||||
# params for text detector
|
||||
parser.add_argument("--image_dir", type=str)
|
||||
parser.add_argument("--det_algorithm", type=str, default='DB')
|
||||
parser.add_argument("--det_model_dir", type=str, default=None)
|
||||
parser.add_argument("--det_limit_side_len", type=float, default=960)
|
||||
parser.add_argument("--det_limit_type", type=str, default='max')
|
||||
|
||||
# DB params
|
||||
parser.add_argument("--det_db_thresh", type=float, default=0.3)
|
||||
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
|
||||
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
|
||||
parser.add_argument("--use_dilation", type=bool, default=False)
|
||||
parser.add_argument("--det_db_score_mode", type=str, default="fast")
|
||||
|
||||
# EAST params
|
||||
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
|
||||
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
|
||||
parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
|
||||
|
||||
# params for text recognizer
|
||||
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
|
||||
parser.add_argument("--rec_model_dir", type=str, default=None)
|
||||
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
|
||||
parser.add_argument("--rec_char_type", type=str, default='ch')
|
||||
parser.add_argument("--rec_batch_num", type=int, default=6)
|
||||
parser.add_argument("--max_text_length", type=int, default=25)
|
||||
parser.add_argument("--rec_char_dict_path", type=str, default=None)
|
||||
parser.add_argument("--use_space_char", type=bool, default=True)
|
||||
parser.add_argument("--drop_score", type=float, default=0.5)
|
||||
|
||||
# params for text classifier
|
||||
parser.add_argument("--cls_model_dir", type=str, default=None)
|
||||
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
|
||||
parser.add_argument("--label_list", type=list, default=['0', '180'])
|
||||
parser.add_argument("--cls_batch_num", type=int, default=6)
|
||||
parser.add_argument("--cls_thresh", type=float, default=0.9)
|
||||
|
||||
parser.add_argument("--enable_mkldnn", type=bool, default=False)
|
||||
parser.add_argument("--use_zero_copy_run", type=bool, default=False)
|
||||
parser.add_argument("--use_pdserving", type=str2bool, default=False)
|
||||
|
||||
parser.add_argument("--lang", type=str, default='ch')
|
||||
parser.add_argument("--det", type=str2bool, default=True)
|
||||
parser.add_argument("--rec", type=str2bool, default=True)
|
||||
parser.add_argument("--use_angle_cls", type=str2bool, default=False)
|
||||
return parser.parse_args()
|
||||
else:
|
||||
return argparse.Namespace(
|
||||
use_gpu=True,
|
||||
ir_optim=True,
|
||||
use_tensorrt=False,
|
||||
gpu_mem=8000,
|
||||
image_dir='',
|
||||
det_algorithm='DB',
|
||||
det_model_dir=None,
|
||||
det_limit_side_len=960,
|
||||
det_limit_type='max',
|
||||
det_db_thresh=0.3,
|
||||
det_db_box_thresh=0.5,
|
||||
det_db_unclip_ratio=1.6,
|
||||
use_dilation=False,
|
||||
det_db_score_mode="fast",
|
||||
det_east_score_thresh=0.8,
|
||||
det_east_cover_thresh=0.1,
|
||||
det_east_nms_thresh=0.2,
|
||||
rec_algorithm='CRNN',
|
||||
rec_model_dir=None,
|
||||
rec_image_shape="3, 32, 320",
|
||||
rec_char_type='ch',
|
||||
rec_batch_num=6,
|
||||
max_text_length=25,
|
||||
rec_char_dict_path=None,
|
||||
use_space_char=True,
|
||||
drop_score=0.5,
|
||||
cls_model_dir=None,
|
||||
cls_image_shape="3, 48, 192",
|
||||
label_list=['0', '180'],
|
||||
cls_batch_num=6,
|
||||
cls_thresh=0.9,
|
||||
enable_mkldnn=False,
|
||||
use_zero_copy_run=False,
|
||||
use_pdserving=False,
|
||||
lang='ch',
|
||||
det=True,
|
||||
rec=True,
|
||||
use_angle_cls=False)
|
||||
inference_args_dict = {}
|
||||
for action in parser._actions:
|
||||
inference_args_dict[action.dest] = action.default
|
||||
return argparse.Namespace(**inference_args_dict)
|
||||
|
||||
|
||||
class PaddleOCR(predict_system.TextSystem):
|
||||
|
@ -276,7 +194,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
args:
|
||||
**kwargs: other params show in paddleocr --help
|
||||
"""
|
||||
postprocess_params = parse_args(mMain=False, add_help=False)
|
||||
postprocess_params = parse_args(mMain=False)
|
||||
postprocess_params.__dict__.update(**kwargs)
|
||||
self.use_angle_cls = postprocess_params.use_angle_cls
|
||||
lang = postprocess_params.lang
|
||||
|
@ -346,7 +264,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
# init det_model and rec_model
|
||||
super().__init__(postprocess_params)
|
||||
|
||||
def ocr(self, img, det=True, rec=True, cls=False):
|
||||
def ocr(self, img, det=True, rec=True, cls=True):
|
||||
"""
|
||||
ocr with paddleocr
|
||||
args:
|
||||
|
@ -358,9 +276,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
if isinstance(img, list) and det == True:
|
||||
logger.error('When input a list of images, det must be false')
|
||||
exit(0)
|
||||
if cls == False:
|
||||
self.use_angle_cls = False
|
||||
elif cls == True and self.use_angle_cls == False:
|
||||
if cls == True and self.use_angle_cls == False:
|
||||
logger.warning(
|
||||
'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
|
||||
)
|
||||
|
@ -382,7 +298,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
if isinstance(img, np.ndarray) and len(img.shape) == 2:
|
||||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||
if det and rec:
|
||||
dt_boxes, rec_res = self.__call__(img)
|
||||
dt_boxes, rec_res = self.__call__(img, cls)
|
||||
return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
|
||||
elif det and not rec:
|
||||
dt_boxes, elapse = self.text_detector(img)
|
||||
|
@ -392,7 +308,7 @@ class PaddleOCR(predict_system.TextSystem):
|
|||
else:
|
||||
if not isinstance(img, list):
|
||||
img = [img]
|
||||
if self.use_angle_cls:
|
||||
if self.use_angle_cls and cls:
|
||||
img, cls_res, elapse = self.text_classifier(img)
|
||||
if not rec:
|
||||
return cls_res
|
||||
|
|
|
@ -84,7 +84,7 @@ class TextSystem(object):
|
|||
cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
|
||||
logger.info(bno, rec_res[bno])
|
||||
|
||||
def __call__(self, img):
|
||||
def __call__(self, img, cls=True):
|
||||
ori_im = img.copy()
|
||||
dt_boxes, elapse = self.text_detector(img)
|
||||
|
||||
|
@ -98,7 +98,7 @@ class TextSystem(object):
|
|||
tmp_box = copy.deepcopy(dt_boxes[bno])
|
||||
img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
|
||||
img_crop_list.append(img_crop)
|
||||
if self.use_angle_cls:
|
||||
if self.use_angle_cls and cls:
|
||||
img_crop_list, angle_list, elapse = self.text_classifier(
|
||||
img_crop_list)
|
||||
|
||||
|
|
|
@ -23,13 +23,15 @@ import math
|
|||
from paddle import inference
|
||||
import time
|
||||
from ppocr.utils.logging import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def parse_args():
|
||||
def str2bool(v):
|
||||
return v.lower() in ("true", "t", "1")
|
||||
def str2bool(v):
|
||||
return v.lower() in ("true", "t", "1")
|
||||
|
||||
|
||||
def init_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
# params for prediction engine
|
||||
parser.add_argument("--use_gpu", type=str2bool, default=True)
|
||||
|
@ -110,6 +112,12 @@ def parse_args():
|
|||
|
||||
parser.add_argument("--benchmark", type=bool, default=False)
|
||||
parser.add_argument("--save_log_path", type=str, default="./log_output/")
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = init_args()
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
@ -221,22 +229,97 @@ def create_predictor(args, mode, logger):
|
|||
config.enable_use_gpu(args.gpu_mem, 0)
|
||||
if args.use_tensorrt:
|
||||
config.enable_tensorrt_engine(
|
||||
precision_mode=inference.PrecisionType.Half
|
||||
if args.use_fp16 else inference.PrecisionType.Float32,
|
||||
max_batch_size=args.max_batch_size)
|
||||
precision_mode=inference.PrecisionType.Float32,
|
||||
max_batch_size=args.max_batch_size,
|
||||
min_subgraph_size=3) # skip the minimum trt subgraph
|
||||
if mode == "det" and "mobile" in model_file_path:
|
||||
min_input_shape = {
|
||||
"x": [1, 3, 50, 50],
|
||||
"conv2d_92.tmp_0": [1, 96, 20, 20],
|
||||
"conv2d_91.tmp_0": [1, 96, 10, 10],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
|
||||
"elementwise_add_7": [1, 56, 2, 2],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 2, 2]
|
||||
}
|
||||
max_input_shape = {
|
||||
"x": [1, 3, 2000, 2000],
|
||||
"conv2d_92.tmp_0": [1, 96, 400, 400],
|
||||
"conv2d_91.tmp_0": [1, 96, 200, 200],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400],
|
||||
"elementwise_add_7": [1, 56, 400, 400],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 400, 400]
|
||||
}
|
||||
opt_input_shape = {
|
||||
"x": [1, 3, 640, 640],
|
||||
"conv2d_92.tmp_0": [1, 96, 160, 160],
|
||||
"conv2d_91.tmp_0": [1, 96, 80, 80],
|
||||
"nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
|
||||
"elementwise_add_7": [1, 56, 40, 40],
|
||||
"nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
|
||||
}
|
||||
if mode == "det" and "server" in model_file_path:
|
||||
min_input_shape = {
|
||||
"x": [1, 3, 50, 50],
|
||||
"conv2d_59.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
|
||||
}
|
||||
max_input_shape = {
|
||||
"x": [1, 3, 2000, 2000],
|
||||
"conv2d_59.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
|
||||
}
|
||||
opt_input_shape = {
|
||||
"x": [1, 3, 640, 640],
|
||||
"conv2d_59.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
|
||||
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
|
||||
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
|
||||
}
|
||||
elif mode == "rec":
|
||||
min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
|
||||
max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
|
||||
opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
|
||||
elif mode == "cls":
|
||||
min_input_shape = {"x": [args.rec_batch_num, 3, 48, 10]}
|
||||
max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
|
||||
opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
|
||||
else:
|
||||
min_input_shape = {"x": [1, 3, 10, 10]}
|
||||
max_input_shape = {"x": [1, 3, 1000, 1000]}
|
||||
opt_input_shape = {"x": [1, 3, 500, 500]}
|
||||
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
|
||||
opt_input_shape)
|
||||
|
||||
else:
|
||||
config.disable_gpu()
|
||||
if hasattr(args, "cpu_threads"):
|
||||
config.set_cpu_math_library_num_threads(args.cpu_threads)
|
||||
else:
|
||||
# default cpu threads as 10
|
||||
config.set_cpu_math_library_num_threads(10)
|
||||
if args.enable_mkldnn:
|
||||
# cache 10 different shapes for mkldnn to avoid memory leak
|
||||
config.set_mkldnn_cache_capacity(10)
|
||||
config.enable_mkldnn()
|
||||
# TODO LDOUBLEV: fix mkldnn bug when batch_size > 1
|
||||
#config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
|
||||
args.rec_batch_num = 1
|
||||
|
||||
# enable memory optim
|
||||
config.enable_memory_optim()
|
||||
|
@ -299,7 +382,7 @@ def draw_ocr(image,
|
|||
txts=None,
|
||||
scores=None,
|
||||
drop_score=0.5,
|
||||
font_path="./doc/simfang.ttf"):
|
||||
font_path="./doc/fonts/simfang.ttf"):
|
||||
"""
|
||||
Visualize the results of OCR detection and recognition
|
||||
args:
|
||||
|
@ -532,22 +615,4 @@ def get_current_memory_mb(gpu_id=None):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_img = "./doc/test_v2"
|
||||
predict_txt = "./doc/predict.txt"
|
||||
f = open(predict_txt, 'r')
|
||||
data = f.readlines()
|
||||
img_path, anno = data[0].strip().split('\t')
|
||||
img_name = os.path.basename(img_path)
|
||||
img_path = os.path.join(test_img, img_name)
|
||||
image = Image.open(img_path)
|
||||
|
||||
data = json.loads(anno)
|
||||
boxes, txts, scores = [], [], []
|
||||
for dic in data:
|
||||
boxes.append(dic['points'])
|
||||
txts.append(dic['transcription'])
|
||||
scores.append(round(dic['scores'], 3))
|
||||
|
||||
new_img = draw_ocr(image, boxes, txts, scores)
|
||||
|
||||
cv2.imwrite(img_name, new_img)
|
||||
pass
|
||||
|
|
Loading…
Reference in New Issue