add windows doc

parent 8d9324cab1
commit b55b8eda22
@@ -36,6 +36,8 @@ PaddleOCR aims to create a rich, leading, and practical OCR toolkit that helps
 
 #### 2. Download inference models
 
+*If wget is not installed in your Windows environment, you can copy the link into a browser to download the model, then extract it and place it in the corresponding directory.*
+
 #### (1) Download the ultra-lightweight Chinese OCR model
 ```
 mkdir inference && cd inference
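As a rough illustration of that note (not part of the commit), a model archive can also be fetched and unpacked with Python's standard library on Windows; the URL below is a placeholder for whichever model link the README lists:

```python
# Rough sketch, not from the commit: download and unpack a model archive
# without wget, e.g. on Windows. The URL is a placeholder -- use the actual
# model link from the README.
import tarfile
import urllib.request

url = "https://example.com/path/to/model_infer.tar"  # placeholder URL
archive, _ = urllib.request.urlretrieve(url, "model_infer.tar")
with tarfile.open(archive) as tar:
    tar.extractall("./inference/")  # place the extracted model under inference/
```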
@@ -63,6 +65,9 @@ cd ..
 # Set the PYTHONPATH environment variable
 export PYTHONPATH=.
 
+# Set the environment variable on Windows
+SET PYTHONPATH=.
+
 # Predict the single image specified by image_dir
 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_mv3_db/" --rec_model_dir="./inference/ch_rec_mv3_crnn/"
 
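The `PYTHONPATH=.` setting (export on Linux/mac, SET on Windows) just makes the repository root importable for the tools/ scripts. A minimal cross-platform sketch of the same idea, assuming the process is started from the PaddleOCR repository root:

```python
# Minimal sketch, assuming the current working directory is the PaddleOCR
# repository root. Prepending "." to sys.path has the same effect for this
# process as `export PYTHONPATH=.` or `SET PYTHONPATH=.`.
import os
import sys

sys.path.insert(0, os.path.abspath("."))
```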
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 320]
   max_text_length: 25
   character_type: ch
@@ -9,13 +9,12 @@ Global:
   eval_batch_step: 500
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
   loss_type: ctc
   reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights:
+  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
   checkpoints:
   save_inference_dir:
   infer_img:
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
@@ -166,6 +166,10 @@ STAR-Net text recognition model inference can be run with the following command:
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
 ```
 
+### 3. Attention-loss-based recognition model inference
+
+Unlike CTC-based models, recognition models based on the attention loss additionally require the recognition algorithm parameter --rec_algorithm="RARE"
+
 RARE text recognition model inference can be run with the following command:
 ```
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/sare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE"
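For context, the predictor change later in this commit keys its decoding branch off this flag; a simplified sketch of that mapping (paraphrased from the predict_rec.py hunk below, not the exact source):

```python
# Simplified sketch of how --rec_algorithm selects the decoding path,
# paraphrased from the predict_rec.py change later in this commit.
def select_loss_type(rec_algorithm):
    # RARE models are trained with the attention loss; other algorithms here use CTC.
    return "attention" if rec_algorithm == "RARE" else "ctc"

assert select_loss_type("CRNN") == "ctc"
assert select_loss_type("RARE") == "attention"
```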
@@ -8,6 +8,8 @@ PaddleOCR working environment
 
 We recommend running PaddleOCR in the docker image we provide; for docker usage please refer to this [link](https://docs.docker.com/get-started/).
 
+*If you want to run the prediction code directly on mac or windows, you can start from step 2.*
+
 1. (Recommended) Prepare the docker environment. The first time you use this image, it will be downloaded automatically; please be patient.
 ```
 # Switch to the working directory
@@ -54,6 +56,10 @@ python3 -m pip install paddlepaddle-gpu==1.7.2.post97 -i https://pypi.tuna.tsing
 If CUDA 10 is installed on your machine, run the following command to install
 python3 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple
 
+If your machine is CPU-only, run the following command to install
+
+python3 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
 For more version requirements, please follow the instructions in the [installation documentation](https://www.paddlepaddle.org.cn/install/quick).
 ```
 
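After installing either wheel, a quick sanity check can be run from Python; this is a sketch assuming PaddlePaddle 1.7.x, where `paddle.fluid.install_check` ships a built-in self-test:

```python
# Sanity-check sketch, assuming PaddlePaddle 1.7.x with the built-in
# install_check self-test available.
import paddle
import paddle.fluid as fluid

print(paddle.__version__)        # expect "1.7.2"
fluid.install_check.run_check()  # runs a small program to verify the install
```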
@@ -41,6 +41,8 @@ PaddleOCR provides a label file for training on the icdar2015 dataset, which
 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt
 # Test set labels
 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
+
+
 ```
 
 The final training set should have the following file structure:
@@ -168,10 +170,11 @@ Global:
 
 The evaluation dataset can be set in `configs/rec/rec_icdar15_reader.yml` by modifying `label_file_path` under EvalReader.
 
+*Note* When evaluating, make sure that the infer_img field in the configuration file is empty
 ```
 export CUDA_VISIBLE_DEVICES=0
 # GPU evaluation; Global.checkpoints is the weights to be evaluated
-python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
 ```
 
 ### Prediction
@@ -184,7 +187,7 @@ python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkp
 
 ```
 # Predict English results
-python3 tools/infer_rec.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
 ```
 
 Predicted image:
@@ -42,14 +42,15 @@ class LMDBReader(object):
         self.max_text_length = params['max_text_length']
         self.mode = params['mode']
         self.drop_last = False
-        self.tps = False
+        self.use_tps = False
         if "tps" in params:
-            self.tps = True
+            self.use_tps = True
         if params['mode'] == 'train':
             self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = params['drop_last']
+            self.drop_last = True
         else:
             self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
         self.infer_img = params['infer_img']
 
     def load_hierarchical_lmdb_dataset(self):
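The drop_last change above hard-codes the behaviour: training drops the trailing partial batch, evaluation keeps it. A standalone illustration of that semantics (not PaddleOCR code):

```python
# Standalone illustration of drop_last semantics (not PaddleOCR code):
# with drop_last=True the trailing partial batch is skipped.
def batch_starts(num_samples, batch_size, drop_last):
    end = num_samples - num_samples % batch_size if drop_last else num_samples
    return list(range(0, end, batch_size))

# Training (drop_last=True): the last 6 of 262 samples are dropped.
assert batch_starts(262, 256, drop_last=True) == [0]
# Evaluation (drop_last=False): the trailing partial batch is kept.
assert batch_starts(262, 256, drop_last=False) == [0, 256]
```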
@@ -114,7 +115,7 @@ class LMDBReader(object):
                     img=img,
                     image_shape=self.image_shape,
                     char_ops=self.char_ops,
-                    tps=self.tps,
+                    tps=self.use_tps,
                     infer_mode=True)
                 yield norm_img
         else:
@@ -181,15 +182,15 @@ class SimpleReader(object):
         self.max_text_length = params['max_text_length']
         self.mode = params['mode']
         self.infer_img = params['infer_img']
-        self.tps = False
+        self.use_tps = False
         if "tps" in params:
-            self.tps = True
-        self.drop_last = False
+            self.use_tps = True
         if params['mode'] == 'train':
             self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = params['drop_last']
+            self.drop_last = True
         else:
             self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
 
     def __call__(self, process_id):
         if self.mode != 'train':
@@ -206,7 +207,7 @@ class SimpleReader(object):
                     img=img,
                     image_shape=self.image_shape,
                     char_ops=self.char_ops,
-                    tps=self.tps,
+                    tps=self.use_tps,
                     infer_mode=True)
                 yield norm_img
         else:
@@ -95,14 +95,10 @@ def process_image(img,
                   max_text_length=None,
                   tps=None,
                   infer_mode=False):
-    if not infer_mode or char_ops.character_type == "en":
+    if not infer_mode or char_ops.character_type == "en" or tps != None:
         norm_img = resize_norm_img(img, image_shape)
     else:
-        if tps != None and char_ops.character_type == "ch":
-            image_shape = [3, 32, 320]
-            norm_img = resize_norm_img(img, image_shape)
-        else:
-            norm_img = resize_norm_img_chinese(img, image_shape)
+        norm_img = resize_norm_img_chinese(img, image_shape)
     norm_img = norm_img[np.newaxis, :]
     if label is not None:
         char_num = char_ops.get_char_num()
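The reworked branch sends every TPS model through the fixed-shape resize, since (as the error message later in this commit notes) TPS does not support variable input shapes; only non-TPS Chinese inference keeps the shape-adaptive resize. A hedged paraphrase of the decision, with the real resize functions reduced to their names:

```python
# Hedged paraphrase of the new branching in process_image; the real
# resize_norm_img / resize_norm_img_chinese implementations are reduced to names.
def choose_resize(infer_mode, character_type, tps):
    # Fixed-shape resize for training data, English models, or any TPS model
    # (TPS does not support variable input shapes).
    if not infer_mode or character_type == "en" or tps is not None:
        return "resize_norm_img"
    # Otherwise (Chinese inference without TPS) keep the shape-adaptive resize.
    return "resize_norm_img_chinese"
```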
@@ -38,8 +38,10 @@ class TextRecognizer(object):
             char_ops_params["character_dict_path"] = args.rec_char_dict_path
         if self.rec_algorithm != "RARE":
             char_ops_params['loss_type'] = 'ctc'
+            self.loss_type = 'ctc'
         else:
             char_ops_params['loss_type'] = 'attention'
+            self.loss_type = 'attention'
         self.char_ops = CharacterOps(char_ops_params)
 
     def resize_norm_img(self, img, max_wh_ratio):
@@ -85,7 +87,7 @@ class TextRecognizer(object):
             self.input_tensor.copy_from_cpu(norm_img_batch)
             self.predictor.zero_copy_run()
 
-            if self.rec_algorithm != "RARE":
+            if self.loss_type == "ctc":
                 rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                 rec_idx_lod = self.output_tensors[0].lod()[0]
                 predict_batch = self.output_tensors[1].copy_to_cpu()
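The renamed condition now branches on loss_type; the CTC branch that follows copies out an index batch whose decoding collapses repeats and drops the blank label. A generic sketch of that greedy CTC rule (illustration only, not the repo's implementation; the blank index is an assumption):

```python
# Generic CTC greedy decoding rule (illustration only, not the repo's code).
# Collapse consecutive repeats, then drop the blank label (blank index assumed 0).
def ctc_greedy_decode(indices, blank=0):
    decoded, prev = [], None
    for idx in indices:
        if idx != prev and idx != blank:
            decoded.append(idx)
        prev = idx
    return decoded

assert ctc_greedy_decode([0, 3, 3, 0, 7, 7, 7, 0]) == [3, 7]
```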
@@ -139,9 +141,13 @@ if __name__ == "__main__":
         img_list.append(img)
     try:
         rec_res, predict_time = text_recognizer(img_list)
-    except:
+    except Exception as e:
+        print(e)
         logger.info(
-            "ERROR!! \nInput image shape is not equal with config. TPS does not support variable shape.\n"
+            "ERROR!!!! \n"
+            "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
+            "If your model has tps module: "
+            "TPS does not support variable shape.\n"
             "Please set --rec_image_shape=input_shape and --rec_char_type='en' ")
         exit()
     for ino in range(len(img_list)):