From b55b8eda220ae9cb83414fbdbc4819694df65fc6 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Thu, 4 Jun 2020 19:41:42 +0800 Subject: [PATCH] add windows doc --- README.md | 5 +++++ configs/rec/rec_chinese_lite_train.yml | 1 - configs/rec/rec_icdar15_train.yml | 3 +-- configs/rec/rec_mv3_none_bilstm_ctc.yml | 1 - configs/rec/rec_mv3_none_none_ctc.yml | 1 - configs/rec/rec_mv3_tps_bilstm_attn.yml | 1 - configs/rec/rec_mv3_tps_bilstm_ctc.yml | 1 - configs/rec/rec_r34_vd_none_bilstm_ctc.yml | 1 - configs/rec/rec_r34_vd_none_none_ctc.yml | 1 - configs/rec/rec_r34_vd_tps_bilstm_attn.yml | 1 - configs/rec/rec_r34_vd_tps_bilstm_ctc.yml | 1 - doc/inference.md | 4 ++++ doc/installation.md | 6 ++++++ doc/recognition.md | 7 +++++-- ppocr/data/rec/dataset_traversal.py | 19 ++++++++++--------- ppocr/data/rec/img_tools.py | 8 ++------ tools/infer/predict_rec.py | 12 +++++++++--- 17 files changed, 42 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 9b94466b..56e3f993 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 #### 2.inference模型下载 +*windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下* + #### (1)超轻量级中文OCR模型下载 ``` mkdir inference && cd inference @@ -63,6 +65,9 @@ cd .. # 设置PYTHONPATH环境变量 export PYTHONPATH=. +# windows下设置环境变量 +SET PYTHONPATH=. 
+ # 预测image_dir指定的单张图像 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_mv3_db/" --rec_model_dir="./inference/ch_rec_mv3_crnn/" diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml index 4517f3e2..b64313a1 100755 --- a/configs/rec/rec_chinese_lite_train.yml +++ b/configs/rec/rec_chinese_lite_train.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 320] max_text_length: 25 character_type: ch diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml index b783cc2f..934a9410 100755 --- a/configs/rec/rec_icdar15_train.yml +++ b/configs/rec/rec_icdar15_train.yml @@ -9,13 +9,12 @@ Global: eval_batch_step: 500 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en loss_type: ctc reader_yml: ./configs/rec/rec_icdar15_reader.yml - pretrain_weights: + pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy checkpoints: save_inference_dir: infer_img: diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml index 35b5206c..d2e096fb 100755 --- a/configs/rec/rec_mv3_none_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml index 72191349..ceec09ce 100755 --- a/configs/rec/rec_mv3_none_none_ctc.yml +++ b/configs/rec/rec_mv3_none_none_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff 
--git a/configs/rec/rec_mv3_tps_bilstm_attn.yml b/configs/rec/rec_mv3_tps_bilstm_attn.yml index c4f64f0a..7fc4f679 100755 --- a/configs/rec/rec_mv3_tps_bilstm_attn.yml +++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml index 8236dd8a..4b9660bc 100755 --- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml index c234cbb7..b71e8fea 100755 --- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml index 457c79ad..d9c9458d 100755 --- a/configs/rec/rec_r34_vd_none_none_ctc.yml +++ b/configs/rec/rec_r34_vd_none_none_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml index 2f351ae4..405082bd 100755 --- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: 
false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml index 93021709..517322c3 100755 --- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/doc/inference.md b/doc/inference.md index b16b89a9..0d5f45fd 100644 --- a/doc/inference.md +++ b/doc/inference.md @@ -166,6 +166,10 @@ STAR-Net文本识别模型推理,可以执行如下命令: python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" ``` +### 3.基于Attention损失的识别模型推理 + +基于Attention损失的识别模型与ctc不同,需要额外设置识别算法参数 --rec_algorithm="RARE" + RARE 文本识别模型推理,可以执行如下命令: ``` python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/sare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE" diff --git a/doc/installation.md b/doc/installation.md index 9bdd5379..f1edbf4a 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -8,6 +8,8 @@ PaddleOCR 工作环境 建议使用我们提供的docker运行PaddleOCR,有关docker使用请参考[链接](https://docs.docker.com/get-started/)。 +*如您希望使用 mac 或 windows直接运行预测代码,可以从第2步开始执行。* + 1. 
(建议)准备docker环境。第一次使用这个镜像,会自动下载该镜像,请耐心等待。 ``` # 切换到工作目录下 @@ -54,6 +56,10 @@ python3 -m pip install paddlepaddle-gpu==1.7.2.post97 -i https://pypi.tuna.tsing 如果您的机器安装的是CUDA10,请运行以下命令安装 python3 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple +如果您的机器是CPU,请运行以下命令安装 + +python3 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple + 更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。 ``` diff --git a/doc/recognition.md b/doc/recognition.md index ea38c0f3..7dd9ca7e 100644 --- a/doc/recognition.md +++ b/doc/recognition.md @@ -41,6 +41,8 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件,通 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt # 测试集标签 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt + + ``` 最终训练集应有如下文件结构: @@ -168,10 +170,11 @@ Global: 评估数据集可以通过 `configs/rec/rec_icdar15_reader.yml` 修改EvalReader中的 `label_file_path` 设置。 +*注意* 评估时必须确保配置文件中 infer_img 字段为空 ``` export CUDA_VISIBLE_DEVICES=0 # GPU 评估, Global.checkpoints 为待测权重 -python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy +python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` ### 预测 @@ -184,7 +187,7 @@ python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkp ``` # 预测英文结果 -python3 tools/infer_rec.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png ``` 预测图片: diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 6c2b7a1a..71b488f4 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ 
-42,14 +42,15 @@ class LMDBReader(object): self.max_text_length = params['max_text_length'] self.mode = params['mode'] self.drop_last = False - self.tps = False + self.use_tps = False if "tps" in params: - self.tps = True + self.ues_tps = True if params['mode'] == 'train': self.batch_size = params['train_batch_size_per_card'] - self.drop_last = params['drop_last'] + self.drop_last = True else: self.batch_size = params['test_batch_size_per_card'] + self.drop_last = False self.infer_img = params['infer_img'] def load_hierarchical_lmdb_dataset(self): @@ -114,7 +115,7 @@ class LMDBReader(object): img=img, image_shape=self.image_shape, char_ops=self.char_ops, - tps=self.tps, + tps=self.use_tps, infer_mode=True) yield norm_img else: @@ -181,15 +182,15 @@ class SimpleReader(object): self.max_text_length = params['max_text_length'] self.mode = params['mode'] self.infer_img = params['infer_img'] - self.tps = False + self.use_tps = False if "tps" in params: - self.tps = True - self.drop_last = False + self.ues_tps = True if params['mode'] == 'train': self.batch_size = params['train_batch_size_per_card'] - self.drop_last = params['drop_last'] + self.drop_last = True else: self.batch_size = params['test_batch_size_per_card'] + self.drop_last = False def __call__(self, process_id): if self.mode != 'train': @@ -206,7 +207,7 @@ class SimpleReader(object): img=img, image_shape=self.image_shape, char_ops=self.char_ops, - tps=self.tps, + tps=self.use_tps, infer_mode=True) yield norm_img else: diff --git a/ppocr/data/rec/img_tools.py b/ppocr/data/rec/img_tools.py index 6d7b66e9..57543293 100755 --- a/ppocr/data/rec/img_tools.py +++ b/ppocr/data/rec/img_tools.py @@ -95,14 +95,10 @@ def process_image(img, max_text_length=None, tps=None, infer_mode=False): - if not infer_mode or char_ops.character_type == "en": + if not infer_mode or char_ops.character_type == "en" or tps != None: norm_img = resize_norm_img(img, image_shape) else: - if tps != None and char_ops.character_type == "ch": - 
image_shape = [3, 32, 320] - norm_img = resize_norm_img(img, image_shape) - else: - norm_img = resize_norm_img_chinese(img, image_shape) + norm_img = resize_norm_img_chinese(img, image_shape) norm_img = norm_img[np.newaxis, :] if label is not None: char_num = char_ops.get_char_num() diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index e8b485fb..3417d500 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -38,8 +38,10 @@ class TextRecognizer(object): char_ops_params["character_dict_path"] = args.rec_char_dict_path if self.rec_algorithm != "RARE": char_ops_params['loss_type'] = 'ctc' + self.loss_type = 'ctc' else: char_ops_params['loss_type'] = 'attention' + self.loss_type = 'attention' self.char_ops = CharacterOps(char_ops_params) def resize_norm_img(self, img, max_wh_ratio): @@ -85,7 +87,7 @@ class TextRecognizer(object): self.input_tensor.copy_from_cpu(norm_img_batch) self.predictor.zero_copy_run() - if self.rec_algorithm != "RARE": + if self.loss_type == "ctc": rec_idx_batch = self.output_tensors[0].copy_to_cpu() rec_idx_lod = self.output_tensors[0].lod()[0] predict_batch = self.output_tensors[1].copy_to_cpu() @@ -139,9 +141,13 @@ if __name__ == "__main__": img_list.append(img) try: rec_res, predict_time = text_recognizer(img_list) - except: + except Exception as e: + print(e) logger.info( - "ERROR!! \nInput image shape is not equal with config. TPS does not support variable shape.\n" + "ERROR!!!! \n" + "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" + "If your model has tps module: " + "TPS does not support variable shape.\n" "Please set --rec_image_shape=input_shape and --rec_char_type='en' ") exit() for ino in range(len(img_list)):