revert dict and add space in character
This commit is contained in:
parent
ed3ed30ecc
commit
db3d78d8ea
12
README.md
12
README.md
|
@ -5,6 +5,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
|
|||
|
||||
**近期更新**
|
||||
- 2020.7.9 添加支持空格的识别模型,[识别效果](#支持空格的中文OCR效果展示)
|
||||
- 2020.7.9 添加数据增强、学习率衰减策略,具体参考[配置文件](./doc/doc_ch/config.md)
|
||||
- 2020.6.8 添加[数据集](./doc/doc_ch/datasets.md),并保持持续更新
|
||||
- 2020.6.5 支持 `attention` 模型导出 `inference_model`
|
||||
- 2020.6.5 支持单独预测识别时,输出结果得分
|
||||
|
@ -51,6 +52,8 @@ mkdir inference && cd inference
|
|||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
|
||||
# 下载超轻量级中文OCR模型的识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
|
||||
# 下载支持空格的超轻量级中文OCR模型的识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar && tar xf ch_rec_mv3_crnn_enhance_infer.tar
|
||||
cd ..
|
||||
```
|
||||
#### (2)通用中文OCR模型下载
|
||||
|
@ -60,6 +63,8 @@ mkdir inference && cd inference
|
|||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar && tar xf ch_det_r50_vd_db_infer.tar
|
||||
# 下载通用中文OCR模型的识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar && tar xf ch_rec_r34_vd_crnn_infer.tar
|
||||
# 下载支持空格的通用中文OCR模型的识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar && tar xf ch_rec_r34_vd_crnn_enhance_infer.tar
|
||||
cd ..
|
||||
```
|
||||
|
||||
|
@ -85,6 +90,13 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_mode
|
|||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/" --rec_model_dir="./inference/ch_rec_r34_vd_crnn/"
|
||||
```
|
||||
|
||||
带空格的通用中文OCR模型的体验可以按照上述步骤下载相应的模型,并且更新相关的参数,示例如下:
|
||||
|
||||
```
|
||||
# 预测image_dir指定的单张图像
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_12.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/" --rec_model_dir="./inference/ch_rec_r34_vd_crnn_enhance/"
|
||||
```
|
||||
|
||||
更多的文本检测、识别串联推理使用方式请参考文档教程中[基于预测引擎推理](./doc/doc_ch/inference.md)。
|
||||
|
||||
## 文档教程
|
||||
|
|
13
README_en.md
13
README_en.md
|
@ -5,6 +5,7 @@ PaddleOCR aims to create a rich, leading, and practical OCR tools that help user
|
|||
|
||||
**Recent updates**
|
||||
- 2020.7.9 Add a recognition model that supports spaces, [recognition result](#space-chinese-ocr-results)
|
||||
- 2020.7.9 Add data augmentation and learning rate decay strategies; for details, please read [config](./doc/doc_en/config_en.md)
|
||||
- 2020.6.8 Add [dataset](./doc/doc_en/datasets_en.md) and keep updating
|
||||
- 2020.6.5 Support exporting `attention` model to `inference_model`
|
||||
- 2020.6.5 Support separate prediction and recognition, output result score
|
||||
|
@ -52,6 +53,9 @@ mkdir inference && cd inference
|
|||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
|
||||
# Download the recognition part of the lightweight Chinese OCR and decompress it
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
|
||||
# Download the recognition part of the lightweight Chinese OCR that supports spaces and decompress it
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar && tar xf ch_rec_mv3_crnn_enhance_infer.tar
|
||||
|
||||
cd ..
|
||||
```
|
||||
#### (2) Download General Chinese OCR models
|
||||
|
@ -61,6 +65,8 @@ mkdir inference && cd inference
|
|||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_r50_vd_db_infer.tar && tar xf ch_det_r50_vd_db_infer.tar
|
||||
# Download the recognition part of the generic Chinese OCR model and decompress it
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar && tar xf ch_rec_r34_vd_crnn_infer.tar
|
||||
# Download the recognition part of the generic Chinese OCR model that supports spaces and decompress it
|
||||
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_enhance_infer.tar && tar xf ch_rec_r34_vd_crnn_enhance_infer.tar
|
||||
cd ..
|
||||
```
|
||||
|
||||
|
@ -86,6 +92,13 @@ To run inference of the Generic Chinese OCR model, follow these steps above to d
|
|||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/" --rec_model_dir="./inference/ch_rec_r34_vd_crnn/"
|
||||
```
|
||||
|
||||
To run inference of the Generic Chinese OCR model with space support, follow the steps above to download the corresponding models and update the relevant parameters. Examples are as follows:
|
||||
|
||||
```
|
||||
# Prediction on a single image by specifying image path to image_dir
|
||||
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_12.jpg" --det_model_dir="./inference/ch_det_r50_vd_db/" --rec_model_dir="./inference/ch_rec_r34_vd_crnn_enhance/"
|
||||
```
|
||||
|
||||
For more text detection and recognition models, please refer to the document [Inference](./doc/doc_en/inference_en.md)
|
||||
|
||||
## DOCUMENTATION
|
||||
|
|
|
@ -14,6 +14,8 @@ Global:
|
|||
character_type: ch
|
||||
character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
|
||||
loss_type: ctc
|
||||
distort: false
|
||||
add_space: false
|
||||
reader_yml: ./configs/rec/rec_chinese_reader.yml
|
||||
pretrain_weights:
|
||||
checkpoints:
|
||||
|
|
|
@ -15,6 +15,7 @@ Global:
|
|||
character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
|
||||
loss_type: ctc
|
||||
distort: false
|
||||
add_space: false
|
||||
reader_yml: ./configs/rec/rec_chinese_reader.yml
|
||||
pretrain_weights:
|
||||
checkpoints:
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
| character_type | 设置字符类型 | ch | en/ch, en时将使用默认dict,ch时使用自定义dict|
|
||||
| character_dict_path | 设置字典路径 | ./ppocr/utils/ic15_dict.txt | \ |
|
||||
| loss_type | 设置 loss 类型 | ctc | 支持两种loss: ctc / attention |
|
||||
| distort | 设置是否使用数据增强 | false | 设置为true时,将在训练时随机进行扰动,支持的扰动操作可阅读[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py) |
|
||||
| add_space | 设置是否识别空格 | false | 仅在"ch"模式下支持空格 |
|
||||
| reader_yml | 设置reader配置文件 | ./configs/rec/rec_icdar15_reader.yml | \ |
|
||||
| pretrain_weights | 加载预训练模型路径 | ./pretrain_models/CRNN/best_accuracy | \ |
|
||||
| checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 |
|
||||
|
|
|
@ -94,7 +94,7 @@ word_dict.txt 每行有一个单字,将字符与数字索引映射在一起,
|
|||
`ppocr/utils/ic15_dict.txt` 是一个包含36个字符的英文字典,
|
||||
您可以按需使用。
|
||||
|
||||
如需自定义dic文件,请修改 `configs/rec/rec_icdar15_train.yml` 中的 `character_dict_path` 字段, 并将 `character_type` 设置为 `ch`。
|
||||
如需自定义dic文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 并将 `character_type` 设置为 `ch`。
|
||||
|
||||
### 启动训练
|
||||
|
||||
|
@ -157,12 +157,26 @@ Global:
|
|||
character_type: ch
|
||||
# 添加自定义字典,如修改字典请将路径指向新字典
|
||||
character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
|
||||
# 训练时添加数据增强
|
||||
distort: true
|
||||
# 识别空格
|
||||
add_space: true
|
||||
...
|
||||
# 修改reader类型
|
||||
reader_yml: ./configs/rec/rec_chinese_reader.yml
|
||||
...
|
||||
|
||||
...
|
||||
|
||||
Optimizer:
|
||||
...
|
||||
# 添加学习率衰减策略
|
||||
decay:
|
||||
function: cosine_decay
|
||||
# 每个 epoch 包含 iter 数
|
||||
step_each_epoch: 20
|
||||
# 总共训练epoch数
|
||||
total_epoch: 1000
|
||||
```
|
||||
**注意,预测/评估时的配置文件请务必与训练一致。**
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@ Take `rec_chinese_lite_train.yml` as an example
|
|||
| character_type | Set character type | ch | en/ch, the default dict will be used for en, and the custom dict will be used for ch|
|
||||
| character_dict_path | Set dictionary path | ./ppocr/utils/ic15_dict.txt | \ |
|
||||
| loss_type | Set loss type | ctc | Supports two types of loss: ctc / attention |
|
||||
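| distort | Set whether to use data augmentation | false | When set to true, random distortions are applied during training; for the supported distortion operations, read [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py) |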
|
||||
| add_space | Whether to recognize spaces | false | Only supported in "ch" mode |
|
||||
| reader_yml | Set the reader configuration file | ./configs/rec/rec_icdar15_reader.yml | \ |
|
||||
| pretrain_weights | Load pre-trained model path | ./pretrain_models/CRNN/best_accuracy | \ |
|
||||
| checkpoints | Load saved model path | None | Used to load saved parameters to continue training after interruption |
|
||||
|
|
|
@ -158,9 +158,23 @@ Global:
|
|||
...
|
||||
# Modify reader type
|
||||
reader_yml: ./configs/rec/rec_chinese_reader.yml
|
||||
# Whether to use data augmentation
|
||||
distort: true
|
||||
# Whether to recognize spaces
|
||||
add_space: true
|
||||
...
|
||||
|
||||
...
|
||||
|
||||
Optimizer:
|
||||
...
|
||||
# Add learning rate decay strategy
|
||||
decay:
|
||||
function: cosine_decay
|
||||
# Number of iterations in each epoch
|
||||
step_each_epoch: 20
|
||||
# Total epoch number
|
||||
total_epoch: 1000
|
||||
```
|
||||
**Note that the configuration file for prediction/evaluation must be consistent with the training.**
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 722 KiB |
|
@ -58,6 +58,7 @@ class LMDBReader(object):
|
|||
else:
|
||||
self.batch_size = params['test_batch_size_per_card']
|
||||
self.drop_last = False
|
||||
self.use_distort = False
|
||||
self.infer_img = params['infer_img']
|
||||
|
||||
def load_hierarchical_lmdb_dataset(self):
|
||||
|
@ -206,6 +207,7 @@ class SimpleReader(object):
|
|||
else:
|
||||
self.batch_size = params['test_batch_size_per_card']
|
||||
self.drop_last = False
|
||||
self.use_distort = False
|
||||
|
||||
def __call__(self, process_id):
|
||||
if self.mode != 'train':
|
||||
|
|
|
@ -136,6 +136,9 @@ def jitter(img):
|
|||
|
||||
|
||||
def add_gasuss_noise(image, mean=0, var=0.1):
|
||||
"""
|
||||
Gasuss noise
|
||||
"""
|
||||
|
||||
noise = np.random.normal(mean, var**0.5, image.shape)
|
||||
out = image + 0.5 * noise
|
||||
|
@ -152,9 +155,8 @@ def get_crop(image):
|
|||
top_min = 1
|
||||
top_max = 8
|
||||
top_crop = int(random.randint(top_min, top_max))
|
||||
|
||||
top_crop = min(top_crop, h - 1)
|
||||
crop_img = image.copy()
|
||||
|
||||
ratio = random.randint(0, 1)
|
||||
if ratio:
|
||||
crop_img = crop_img[top_crop:h, :, :]
|
||||
|
@ -249,13 +251,13 @@ def get_warpR(config):
|
|||
dst2 = r.dot(p2)
|
||||
dst3 = r.dot(p3)
|
||||
dst4 = r.dot(p4)
|
||||
list_dst = [dst1, dst2, dst3, dst4]
|
||||
list_dst = np.array([dst1, dst2, dst3, dst4])
|
||||
org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
|
||||
dst = np.zeros((4, 2), np.float32)
|
||||
# Project onto the image plane
|
||||
for i in range(4):
|
||||
dst[i, 0] = list_dst[i][0] * z / (z - list_dst[i][2]) + pcenter[0]
|
||||
dst[i, 1] = list_dst[i][1] * z / (z - list_dst[i][2]) + pcenter[1]
|
||||
dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
|
||||
dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]
|
||||
|
||||
warpR = cv2.getPerspectiveTransform(org, dst)
|
||||
|
||||
dst1, dst2, dst3, dst4 = dst
|
||||
|
|
|
@ -30,12 +30,17 @@ class CharacterOps(object):
|
|||
dict_character = list(self.character_str)
|
||||
elif self.character_type == "ch":
|
||||
character_dict_path = config['character_dict_path']
|
||||
add_space = False
|
||||
if 'add_space' in config:
|
||||
add_space = config['add_space']
|
||||
self.character_str = ""
|
||||
with open(character_dict_path, "rb") as fin:
|
||||
lines = fin.readlines()
|
||||
for line in lines:
|
||||
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||
self.character_str += line
|
||||
if add_space:
|
||||
self.character_str += " "
|
||||
dict_character = list(self.character_str)
|
||||
elif self.character_type == "en_sensitive":
|
||||
# same with ASTER setting (use 94 char).
|
||||
|
|
|
@ -6621,4 +6621,3 @@ j
|
|||
續
|
||||
紹
|
||||
懮
|
||||
|
||||
|
|
|
@ -39,7 +39,8 @@ class TextRecognizer(object):
|
|||
self.rec_algorithm = args.rec_algorithm
|
||||
char_ops_params = {
|
||||
"character_type": args.rec_char_type,
|
||||
"character_dict_path": args.rec_char_dict_path
|
||||
"character_dict_path": args.rec_char_dict_path,
|
||||
"add_space": args.rec_add_space
|
||||
}
|
||||
if self.rec_algorithm != "RARE":
|
||||
char_ops_params['loss_type'] = 'ctc'
|
||||
|
|
|
@ -63,6 +63,7 @@ def parse_args():
|
|||
"--rec_char_dict_path",
|
||||
type=str,
|
||||
default="./ppocr/utils/ppocr_keys_v1.txt")
|
||||
parser.add_argument("--rec_add_space", type=bool, default=True)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue