add doc and font for languages
This commit is contained in:
parent
289bf76067
commit
98c01dabce
|
@ -24,6 +24,7 @@ inference 模型(`fluid.io.save_inference_model`保存的模型)
|
||||||
- [2. 基于CTC损失的识别模型推理](#基于CTC损失的识别模型推理)
|
- [2. 基于CTC损失的识别模型推理](#基于CTC损失的识别模型推理)
|
||||||
- [3. 基于Attention损失的识别模型推理](#基于Attention损失的识别模型推理)
|
- [3. 基于Attention损失的识别模型推理](#基于Attention损失的识别模型推理)
|
||||||
- [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
|
- [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
|
||||||
|
- [5. 多语言模型的推理](#多语言模型的推理)
|
||||||
|
|
||||||
- [四、方向分类模型推理](#方向分类模型推理)
|
- [四、方向分类模型推理](#方向分类模型推理)
|
||||||
- [1. 方向分类模型推理](#方向分类模型推理)
|
- [1. 方向分类模型推理](#方向分类模型推理)
|
||||||
|
@ -305,6 +306,22 @@ dict_character = list(self.character_str)
|
||||||
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_char_dict_path="your text dict path"
|
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_char_dict_path="your text dict path"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
<a name="多语言模型的推理"></a>
|
||||||
|
### 5. 多语言模型的推理
|
||||||
|
如果您需要预测的是其他语言模型,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径, 同时为了得到正确的可视化结果,
|
||||||
|
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/` 路径下有默认提供的小语种字体,例如韩文识别:
|
||||||
|
|
||||||
|
```
|
||||||
|
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/korean_dict.txt" --vis_font_path="doc/korean.ttf"
|
||||||
|
```
|
||||||
|
![](../imgs_words/korean/1.jpg)
|
||||||
|
|
||||||
|
执行命令后,上图的预测结果为:
|
||||||
|
``` text
|
||||||
|
2020-09-19 16:15:05,076-INFO: index: [205 206 38 39]
|
||||||
|
2020-09-19 16:15:05,077-INFO: word : 바탕으로
|
||||||
|
2020-09-19 16:15:05,077-INFO: score: 0.9171358942985535
|
||||||
|
```
|
||||||
|
|
||||||
<a name="方向分类模型推理"></a>
|
<a name="方向分类模型推理"></a>
|
||||||
## 四、方向分类模型推理
|
## 四、方向分类模型推理
|
||||||
|
|
|
@ -201,7 +201,19 @@ Optimizer:
|
||||||
```
|
```
|
||||||
**注意,预测/评估时的配置文件请务必与训练一致。**
|
**注意,预测/评估时的配置文件请务必与训练一致。**
|
||||||
|
|
||||||
|
- 小语种
|
||||||
|
|
||||||
|
PaddleOCR也提供了多语言模型的支持, `configs/rec/multi_languages` 路径下提供了多语言的配置文件,目前PaddleOCR支持的多语言算法有:
|
||||||
|
|
||||||
|
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
|
||||||
|
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
|
||||||
|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 |
|
||||||
|
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
|
||||||
|
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
|
||||||
|
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
|
||||||
|
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
|
||||||
|
|
||||||
|
多语言模型训练方式与中文模型一致,训练数据集均为100万条合成数据,少量的字体和测试数据可以在[百度网盘]()上下载。
|
||||||
|
|
||||||
### 评估
|
### 评估
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 9.5 KiB |
Binary file not shown.
After Width: | Height: | Size: 13 KiB |
Binary file not shown.
After Width: | Height: | Size: 8.2 KiB |
Binary file not shown.
After Width: | Height: | Size: 8.7 KiB |
Binary file not shown.
After Width: | Height: | Size: 3.6 KiB |
Binary file not shown.
After Width: | Height: | Size: 4.4 KiB |
Binary file not shown.
Binary file not shown.
|
@ -133,6 +133,7 @@ def main(args):
|
||||||
image_file_list = get_image_file_list(args.image_dir)
|
image_file_list = get_image_file_list(args.image_dir)
|
||||||
text_sys = TextSystem(args)
|
text_sys = TextSystem(args)
|
||||||
is_visualize = True
|
is_visualize = True
|
||||||
|
font_path = args.vis_font_path
|
||||||
for image_file in image_file_list:
|
for image_file in image_file_list:
|
||||||
img, flag = check_and_read_gif(image_file)
|
img, flag = check_and_read_gif(image_file)
|
||||||
if not flag:
|
if not flag:
|
||||||
|
@ -160,7 +161,7 @@ def main(args):
|
||||||
scores = [rec_res[i][1] for i in range(len(rec_res))]
|
scores = [rec_res[i][1] for i in range(len(rec_res))]
|
||||||
|
|
||||||
draw_img = draw_ocr(
|
draw_img = draw_ocr(
|
||||||
image, boxes, txts, scores, drop_score=drop_score)
|
image, boxes, txts, scores, drop_score=drop_score, font_path=font_path)
|
||||||
draw_img_save = "./inference_results/"
|
draw_img_save = "./inference_results/"
|
||||||
if not os.path.exists(draw_img_save):
|
if not os.path.exists(draw_img_save):
|
||||||
os.makedirs(draw_img_save)
|
os.makedirs(draw_img_save)
|
||||||
|
|
|
@ -71,6 +71,10 @@ def parse_args():
|
||||||
type=str,
|
type=str,
|
||||||
default="./ppocr/utils/ppocr_keys_v1.txt")
|
default="./ppocr/utils/ppocr_keys_v1.txt")
|
||||||
parser.add_argument("--use_space_char", type=str2bool, default=True)
|
parser.add_argument("--use_space_char", type=str2bool, default=True)
|
||||||
|
parser.add_argument(
|
||||||
|
"--vis_font_path",
|
||||||
|
type=str,
|
||||||
|
default="./doc/simfang.ttf")
|
||||||
|
|
||||||
# params for text classifier
|
# params for text classifier
|
||||||
parser.add_argument("--use_angle_cls", type=str2bool, default=False)
|
parser.add_argument("--use_angle_cls", type=str2bool, default=False)
|
||||||
|
@ -198,7 +202,7 @@ def draw_ocr(image,
|
||||||
return image
|
return image
|
||||||
|
|
||||||
|
|
||||||
def draw_ocr_box_txt(image, boxes, txts):
|
def draw_ocr_box_txt(image, boxes, txts, font_path="./doc/simfang.ttf"):
|
||||||
h, w = image.height, image.width
|
h, w = image.height, image.width
|
||||||
img_left = image.copy()
|
img_left = image.copy()
|
||||||
img_right = Image.new('RGB', (w, h), (255, 255, 255))
|
img_right = Image.new('RGB', (w, h), (255, 255, 255))
|
||||||
|
@ -225,7 +229,7 @@ def draw_ocr_box_txt(image, boxes, txts):
|
||||||
if box_height > 2 * box_width:
|
if box_height > 2 * box_width:
|
||||||
font_size = max(int(box_width * 0.9), 10)
|
font_size = max(int(box_width * 0.9), 10)
|
||||||
font = ImageFont.truetype(
|
font = ImageFont.truetype(
|
||||||
"./doc/simfang.ttf", font_size, encoding="utf-8")
|
font_path, font_size, encoding="utf-8")
|
||||||
cur_y = box[0][1]
|
cur_y = box[0][1]
|
||||||
for c in txt:
|
for c in txt:
|
||||||
char_size = font.getsize(c)
|
char_size = font.getsize(c)
|
||||||
|
@ -235,7 +239,7 @@ def draw_ocr_box_txt(image, boxes, txts):
|
||||||
else:
|
else:
|
||||||
font_size = max(int(box_height * 0.8), 10)
|
font_size = max(int(box_height * 0.8), 10)
|
||||||
font = ImageFont.truetype(
|
font = ImageFont.truetype(
|
||||||
"./doc/simfang.ttf", font_size, encoding="utf-8")
|
font_path, font_size, encoding="utf-8")
|
||||||
draw_right.text(
|
draw_right.text(
|
||||||
[box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
|
[box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
|
||||||
img_left = Image.blend(image, img_left, 0.5)
|
img_left = Image.blend(image, img_left, 0.5)
|
||||||
|
|
Loading…
Reference in New Issue