|
@ -24,4 +24,8 @@ output/
|
|||
|
||||
build/
|
||||
dist/
|
||||
paddleocr.egg-info/
|
||||
paddleocr.egg-info/
|
||||
/deploy/android_demo/app/OpenCV/
|
||||
/deploy/android_demo/app/PaddleLite/
|
||||
/deploy/android_demo/app/.cxx/
|
||||
/deploy/android_demo/app/cache/
|
||||
|
|
|
@ -4,4 +4,5 @@ include README.md
|
|||
recursive-include ppocr/utils *.txt utility.py logging.py
|
||||
recursive-include ppocr/data/ *.py
|
||||
recursive-include ppocr/postprocess *.py
|
||||
recursive-include tools/infer *.py
|
||||
recursive-include tools/infer *.py
|
||||
recursive-include ppocr/utils/e2e_utils/ *.py
|
|
@ -147,6 +147,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
self.itemsToShapesbox = {}
|
||||
self.shapesToItemsbox = {}
|
||||
self.prevLabelText = getStr('tempLabel')
|
||||
self.noLabelText = getStr('nullLabel')
|
||||
self.model = 'paddle'
|
||||
self.PPreader = None
|
||||
self.autoSaveNum = 5
|
||||
|
@ -1020,7 +1021,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
|
||||
self.updateComboBox()
|
||||
|
||||
def updateComboBox(self): # TODO:貌似没用
|
||||
def updateComboBox(self):
|
||||
# Get the unique labels and add them to the Combobox.
|
||||
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
|
||||
|
||||
|
@ -1040,7 +1041,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
return dict(label=s.label, # str
|
||||
line_color=s.line_color.getRgb(),
|
||||
fill_color=s.fill_color.getRgb(),
|
||||
points=[(p.x(), p.y()) for p in s.points], # QPonitF
|
||||
points=[(int(p.x()), int(p.y())) for p in s.points], # QPonitF
|
||||
# add chris
|
||||
difficult=s.difficult) # bool
|
||||
|
||||
|
@ -1069,7 +1070,7 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
# print('Image:{0} -> Annotation:{1}'.format(self.filePath, annotationFilePath))
|
||||
return True
|
||||
except:
|
||||
self.errorMessage(u'Error saving label data')
|
||||
self.errorMessage(u'Error saving label data', u'Error saving label data')
|
||||
return False
|
||||
|
||||
def copySelectedShape(self):
|
||||
|
@ -1802,10 +1803,14 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
result.insert(0, box)
|
||||
print('result in reRec is ', result)
|
||||
self.result_dic.append(result)
|
||||
if result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
rec_flag += 1
|
||||
else:
|
||||
print('Can not recognise the box')
|
||||
self.result_dic.append([box,(self.noLabelText,0)])
|
||||
|
||||
if self.noLabelText == shape.label or result[1][0] == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
rec_flag += 1
|
||||
|
||||
if len(self.result_dic) > 0 and rec_flag > 0:
|
||||
self.saveFile(mode='Auto')
|
||||
|
@ -1836,9 +1841,14 @@ class MainWindow(QMainWindow, WindowMixin):
|
|||
print('label no change')
|
||||
else:
|
||||
shape.label = result[1][0]
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
print(box)
|
||||
else:
|
||||
print('Can not recognise the box')
|
||||
if self.noLabelText == shape.label:
|
||||
print('label no change')
|
||||
else:
|
||||
shape.label = self.noLabelText
|
||||
self.singleLabel(shape)
|
||||
self.setDirty()
|
||||
|
||||
def autolcm(self):
|
||||
vbox = QVBoxLayout()
|
||||
|
|
|
@ -29,9 +29,7 @@ PaddleOCR models has been built in PPOCRLabel, please refer to [PaddleOCR instal
|
|||
|
||||
### 2. Install PPOCRLabel
|
||||
|
||||
#### Windows + Anaconda
|
||||
|
||||
Download and install [Anaconda](https://www.anaconda.com/download/#download) (Python 3+)
|
||||
#### Windows
|
||||
|
||||
```
|
||||
pip install pyqt5
|
||||
|
|
|
@ -31,7 +31,7 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
|
|||
PPOCRLabel内置PaddleOCR模型,故请参考[PaddleOCR安装文档](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/installation.md)准备好PaddleOCR,并确保PaddleOCR安装成功。
|
||||
|
||||
### 2. 安装PPOCRLabel
|
||||
#### Windows + Anaconda
|
||||
#### Windows
|
||||
|
||||
```
|
||||
pip install pyqt5
|
||||
|
|
|
@ -45,7 +45,7 @@ class Canvas(QWidget):
|
|||
CREATE, EDIT = list(range(2))
|
||||
_fill_drawing = False # draw shadows
|
||||
|
||||
epsilon = 11.0
|
||||
epsilon = 5.0
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Canvas, self).__init__(*args, **kwargs)
|
||||
|
|
|
@ -124,6 +124,15 @@ def natural_sort(list, key=lambda s:s):
|
|||
|
||||
|
||||
def get_rotate_crop_image(img, points):
|
||||
# Use Green's theorem to judge clockwise or counterclockwise
|
||||
# author: biyanhua
|
||||
d = 0.0
|
||||
for index in range(-1, 3):
|
||||
d += -0.5 * (points[index + 1][1] + points[index][1]) * (
|
||||
points[index + 1][0] - points[index][0])
|
||||
if d < 0: # counterclockwise
|
||||
tmp = np.array(points)
|
||||
points[1], points[3] = tmp[3], tmp[1]
|
||||
|
||||
try:
|
||||
img_crop_width = int(
|
||||
|
|
|
@ -87,6 +87,7 @@ creatPolygon=四点标注
|
|||
drawSquares=正方形标注
|
||||
saveRec=保存识别结果
|
||||
tempLabel=待识别
|
||||
nullLabel=无法识别
|
||||
steps=操作步骤
|
||||
choseModelLg=选择模型语言
|
||||
cancel=取消
|
||||
|
|
|
@ -77,7 +77,7 @@ IR=Image Resize
|
|||
autoRecognition=Auto Recognition
|
||||
reRecognition=Re-recognition
|
||||
mfile=File
|
||||
medit=Eidt
|
||||
medit=Edit
|
||||
mview=View
|
||||
mhelp=Help
|
||||
iconList=Icon List
|
||||
|
@ -87,6 +87,7 @@ creatPolygon=Create Quadrilateral
|
|||
drawSquares=Draw Squares
|
||||
saveRec=Save Recognition Result
|
||||
tempLabel=TEMPORARY
|
||||
nullLabel=NULL
|
||||
steps=Steps
|
||||
choseModelLg=Choose Model Language
|
||||
cancel=Cancel
|
||||
|
|
|
@ -32,7 +32,8 @@ PaddleOCR supports both dynamic graph and static graph programming paradigm
|
|||
|
||||
<div align="center">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/test_add_91.jpg" width="800">
|
||||
<img src="doc/imgs_results/ch_ppocr_mobile_v2.0/00018069.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_01.jpg" width="800">
|
||||
<img src="doc/imgs_results/multi_lang/img_02.jpg" width="800">
|
||||
</div>
|
||||
|
||||
The pictures above are visualizations of the general ppocr_server model. For more results, please see [More visualizations](./doc/doc_en/visualization_en.md).
|
||||
|
@ -42,7 +43,7 @@ The above pictures are the visualizations of the general ppocr_server model. For
|
|||
- Scan the QR code below with WeChat to join the official technical exchange group. We look forward to your participation.
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
|
||||
|
|
12
README_ch.md
|
@ -8,9 +8,9 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 静态图版本:develop分支
|
||||
|
||||
**近期更新**
|
||||
- 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
|
||||
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
|
||||
- 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课,1月26日、28日、29日晚上19:30,[直播地址](https://live.bilibili.com/21689802)
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,[多语言模型下载](./doc/doc_ch/models_list.md),包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
|
||||
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
|
||||
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
|
||||
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
|
||||
|
@ -46,7 +46,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
- 微信扫描二维码加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
|
||||
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
|
||||
</div>
|
||||
|
||||
## 快速体验
|
||||
|
@ -74,11 +74,13 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
## 文档教程
|
||||
- [快速安装](./doc/doc_ch/installation.md)
|
||||
- [中文OCR模型快速使用](./doc/doc_ch/quickstart.md)
|
||||
- [多语言OCR模型快速使用](./doc/doc_ch/multi_languages.md)
|
||||
- [代码组织结构](./doc/doc_ch/tree.md)
|
||||
- 算法介绍
|
||||
- [文本检测](./doc/doc_ch/algorithm_overview.md)
|
||||
- [文本识别](./doc/doc_ch/algorithm_overview.md)
|
||||
- [PP-OCR Pipline](#PP-OCR)
|
||||
- [PP-OCR Pipeline](#PP-OCR)
|
||||
- [端到端PGNet算法](./doc/doc_ch/pgnet.md)
|
||||
- 模型训练/评估
|
||||
- [文本检测](./doc/doc_ch/detection.md)
|
||||
- [文本识别](./doc/doc_ch/recognition.md)
|
||||
|
@ -112,7 +114,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
|
|||
|
||||
|
||||
<a name="PP-OCR"></a>
|
||||
## PP-OCR Pipline
|
||||
## PP-OCR Pipeline
|
||||
<div align="center">
|
||||
<img src="./doc/ppocr_framework.png" width="800">
|
||||
</div>
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations after the 3000th iteration
|
||||
eval_batch_step: [3000, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet18_vd_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0, 2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1200
|
||||
# evaluation is run every 2000 iterations
|
||||
eval_batch_step: [0,2000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
|
||||
checkpoints:
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
|
||||
checkpoints:
|
||||
|
|
|
@ -7,19 +7,15 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
checkpoints:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img:
|
||||
infer_img:
|
||||
save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
|
||||
|
||||
|
||||
Architecture:
|
||||
model_type: det
|
||||
algorithm: SAST
|
||||
|
|
|
@ -7,11 +7,6 @@ Global:
|
|||
save_epoch_step: 1000
|
||||
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||
eval_batch_step: [4000, 5000]
|
||||
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
|
||||
# from static branch, load_static_weights must be set as True.
|
||||
# 2. If you want to finetune the pretrained models we provide in the docs,
|
||||
# you should set load_static_weights as False.
|
||||
load_static_weights: True
|
||||
cal_metric_during_train: False
|
||||
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||
checkpoints:
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
Global:
|
||||
use_gpu: True
|
||||
epoch_num: 600
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/pgnet_r50_vd_totaltext/
|
||||
save_epoch_step: 10
|
||||
# evaluation is run every 1000 iterations after the 0th iteration
|
||||
eval_batch_step: [ 0, 1000 ]
|
||||
cal_metric_during_train: False
|
||||
pretrained_model:
|
||||
checkpoints:
|
||||
save_inference_dir:
|
||||
use_visualdl: False
|
||||
infer_img:
|
||||
valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words
|
||||
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
|
||||
character_dict_path: ppocr/utils/ic15_dict.txt
|
||||
character_type: EN
|
||||
max_text_length: 50 # the max length in seq
|
||||
max_text_nums: 30 # the max seq nums in a pic
|
||||
tcl_len: 64
|
||||
|
||||
Architecture:
|
||||
model_type: e2e
|
||||
algorithm: PGNet
|
||||
Transform:
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 50
|
||||
Neck:
|
||||
name: PGFPN
|
||||
Head:
|
||||
name: PGHead
|
||||
|
||||
Loss:
|
||||
name: PGLoss
|
||||
tcl_bs: 64
|
||||
max_text_length: 50 # the same as Global: max_text_length
|
||||
max_text_nums: 30 # the same as Global:max_text_nums
|
||||
pad_num: 36 # the length of dict for pad
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
factor: 0
|
||||
|
||||
|
||||
PostProcess:
|
||||
name: PGPostProcess
|
||||
score_thresh: 0.5
|
||||
mode: fast # fast or slow two ways
|
||||
|
||||
Metric:
|
||||
name: E2EMetric
|
||||
mode: A # two ways for eval, A: label from txt, B: label from gt_mat
|
||||
gt_mat_dir: ./train_data/total_text/gt # the dir of gt_mat
|
||||
character_dict_path: ppocr/utils/ic15_dict.txt
|
||||
main_indicator: f_score_e2e
|
||||
|
||||
Train:
|
||||
dataset:
|
||||
name: PGDataSet
|
||||
data_dir: ./train_data/total_text/train
|
||||
label_file_list: [./train_data/total_text/train/train.txt]
|
||||
ratio_list: [1.0]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
channel_first: False
|
||||
- E2ELabelEncodeTrain:
|
||||
- PGProcessTrain:
|
||||
batch_size: 14 # same as loader: batch_size_per_card
|
||||
min_crop_size: 24
|
||||
min_text_size: 4
|
||||
max_text_size: 512
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
|
||||
loader:
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
batch_size_per_card: 14
|
||||
num_workers: 16
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: PGDataSet
|
||||
data_dir: ./train_data/total_text/test
|
||||
label_file_list: [./train_data/total_text/test/test.txt]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: RGB
|
||||
channel_first: False
|
||||
- E2ELabelEncodeTest:
|
||||
- E2EResizeForTest:
|
||||
max_side_len: 768
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [ 0.485, 0.456, 0.406 ]
|
||||
std: [ 0.229, 0.224, 0.225 ]
|
||||
order: 'hwc'
|
||||
- ToCHWImage:
|
||||
- KeepKeys:
|
||||
keep_keys: [ 'image', 'shape', 'polys', 'texts', 'ignore_tags', 'img_id']
|
||||
loader:
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
batch_size_per_card: 1 # must be 1
|
||||
num_workers: 2
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: True
|
||||
save_res_path: ./output/rec/predicts_chinese_common_v2.0.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: True
|
||||
save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,21 +19,56 @@ import logging
|
|||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
support_list = {
|
||||
'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic',
|
||||
'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin',
|
||||
'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian',
|
||||
'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi',
|
||||
'ne':'nepali',
|
||||
'it': 'italian',
|
||||
'xi': 'spanish',
|
||||
'pu': 'portuguese',
|
||||
'ru': 'russian',
|
||||
'ar': 'arabic',
|
||||
'ta': 'tamil',
|
||||
'ug': 'uyghur',
|
||||
'fa': 'persian',
|
||||
'ur': 'urdu',
|
||||
'rs': 'serbian latin',
|
||||
'oc': 'occitan',
|
||||
'rsc': 'serbian cyrillic',
|
||||
'bg': 'bulgarian',
|
||||
'uk': 'ukranian',
|
||||
'be': 'belarusian',
|
||||
'te': 'telugu',
|
||||
'ka': 'kannada',
|
||||
'chinese_cht': 'chinese tradition',
|
||||
'hi': 'hindi',
|
||||
'mr': 'marathi',
|
||||
'ne': 'nepali',
|
||||
}
|
||||
assert(
|
||||
os.path.isfile("./rec_multi_language_lite_train.yml")
|
||||
),"Loss basic configuration file rec_multi_language_lite_train.yml.\
|
||||
|
||||
latin_lang = [
|
||||
'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
|
||||
'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
|
||||
'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
|
||||
'sw', 'tl', 'tr', 'uz', 'vi', 'latin'
|
||||
]
|
||||
arabic_lang = ['ar', 'fa', 'ug', 'ur']
|
||||
cyrillic_lang = [
|
||||
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
|
||||
'dar', 'inh', 'che', 'lbe', 'lez', 'tab', 'cyrillic'
|
||||
]
|
||||
devanagari_lang = [
|
||||
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
|
||||
'sa', 'bgc', 'devanagari'
|
||||
]
|
||||
multi_lang = latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
|
||||
|
||||
assert (os.path.isfile("./rec_multi_language_lite_train.yml")
|
||||
), "Loss basic configuration file rec_multi_language_lite_train.yml.\
|
||||
You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
|
||||
|
||||
global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
|
||||
|
||||
global_config = yaml.load(
|
||||
open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
|
||||
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
|
||||
|
||||
|
||||
class ArgsParser(ArgumentParser):
|
||||
def __init__(self):
|
||||
super(ArgsParser, self).__init__(
|
||||
|
@ -41,15 +76,30 @@ class ArgsParser(ArgumentParser):
|
|||
self.add_argument(
|
||||
"-o", "--opt", nargs='+', help="set configuration options")
|
||||
self.add_argument(
|
||||
"-l", "--language", nargs='+', help="set language type, support {}".format(support_list))
|
||||
"-l",
|
||||
"--language",
|
||||
nargs='+',
|
||||
help="set language type, support {}".format(support_list))
|
||||
self.add_argument(
|
||||
"--train",type=str,help="you can use this command to change the train dataset default path")
|
||||
"--train",
|
||||
type=str,
|
||||
help="you can use this command to change the train dataset default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--val",type=str,help="you can use this command to change the eval dataset default path")
|
||||
"--val",
|
||||
type=str,
|
||||
help="you can use this command to change the eval dataset default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--dict",type=str,help="you can use this command to change the dictionary default path")
|
||||
"--dict",
|
||||
type=str,
|
||||
help="you can use this command to change the dictionary default path"
|
||||
)
|
||||
self.add_argument(
|
||||
"--data_dir",type=str,help="you can use this command to change the dataset default root path")
|
||||
"--data_dir",
|
||||
type=str,
|
||||
help="you can use this command to change the dataset default root path"
|
||||
)
|
||||
|
||||
def parse_args(self, argv=None):
|
||||
args = super(ArgsParser, self).parse_args(argv)
|
||||
|
@ -68,21 +118,37 @@ class ArgsParser(ArgumentParser):
|
|||
return config
|
||||
|
||||
def _set_language(self, type):
|
||||
assert(type),"please use -l or --language to choose language type"
|
||||
lang = type[0]
|
||||
assert (type), "please use -l or --language to choose language type"
|
||||
assert(
|
||||
type[0] in support_list.keys()
|
||||
lang in support_list.keys() or lang in multi_lang
|
||||
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
|
||||
"please check your running command".format(support_list, type)
|
||||
global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0])
|
||||
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0])
|
||||
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
|
||||
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
|
||||
global_config['Global']['character_type'] = type[0]
|
||||
assert(
|
||||
os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path']))
|
||||
),"Loss default dictionary file {}_dict.txt.You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0])
|
||||
return type[0]
|
||||
"please check your running command".format(multi_lang, type)
|
||||
if lang in latin_lang:
|
||||
lang = "latin"
|
||||
elif lang in arabic_lang:
|
||||
lang = "arabic"
|
||||
elif lang in cyrillic_lang:
|
||||
lang = "cyrillic"
|
||||
elif lang in devanagari_lang:
|
||||
lang = "devanagari"
|
||||
global_config['Global'][
|
||||
'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
|
||||
global_config['Global'][
|
||||
'save_model_dir'] = './output/rec_{}_lite'.format(lang)
|
||||
global_config['Train']['dataset'][
|
||||
'label_file_list'] = ["train_data/{}_train.txt".format(lang)]
|
||||
global_config['Eval']['dataset'][
|
||||
'label_file_list'] = ["train_data/{}_val.txt".format(lang)]
|
||||
global_config['Global']['character_type'] = lang
|
||||
assert (
|
||||
os.path.isfile(
|
||||
os.path.join(project_path, global_config['Global'][
|
||||
'character_dict_path']))
|
||||
), "Loss default dictionary file {}_dict.txt.You can download it from \
|
||||
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(
|
||||
lang)
|
||||
return lang
|
||||
|
||||
|
||||
def merge_config(config):
|
||||
|
@ -110,43 +176,51 @@ def merge_config(config):
|
|||
cur[sub_key] = value
|
||||
else:
|
||||
cur = cur[sub_key]
|
||||
|
||||
def loss_file(path):
|
||||
assert(
|
||||
os.path.exists(path)
|
||||
),"There is no such file:{},Please do not forget to put in the specified file".format(path)
|
||||
|
||||
|
||||
|
||||
def loss_file(path):
|
||||
assert (
|
||||
os.path.exists(path)
|
||||
), "There is no such file:{},Please do not forget to put in the specified file".format(
|
||||
path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
FLAGS = ArgsParser().parse_args()
|
||||
merge_config(FLAGS.opt)
|
||||
save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
|
||||
if os.path.isfile(save_file_path):
|
||||
os.remove(save_file_path)
|
||||
|
||||
|
||||
if FLAGS.train:
|
||||
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
|
||||
train_label_path = os.path.join(project_path,FLAGS.train)
|
||||
train_label_path = os.path.join(project_path, FLAGS.train)
|
||||
loss_file(train_label_path)
|
||||
if FLAGS.val:
|
||||
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
|
||||
eval_label_path = os.path.join(project_path,FLAGS.val)
|
||||
eval_label_path = os.path.join(project_path, FLAGS.val)
|
||||
loss_file(eval_label_path)
|
||||
if FLAGS.dict:
|
||||
global_config['Global']['character_dict_path'] = FLAGS.dict
|
||||
dict_path = os.path.join(project_path,FLAGS.dict)
|
||||
dict_path = os.path.join(project_path, FLAGS.dict)
|
||||
loss_file(dict_path)
|
||||
if FLAGS.data_dir:
|
||||
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
|
||||
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
|
||||
data_dir = os.path.join(project_path,FLAGS.data_dir)
|
||||
data_dir = os.path.join(project_path, FLAGS.data_dir)
|
||||
loss_file(data_dir)
|
||||
|
||||
|
||||
with open(save_file_path, 'w') as f:
|
||||
yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)
|
||||
yaml.dump(
|
||||
dict(global_config), f, default_flow_style=False, sort_keys=False)
|
||||
logging.info("Project path is :{}".format(project_path))
|
||||
logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0]))
|
||||
logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0]))
|
||||
logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir']))
|
||||
logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path']))
|
||||
logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path))
|
||||
logging.info("Train list path set to :{}".format(global_config['Train'][
|
||||
'dataset']['label_file_list'][0]))
|
||||
logging.info("Eval list path set to :{}".format(global_config['Eval'][
|
||||
'dataset']['label_file_list'][0]))
|
||||
logging.info("Dataset root path set to :{}".format(global_config['Eval'][
|
||||
'dataset']['data_dir']))
|
||||
logging.info("Dict path set to :{}".format(global_config['Global'][
|
||||
'character_dict_path']))
|
||||
logging.info("Config file set to :configs/rec/multi_language/{}".
|
||||
format(save_file_path))
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_arabic_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/arabic_dict.txt
|
||||
character_type: arabic
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/arabic_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/arabic_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_cyrillic_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
|
||||
character_type: cyrillic
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/cyrillic_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/cyrillic_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_devanagari_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
|
||||
character_type: devanagari
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/devanagari_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/devanagari_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -15,11 +15,11 @@ Global:
|
|||
use_visualdl: False
|
||||
infer_img:
|
||||
# for data or label process
|
||||
character_dict_path: ppocr/utils/dict/en_dict.txt
|
||||
character_dict_path: ppocr/utils/en_dict.txt
|
||||
character_type: EN
|
||||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
use_space_char: True
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
Global:
|
||||
use_gpu: true
|
||||
epoch_num: 500
|
||||
log_smooth_window: 20
|
||||
print_batch_step: 10
|
||||
save_model_dir: ./output/rec_latin_lite
|
||||
save_epoch_step: 3
|
||||
eval_batch_step:
|
||||
- 0
|
||||
- 2000
|
||||
cal_metric_during_train: true
|
||||
pretrained_model: null
|
||||
checkpoints: null
|
||||
save_inference_dir: null
|
||||
use_visualdl: false
|
||||
infer_img: null
|
||||
character_dict_path: ppocr/utils/dict/latin_dict.txt
|
||||
character_type: latin
|
||||
max_text_length: 25
|
||||
infer_mode: false
|
||||
use_space_char: true
|
||||
Optimizer:
|
||||
name: Adam
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.001
|
||||
regularizer:
|
||||
name: L2
|
||||
factor: 1.0e-05
|
||||
Architecture:
|
||||
model_type: rec
|
||||
algorithm: CRNN
|
||||
Transform: null
|
||||
Backbone:
|
||||
name: MobileNetV3
|
||||
scale: 0.5
|
||||
model_name: small
|
||||
small_stride:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 2
|
||||
Neck:
|
||||
name: SequenceEncoder
|
||||
encoder_type: rnn
|
||||
hidden_size: 48
|
||||
Head:
|
||||
name: CTCHead
|
||||
fc_decay: 1.0e-05
|
||||
Loss:
|
||||
name: CTCLoss
|
||||
PostProcess:
|
||||
name: CTCLabelDecode
|
||||
Metric:
|
||||
name: RecMetric
|
||||
main_indicator: acc
|
||||
Train:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/latin_train.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- RecAug: null
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: true
|
||||
batch_size_per_card: 256
|
||||
drop_last: true
|
||||
num_workers: 8
|
||||
Eval:
|
||||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: train_data/
|
||||
label_file_list:
|
||||
- train_data/latin_val.txt
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
img_mode: BGR
|
||||
channel_first: false
|
||||
- CTCLabelEncode: null
|
||||
- RecResizeImg:
|
||||
image_shape:
|
||||
- 3
|
||||
- 32
|
||||
- 320
|
||||
- KeepKeys:
|
||||
keep_keys:
|
||||
- image
|
||||
- label
|
||||
- length
|
||||
loader:
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
batch_size_per_card: 256
|
||||
num_workers: 8
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_ic15.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
@ -81,7 +82,7 @@ Eval:
|
|||
dataset:
|
||||
name: SimpleDataSet
|
||||
data_dir: ./train_data/
|
||||
label_file_list: ["./train_data/train_list.txt"]
|
||||
label_file_list: ["./train_data/val_list.txt"]
|
||||
transforms:
|
||||
- DecodeImage: # load image
|
||||
img_mode: BGR
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_none_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_none_none_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_mv3_tps_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_none_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_none_none_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_b3_rare_r34_none_gru.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -19,6 +19,7 @@ Global:
|
|||
max_text_length: 25
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_r34_vd_tps_bilstm_ctc.txt
|
||||
|
||||
Optimizer:
|
||||
name: Adam
|
||||
|
@ -37,7 +38,7 @@ Architecture:
|
|||
name: TPS
|
||||
num_fiducial: 20
|
||||
loc_lr: 0.1
|
||||
model_name: small
|
||||
model_name: large
|
||||
Backbone:
|
||||
name: ResNet
|
||||
layers: 34
|
||||
|
|
|
@ -20,6 +20,7 @@ Global:
|
|||
num_heads: 8
|
||||
infer_mode: False
|
||||
use_space_char: False
|
||||
save_res_path: ./output/rec/predicts_srn.txt
|
||||
|
||||
|
||||
Optimizer:
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/*
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# 如何快速测试
|
||||
### 1. 安装最新版本的Android Studio
|
||||
可以从 https://developer.android.com/studio 下载。本Demo是使用4.0版本的Android Studio编写的。
|
||||
|
||||
### 2. 安装NDK 20 以上版本
|
||||
Demo测试的时候使用的是NDK 20b版本,20版本以上均可以支持编译成功。
|
||||
|
||||
如果您是初学者,可以用以下方式安装和测试NDK编译环境。
|
||||
点击 File -> New ->New Project, 新建 "Native C++" project
|
||||
|
||||
### 3. 导入项目
|
||||
点击 File->New->Import Project..., 然后跟着Android Studio的引导导入
|
||||
|
||||
|
||||
# 获得更多支持
|
||||
前往[端计算模型生成平台EasyEdge](https://ai.baidu.com/easyedge/app/open_source_demo?referrerUrl=paddlelite),获得更多开发支持:
|
||||
|
||||
- Demo APP:可使用手机扫码安装,方便手机端快速体验文字识别
|
||||
- SDK:模型被封装为适配不同芯片硬件和操作系统SDK,包括完善的接口,方便进行二次开发
|
|
@ -0,0 +1 @@
|
|||
/build
|
|
@ -0,0 +1,98 @@
|
|||
import java.security.MessageDigest
|
||||
|
||||
apply plugin: 'com.android.application'
|
||||
|
||||
android {
|
||||
compileSdkVersion 29
|
||||
defaultConfig {
|
||||
applicationId "com.baidu.paddle.lite.demo.ocr"
|
||||
minSdkVersion 23
|
||||
targetSdkVersion 29
|
||||
versionCode 1
|
||||
versionName "1.0"
|
||||
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cppFlags "-std=c++11 -frtti -fexceptions -Wno-format"
|
||||
arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared' ,"-DANDROID_ARM_NEON=TRUE"
|
||||
}
|
||||
}
|
||||
ndk {
|
||||
// abiFilters "arm64-v8a", "armeabi-v7a"
|
||||
abiFilters "arm64-v8a", "armeabi-v7a"
|
||||
ldLibs "jnigraphics"
|
||||
}
|
||||
}
|
||||
buildTypes {
|
||||
release {
|
||||
minifyEnabled false
|
||||
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
|
||||
}
|
||||
}
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path "src/main/cpp/CMakeLists.txt"
|
||||
version "3.10.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation fileTree(include: ['*.jar'], dir: 'libs')
|
||||
implementation 'androidx.appcompat:appcompat:1.1.0'
|
||||
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
|
||||
testImplementation 'junit:junit:4.12'
|
||||
androidTestImplementation 'com.android.support.test:runner:1.0.2'
|
||||
androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
|
||||
}
|
||||
|
||||
def archives = [
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/paddle_lite_libs_v2_9_0.tar.gz',
|
||||
'dest': 'PaddleLite'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddlelite-demo.bj.bcebos.com/libs/android/opencv-4.2.0-android-sdk.tar.gz',
|
||||
'dest': 'OpenCV'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ocr_v2_for_cpu.tar.gz',
|
||||
'dest' : 'src/main/assets/models'
|
||||
],
|
||||
[
|
||||
'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_dict.tar.gz',
|
||||
'dest' : 'src/main/assets/labels'
|
||||
]
|
||||
]
|
||||
|
||||
// Fetches every entry of `archives` into a local cache folder and unpacks it
// into the entry's destination directory.
task downloadAndExtractArchives(type: DefaultTask) {
    doFirst {
        println "Downloading and extracting archives including libs and models"
    }
    doLast {
        // Make sure the folder that caches the downloaded archives exists.
        String cacheDir = "cache"
        if (!file("${cacheDir}").exists()) {
            mkdir "${cacheDir}"
        }
        archives.eachWithIndex { entry, idx ->
            // The cached file is named after the MD5 digest of the source URL.
            MessageDigest md5 = MessageDigest.getInstance('MD5')
            md5.update(entry.src.bytes)
            String cacheKey = new BigInteger(1, md5.digest()).toString(32)
            String cachedTarball = "${cacheDir}/${cacheKey}.tar.gz"

            // Unpack when the destination is missing ...
            boolean needsExtract = !file("${entry.dest}").exists()
            // ... or when the archive had to be downloaded just now.
            if (!file(cachedTarball).exists()) {
                ant.get(src: entry.src, dest: file(cachedTarball))
                needsExtract = true
            }

            if (needsExtract) {
                copy {
                    from tarTree(cachedTarball)
                    into "${entry.dest}"
                }
            }
        }
    }
}
|
||||
preBuild.dependsOn downloadAndExtractArchives
|
|
@ -0,0 +1,21 @@
|
|||
# Add project specific ProGuard rules here.
|
||||
# You can control the set of applied configuration files using the
|
||||
# proguardFiles setting in build.gradle.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
||||
|
||||
# Uncomment this to preserve the line number information for
|
||||
# debugging stack traces.
|
||||
#-keepattributes SourceFile,LineNumberTable
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
|
@ -0,0 +1,26 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.support.test.InstrumentationRegistry;
|
||||
import android.support.test.runner.AndroidJUnit4;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* Instrumented test, which will execute on an Android device.
|
||||
*
|
||||
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
|
||||
*/
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
public class ExampleInstrumentedTest {
|
||||
@Test
|
||||
public void useAppContext() {
|
||||
// Context of the app under test.
|
||||
Context appContext = InstrumentationRegistry.getTargetContext();
|
||||
|
||||
assertEquals("com.baidu.paddle.lite.demo", appContext.getPackageName());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.baidu.paddle.lite.demo.ocr">
|
||||
|
||||
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
|
||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
|
||||
<uses-permission android:name="android.permission.CAMERA"/>
|
||||
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<!-- to test MiniActivity, change this to com.baidu.paddle.lite.demo.ocr.MiniActivity -->
|
||||
<activity android:name="com.baidu.paddle.lite.demo.ocr.MainActivity">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN"/>
|
||||
<category android:name="android.intent.category.LAUNCHER"/>
|
||||
</intent-filter>
|
||||
</activity>
|
||||
<activity
|
||||
android:name="com.baidu.paddle.lite.demo.ocr.SettingsActivity"
|
||||
android:label="Settings">
|
||||
</activity>
|
||||
<provider
|
||||
android:name="androidx.core.content.FileProvider"
|
||||
android:authorities="com.baidu.paddle.lite.demo.ocr.fileprovider"
|
||||
android:exported="false"
|
||||
android:grantUriPermissions="true">
|
||||
<meta-data
|
||||
android:name="android.support.FILE_PROVIDER_PATHS"
|
||||
android:resource="@xml/file_paths"></meta-data>
|
||||
</provider>
|
||||
</application>
|
||||
|
||||
</manifest>
|
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 63 KiB |
After Width: | Height: | Size: 171 KiB |
After Width: | Height: | Size: 61 KiB |
|
@ -0,0 +1,117 @@
|
|||
# For more information about using CMake with Android Studio, read the
|
||||
# documentation: https://d.android.com/studio/projects/add-native-code.html
|
||||
|
||||
# Sets the minimum version of CMake required to build the native library.
|
||||
|
||||
cmake_minimum_required(VERSION 3.4.1)
|
||||
|
||||
# Creates and names a library, sets it as either STATIC or SHARED, and provides
|
||||
# the relative paths to its source code. You can define multiple libraries, and
|
||||
# CMake builds them for you. Gradle automatically packages shared libraries with
|
||||
# your APK.
|
||||
|
||||
set(PaddleLite_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../PaddleLite")
|
||||
include_directories(${PaddleLite_DIR}/cxx/include)
|
||||
|
||||
set(OpenCV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../OpenCV/sdk/native/jni")
|
||||
message(STATUS "opencv dir: ${OpenCV_DIR}")
|
||||
find_package(OpenCV REQUIRED)
|
||||
message(STATUS "OpenCV libraries: ${OpenCV_LIBS}")
|
||||
include_directories(${OpenCV_INCLUDE_DIRS})
|
||||
aux_source_directory(. SOURCES)
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -ffast-math -Ofast -Os"
|
||||
)
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden -fdata-sections -ffunction-sections"
|
||||
)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS
|
||||
"${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,-z,nocopyreloc")
|
||||
|
||||
add_library(
|
||||
# Sets the name of the library.
|
||||
Native
|
||||
# Sets the library as a shared library.
|
||||
SHARED
|
||||
# Provides a relative path to your source file(s).
|
||||
${SOURCES})
|
||||
|
||||
find_library(
|
||||
# Sets the name of the path variable.
|
||||
log-lib
|
||||
# Specifies the name of the NDK library that you want CMake to locate.
|
||||
log)
|
||||
|
||||
add_library(
|
||||
# Sets the name of the library.
|
||||
paddle_light_api_shared
|
||||
# Sets the library as a shared library.
|
||||
SHARED
|
||||
# Provides a relative path to your source file(s).
|
||||
IMPORTED)
|
||||
|
||||
set_target_properties(
|
||||
# Specifies the target library.
|
||||
paddle_light_api_shared
|
||||
# Specifies the parameter you want to define.
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libpaddle_light_api_shared.so
|
||||
# Provides the path to the library you want to import.
|
||||
)
|
||||
|
||||
|
||||
# Specifies libraries CMake should link to your target library. You can link
|
||||
# multiple libraries, such as libraries you define in this build script,
|
||||
# prebuilt third-party libraries, or system libraries.
|
||||
|
||||
target_link_libraries(
|
||||
# Specifies the target library.
|
||||
Native
|
||||
paddle_light_api_shared
|
||||
${OpenCV_LIBS}
|
||||
GLESv2
|
||||
EGL
|
||||
jnigraphics
|
||||
${log-lib}
|
||||
)
|
||||
|
||||
add_custom_command(
|
||||
TARGET Native
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libc++_shared.so
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libc++_shared.so)
|
||||
|
||||
add_custom_command(
|
||||
TARGET Native
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libpaddle_light_api_shared.so
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libpaddle_light_api_shared.so)
|
||||
|
||||
add_custom_command(
|
||||
TARGET Native
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libhiai.so
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libhiai.so)
|
||||
|
||||
add_custom_command(
|
||||
TARGET Native
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libhiai_ir.so
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libhiai_ir.so)
|
||||
|
||||
add_custom_command(
|
||||
TARGET Native
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PaddleLite_DIR}/cxx/libs/${ANDROID_ABI}/libhiai_ir_build.so
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libhiai_ir_build.so)
|
|
@ -0,0 +1,37 @@
|
|||
//
|
||||
// Created by fu on 4/25/18.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
// Standard #include is used instead of the non-standard #import extension.
#include <functional>
#include <numeric>
#include <vector>

// Tag prefixed to every log line. Defined before the platform switch because
// both the Android and the stdio logging macros below reference it.
#define LOG_TAG "OCR_NDK"

#ifdef __ANDROID__

#include <android/log.h>

// Android: forward to logcat at the matching priority.
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
#else
#include <stdio.h>

// Other platforms: info and warnings go to stdout, errors to stderr.
#define LOGI(format, ...)                                                      \
  fprintf(stdout, "[" LOG_TAG "]" format "\n", ##__VA_ARGS__)
#define LOGW(format, ...)                                                      \
  fprintf(stdout, "[" LOG_TAG "]" format "\n", ##__VA_ARGS__)
#define LOGE(format, ...)                                                      \
  fprintf(stderr, "[" LOG_TAG "]Error: " format "\n", ##__VA_ARGS__)
#endif
|
||||
|
||||
enum RETURN_CODE { RETURN_OK = 0 };
|
||||
|
||||
enum NET_TYPE { NET_OCR = 900100, NET_OCR_INTERNAL = 991008 };
|
||||
|
||||
/**
 * Multiply all elements of a vector together.
 *
 * @param vec values to multiply
 * @return the product of all elements, or 0 when vec is empty
 */
template <typename T> inline T product(const std::vector<T> &vec) {
  if (vec.empty()) {
    return T(0);
  }
  // Seed with T(1), not the int literal 1: std::accumulate keeps its running
  // value in the type of the seed, so an int seed would truncate every
  // intermediate product for float or 64-bit element types.
  return std::accumulate(vec.begin(), vec.end(), T(1), std::multiplies<T>());
}
|
|
@ -0,0 +1,114 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/5.
|
||||
//
|
||||
|
||||
#include "native.h"
|
||||
#include "ocr_ppredictor.h"
|
||||
#include <algorithm>
|
||||
#include <paddle_api.h>
|
||||
#include <string>
|
||||
|
||||
static paddle::lite_api::PowerMode str_to_cpu_mode(const std::string &cpu_mode);
|
||||
|
||||
/**
 * JNI entry point: creates an OCR_PPredictor configured with the requested
 * thread count and CPU power mode, loads the three models from the given
 * paths and returns the predictor's address as an opaque handle.
 *
 * The returned value is a heap pointer cast to jlong.
 */
extern "C" JNIEXPORT jlong JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
    JNIEnv *env, jobject thiz, jstring j_det_model_path,
    jstring j_rec_model_path, jstring j_cls_model_path, jint j_thread_num,
    jstring j_cpu_mode) {
  // Bring all Java strings over to the C++ side first.
  std::string det_path = jstring_to_cpp_string(env, j_det_model_path);
  std::string rec_path = jstring_to_cpp_string(env, j_rec_model_path);
  std::string cls_path = jstring_to_cpp_string(env, j_cls_model_path);
  std::string mode_name = jstring_to_cpp_string(env, j_cpu_mode);

  // Predictor configuration: worker threads and power mode.
  ppredictor::OCR_Config config;
  config.thread_num = static_cast<int>(j_thread_num);
  config.mode = str_to_cpu_mode(mode_name);

  auto *predictor = new ppredictor::OCR_PPredictor{config};
  predictor->init_from_file(det_path, rec_path, cls_path);
  return reinterpret_cast<jlong>(predictor);
}
|
||||
|
||||
/**
|
||||
* "LITE_POWER_HIGH" convert to paddle::lite_api::LITE_POWER_HIGH
|
||||
* @param cpu_mode
|
||||
* @return
|
||||
*/
|
||||
static paddle::lite_api::PowerMode
|
||||
str_to_cpu_mode(const std::string &cpu_mode) {
|
||||
static std::map<std::string, paddle::lite_api::PowerMode> cpu_mode_map{
|
||||
{"LITE_POWER_HIGH", paddle::lite_api::LITE_POWER_HIGH},
|
||||
{"LITE_POWER_LOW", paddle::lite_api::LITE_POWER_HIGH},
|
||||
{"LITE_POWER_FULL", paddle::lite_api::LITE_POWER_FULL},
|
||||
{"LITE_POWER_NO_BIND", paddle::lite_api::LITE_POWER_NO_BIND},
|
||||
{"LITE_POWER_RAND_HIGH", paddle::lite_api::LITE_POWER_RAND_HIGH},
|
||||
{"LITE_POWER_RAND_LOW", paddle::lite_api::LITE_POWER_RAND_LOW}};
|
||||
std::string upper_key;
|
||||
std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
|
||||
::toupper);
|
||||
auto index = cpu_mode_map.find(upper_key);
|
||||
if (index == cpu_mode_map.end()) {
|
||||
LOGE("cpu_mode not found %s", upper_key.c_str());
|
||||
return paddle::lite_api::LITE_POWER_HIGH;
|
||||
} else {
|
||||
return index->second;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * JNI entry point: runs OCR on the given input and returns the results
 * flattened into one float array.
 *
 * For every result the output holds, in order: the number of points, the
 * number of word indices, the score, then each point as (x, y), then each
 * word index.
 *
 * @param java_pointer   handle of the predictor; 0 yields an empty array
 * @param buf            input data handed to infer_ocr
 * @param ddims          input dimensions, transported as floats
 * @param original_image Android bitmap converted to a cv::Mat for infer_ocr
 * @return the serialized results; an empty array when the handle is 0 or the
 *         bitmap cannot be converted; nullptr when the input buffer cannot be
 *         accessed
 */
extern "C" JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
    JNIEnv *env, jobject thiz, jlong java_pointer, jfloatArray buf,
    jfloatArray ddims, jobject original_image) {
  LOGI("begin to run native forward");
  if (java_pointer == 0) {
    LOGE("JAVA pointer is NULL");
    return cpp_array_to_jfloatarray(env, nullptr, 0);
  }
  cv::Mat origin = bitmap_to_cv_mat(env, original_image);
  // cv::Mat::empty() is the reliable emptiness test; comparing `size` with 0
  // only compares an internal pointer against null.
  if (origin.empty()) {
    LOGE("origin bitmap cannot convert to CV Mat");
    return cpp_array_to_jfloatarray(env, nullptr, 0);
  }
  ppredictor::OCR_PPredictor *predictor =
      reinterpret_cast<ppredictor::OCR_PPredictor *>(java_pointer);

  // The dimensions arrive as floats; convert them to int64 for the predictor.
  std::vector<float> dims_float_arr = jfloatarray_to_float_vector(env, ddims);
  std::vector<int64_t> dims_arr;
  dims_arr.resize(dims_float_arr.size());
  std::copy(dims_float_arr.cbegin(), dims_float_arr.cend(), dims_arr.begin());

  // The input buffer is large, so it is accessed in place instead of being
  // copied through jfloatarray_to_float_vector.
  int64_t buf_len = (int64_t)env->GetArrayLength(buf);
  jfloat *buf_data = env->GetFloatArrayElements(buf, nullptr);
  if (buf_data == nullptr) {
    LOGE("cannot access the input float array");
    return nullptr;
  }
  std::vector<ppredictor::OCRPredictResult> results =
      predictor->infer_ocr(dims_arr, buf_data, buf_len, NET_OCR, origin);
  // The buffer is not modified here, so release it without copying back.
  // NOTE(review): assumes infer_ocr does not keep the pointer after it
  // returns - confirm against OCR_PPredictor.
  env->ReleaseFloatArrayElements(buf, buf_data, JNI_ABORT);
  LOGI("infer_ocr finished with boxes %zu", results.size());

  // Serialize std::vector<ppredictor::OCRPredictResult> into a float array;
  // the Java layer deserializes it again.
  std::vector<float> float_arr;
  for (const ppredictor::OCRPredictResult &r : results) {
    float_arr.push_back(static_cast<float>(r.points.size()));
    float_arr.push_back(static_cast<float>(r.word_index.size()));
    float_arr.push_back(r.score);
    for (const std::vector<int> &point : r.points) {
      float_arr.push_back(static_cast<float>(point.at(0)));
      float_arr.push_back(static_cast<float>(point.at(1)));
    }
    for (int index : r.word_index) {
      float_arr.push_back(static_cast<float>(index));
    }
  }
  return cpp_array_to_jfloatarray(env, float_arr.data(), float_arr.size());
}
|
||||
|
||||
/**
 * JNI entry point: destroys the predictor referenced by the given handle.
 * A zero handle is only logged; nothing is deleted in that case.
 */
extern "C" JNIEXPORT void JNICALL
Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
    JNIEnv *env, jobject thiz, jlong java_pointer) {
  if (java_pointer != 0) {
    // The handle is the address of a heap-allocated OCR_PPredictor.
    delete (ppredictor::OCR_PPredictor *)java_pointer;
    return;
  }
  LOGE("JAVA pointer is NULL");
}
|
|
@ -0,0 +1,137 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/5.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <android/bitmap.h>
|
||||
#include <jni.h>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/**
 * Convert a Java string into a std::string holding its UTF-8 bytes.
 *
 * The conversion calls String.getBytes("UTF-8") on the Java side and copies
 * the resulting byte array.
 *
 * @param env  JNI environment of the calling thread
 * @param jstr Java string to convert; may be null
 * @return the UTF-8 bytes of jstr, or an empty string when jstr is null or the
 *         bytes cannot be obtained
 */
inline std::string jstring_to_cpp_string(JNIEnv *env, jstring jstr) {
  if (!jstr) {
    return "";
  }
  const jclass stringClass = env->GetObjectClass(jstr);
  const jmethodID getBytes =
      env->GetMethodID(stringClass, "getBytes", "(Ljava/lang/String;)[B");
  // Keep the charset name in a variable so its local reference can be freed.
  jstring charsetName = env->NewStringUTF("UTF-8");
  const jbyteArray stringJbytes =
      (jbyteArray)env->CallObjectMethod(jstr, getBytes, charsetName);
  env->DeleteLocalRef(charsetName);
  env->DeleteLocalRef(stringClass);
  if (stringJbytes == nullptr) {
    // getBytes produced no array; there is nothing to copy.
    return "";
  }

  size_t length = (size_t)env->GetArrayLength(stringJbytes);
  jbyte *pBytes = env->GetByteArrayElements(stringJbytes, NULL);

  std::string ret;
  if (pBytes != nullptr) {
    ret.assign(reinterpret_cast<char *>(pBytes), length);
    // The bytes were only read, so release them without copying back.
    env->ReleaseByteArrayElements(stringJbytes, pBytes, JNI_ABORT);
  }

  env->DeleteLocalRef(stringJbytes);
  return ret;
}
|
||||
|
||||
/**
 * Build a Java String from the bytes of a C++ string, decoding them as UTF-8
 * through the java.lang.String(byte[], String) constructor.
 *
 * Only the bytes up to the first NUL character are transferred.
 */
inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) {
  const char *raw = str.c_str();
  const jsize raw_len = static_cast<jsize>(strlen(raw));

  jclass string_cls = env->FindClass("java/lang/String");
  jmethodID string_ctor =
      env->GetMethodID(string_cls, "<init>", "([BLjava/lang/String;)V");

  // Copy the raw bytes into a Java byte[].
  jbyteArray payload = env->NewByteArray(raw_len);
  env->SetByteArrayRegion(payload, 0, raw_len,
                          reinterpret_cast<const jbyte *>(raw));

  jstring charset = env->NewStringUTF("UTF-8");
  jobject created = env->NewObject(string_cls, string_ctor, payload, charset);

  // Drop the temporaries; only the new String is handed back.
  env->DeleteLocalRef(string_cls);
  env->DeleteLocalRef(charset);
  env->DeleteLocalRef(payload);

  return (jstring)created;
}
|
||||
|
||||
/**
 * Create a Java float[] of length len filled from buf.
 * When len is 0 an empty array is returned and buf is not read.
 */
inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env, const float *buf,
                                            int64_t len) {
  jfloatArray out = env->NewFloatArray(len);
  if (len != 0) {
    env->SetFloatArrayRegion(out, 0, len, buf);
  }
  return out;
}
|
||||
|
||||
/**
 * Create a Java int[] of length len filled from buf.
 */
inline jintArray cpp_array_to_jintarray(JNIEnv *env, const int *buf,
                                        int64_t len) {
  jintArray out = env->NewIntArray(len);
  env->SetIntArrayRegion(out, 0, len, buf);
  return out;
}
|
||||
|
||||
/**
 * Create a Java byte[] of length len filled from buf.
 */
inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env, const int8_t *buf,
                                          int64_t len) {
  jbyteArray out = env->NewByteArray(len);
  env->SetByteArrayRegion(out, 0, len, buf);
  return out;
}
|
||||
|
||||
/**
 * Create a Java long[] holding the values of vec, converting each element to
 * jlong.
 */
inline jlongArray int64_vector_to_jlongarray(JNIEnv *env,
                                             const std::vector<int64_t> &vec) {
  jlongArray out = env->NewLongArray(vec.size());
  // Stage the values as jlong in a temporary buffer that frees itself.
  std::vector<jlong> staged;
  staged.reserve(vec.size());
  for (int64_t value : vec) {
    staged.push_back((jlong)value);
  }
  if (!staged.empty()) {
    env->SetLongArrayRegion(out, 0, staged.size(), staged.data());
  }
  return out;
}
|
||||
|
||||
/**
 * Copy the contents of a Java long[] into a std::vector<int64_t>.
 */
inline std::vector<int64_t> jlongarray_to_int64_vector(JNIEnv *env,
                                                       jlongArray data) {
  const int count = env->GetArrayLength(data);
  jlong *elements = env->GetLongArrayElements(data, nullptr);
  std::vector<int64_t> copied;
  copied.assign(elements, elements + count);
  env->ReleaseLongArrayElements(data, elements, 0);
  return copied;
}
|
||||
|
||||
// Copies the contents of a Java float[] into a std::vector<float>.
//
// env:  JNI environment of the calling thread.
// data: array to read; a null reference yields an empty vector.
//
// Returns the copied elements, or an empty vector when the array elements
// could not be obtained.
inline std::vector<float> jfloatarray_to_float_vector(JNIEnv *env,
                                                      jfloatArray data) {
  if (data == nullptr) {
    return std::vector<float>();
  }
  const jsize data_size = env->GetArrayLength(data);
  jfloat *data_ptr = env->GetFloatArrayElements(data, nullptr);
  if (data_ptr == nullptr) {
    // GetFloatArrayElements failed; there is nothing to copy or release.
    return std::vector<float>();
  }
  std::vector<float> data_vec(data_ptr, data_ptr + data_size);
  // The buffer was only read, so release it without copying anything back.
  env->ReleaseFloatArrayElements(data, data_ptr, JNI_ABORT);
  return data_vec;
}
|
||||
|
||||
// Converts an Android RGBA_8888 bitmap into a BGR cv::Mat.
//
// env:    JNI environment of the calling thread.
// bitmap: android.graphics.Bitmap object to read.
//
// Returns a 3-channel BGR copy of the pixels, or an empty cv::Mat when the
// bitmap info cannot be queried, the format is not RGBA_8888, or the pixel
// buffer cannot be locked.
inline cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap) {
  AndroidBitmapInfo info;
  int result = AndroidBitmap_getInfo(env, bitmap, &info);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS) {
    LOGE("AndroidBitmap_getInfo failed, result: %d", result);
    return cv::Mat{};
  }
  if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
    LOGE("Bitmap format is not RGBA_8888 !");
    return cv::Mat{};
  }

  void *pixels = nullptr;
  result = AndroidBitmap_lockPixels(env, bitmap, &pixels);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS || pixels == nullptr) {
    LOGE("AndroidBitmap_lockPixels failed, result: %d", result);
    return cv::Mat{};
  }

  // Wrap the locked buffer using the row stride reported by the bitmap, so
  // rows that are padded beyond width * 4 bytes are still read correctly,
  // then take an owning copy before the buffer is unlocked.
  cv::Mat locked_view(static_cast<int>(info.height),
                      static_cast<int>(info.width), CV_8UC4, pixels,
                      info.stride);
  cv::Mat mat;
  locked_view.copyTo(mat);
  AndroidBitmap_unlockPixels(env, bitmap);

  cv::cvtColor(mat, mat, cv::COLOR_RGBA2BGR);
  return mat;
}
|
|
@ -0,0 +1,544 @@
|
|||
/*******************************************************************************
|
||||
* *
|
||||
* Author : Angus Johnson *
|
||||
* Version : 6.4.2 *
|
||||
* Date : 27 February 2017 *
|
||||
* Website : http://www.angusj.com *
|
||||
* Copyright : Angus Johnson 2010-2017 *
|
||||
* *
|
||||
* License: *
|
||||
* Use, modification & distribution is subject to Boost Software License Ver 1. *
|
||||
* http://www.boost.org/LICENSE_1_0.txt *
|
||||
* *
|
||||
* Attributions: *
|
||||
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
|
||||
* "A generic solution to polygon clipping" *
|
||||
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
|
||||
* http://portal.acm.org/citation.cfm?id=129906 *
|
||||
* *
|
||||
* Computer graphics and geometric modeling: implementation and algorithms *
|
||||
* By Max K. Agoston *
|
||||
* Springer; 1 edition (January 4, 2005) *
|
||||
* http://books.google.com/books?q=vatti+clipping+agoston *
|
||||
* *
|
||||
* See also: *
|
||||
* "Polygon Offsetting by Computing Winding Numbers" *
|
||||
* Paper no. DETC2005-85513 pp. 565-575 *
|
||||
* ASME 2005 International Design Engineering Technical Conferences *
|
||||
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
|
||||
* September 24-28, 2005 , Long Beach, California, USA *
|
||||
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef clipper_hpp
|
||||
#define clipper_hpp
|
||||
|
||||
#define CLIPPER_VERSION "6.4.2"
|
||||
|
||||
// use_int32: When enabled 32bit ints are used instead of 64bit ints. This
|
||||
// improves performance but coordinate values are limited to the range +/- 46340
|
||||
//#define use_int32
|
||||
|
||||
// use_xyz: adds a Z member to IntPoint. Adds a minor cost to performance.
|
||||
//#define use_xyz
|
||||
|
||||
// use_lines: Enables line clipping. Adds a very minor cost to performance.
|
||||
#define use_lines
|
||||
|
||||
// use_deprecated: Enables temporary support for the obsolete functions
|
||||
//#define use_deprecated
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <ostream>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
namespace ClipperLib {
|
||||
|
||||
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
|
||||
enum PolyType { ptSubject, ptClip };
|
||||
// By far the most widely used winding rules for polygon filling are
|
||||
// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
|
||||
// Other rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
|
||||
// see http://glprogramming.com/red/chapter11.html
|
||||
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
|
||||
|
||||
#ifdef use_int32
|
||||
typedef int cInt;
|
||||
static cInt const loRange = 0x7FFF;
|
||||
static cInt const hiRange = 0x7FFF;
|
||||
#else
|
||||
typedef signed long long cInt;
|
||||
static cInt const loRange = 0x3FFFFFFF;
|
||||
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
|
||||
typedef signed long long long64; // used by Int128 class
|
||||
typedef unsigned long long ulong64;
|
||||
|
||||
#endif
|
||||
|
||||
struct IntPoint {
|
||||
cInt X;
|
||||
cInt Y;
|
||||
#ifdef use_xyz
|
||||
cInt Z;
|
||||
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){};
|
||||
#else
|
||||
|
||||
IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){};
|
||||
#endif
|
||||
|
||||
friend inline bool operator==(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X == b.X && a.Y == b.Y;
|
||||
}
|
||||
|
||||
friend inline bool operator!=(const IntPoint &a, const IntPoint &b) {
|
||||
return a.X != b.X || a.Y != b.Y;
|
||||
}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
typedef std::vector<IntPoint> Path;
|
||||
typedef std::vector<Path> Paths;
|
||||
|
||||
// Appends point `p` to `poly` and returns `poly` so calls can be chained.
inline Path &operator<<(Path &poly, const IntPoint &p) {
  poly.insert(poly.end(), p);
  return poly;
}
|
||||
|
||||
// Appends path `p` to `polys` and returns `polys` so calls can be chained.
inline Paths &operator<<(Paths &polys, const Path &p) {
  polys.insert(polys.end(), p);
  return polys;
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const IntPoint &p);
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const Path &p);
|
||||
|
||||
std::ostream &operator<<(std::ostream &s, const Paths &p);
|
||||
|
||||
struct DoublePoint {
|
||||
double X;
|
||||
double Y;
|
||||
|
||||
DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
|
||||
|
||||
DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef use_xyz
|
||||
typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot,
|
||||
IntPoint &e2top, IntPoint &pt);
|
||||
#endif
|
||||
|
||||
enum InitOptions {
|
||||
ioReverseSolution = 1,
|
||||
ioStrictlySimple = 2,
|
||||
ioPreserveCollinear = 4
|
||||
};
|
||||
enum JoinType { jtSquare, jtRound, jtMiter };
|
||||
enum EndType {
|
||||
etClosedPolygon,
|
||||
etClosedLine,
|
||||
etOpenButt,
|
||||
etOpenSquare,
|
||||
etOpenRound
|
||||
};
|
||||
|
||||
class PolyNode;
|
||||
|
||||
typedef std::vector<PolyNode *> PolyNodes;
|
||||
|
||||
class PolyNode {
|
||||
public:
|
||||
PolyNode();
|
||||
|
||||
virtual ~PolyNode(){};
|
||||
Path Contour;
|
||||
PolyNodes Childs;
|
||||
PolyNode *Parent;
|
||||
|
||||
PolyNode *GetNext() const;
|
||||
|
||||
bool IsHole() const;
|
||||
|
||||
bool IsOpen() const;
|
||||
|
||||
int ChildCount() const;
|
||||
|
||||
private:
|
||||
// PolyNode& operator =(PolyNode& other);
|
||||
unsigned Index; // node index in Parent.Childs
|
||||
bool m_IsOpen;
|
||||
JoinType m_jointype;
|
||||
EndType m_endtype;
|
||||
|
||||
PolyNode *GetNextSiblingUp() const;
|
||||
|
||||
void AddChild(PolyNode &child);
|
||||
|
||||
friend class Clipper; // to access Index
|
||||
friend class ClipperOffset;
|
||||
};
|
||||
|
||||
class PolyTree : public PolyNode {
|
||||
public:
|
||||
~PolyTree() { Clear(); };
|
||||
|
||||
PolyNode *GetFirst() const;
|
||||
|
||||
void Clear();
|
||||
|
||||
int Total() const;
|
||||
|
||||
private:
|
||||
// PolyTree& operator =(PolyTree& other);
|
||||
PolyNodes AllNodes;
|
||||
|
||||
friend class Clipper; // to access AllNodes
|
||||
};
|
||||
|
||||
bool Orientation(const Path &poly);
|
||||
|
||||
double Area(const Path &poly);
|
||||
|
||||
int PointInPolygon(const IntPoint &pt, const Path &path);
|
||||
|
||||
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415);
|
||||
|
||||
void CleanPolygon(Path &poly, double distance = 1.415);
|
||||
|
||||
void CleanPolygons(const Paths &in_polys, Paths &out_polys,
|
||||
double distance = 1.415);
|
||||
|
||||
void CleanPolygons(Paths &polys, double distance = 1.415);
|
||||
|
||||
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
|
||||
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
|
||||
bool pathIsClosed);
|
||||
|
||||
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution);
|
||||
|
||||
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths);
|
||||
|
||||
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths);
|
||||
|
||||
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths);
|
||||
|
||||
void ReversePath(Path &p);
|
||||
|
||||
void ReversePaths(Paths &p);
|
||||
|
||||
struct IntRect {
|
||||
cInt left;
|
||||
cInt top;
|
||||
cInt right;
|
||||
cInt bottom;
|
||||
};
|
||||
|
||||
// enums that are used internally ...
|
||||
enum EdgeSide { esLeft = 1, esRight = 2 };
|
||||
|
||||
// forward declarations (for stuff used internally) ...
|
||||
struct TEdge;
|
||||
struct IntersectNode;
|
||||
struct LocalMinimum;
|
||||
struct OutPt;
|
||||
struct OutRec;
|
||||
struct Join;
|
||||
|
||||
typedef std::vector<OutRec *> PolyOutList;
|
||||
typedef std::vector<TEdge *> EdgeList;
|
||||
typedef std::vector<Join *> JoinList;
|
||||
typedef std::vector<IntersectNode *> IntersectList;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// ClipperBase is the ancestor to the Clipper class. It should not be
|
||||
// instantiated directly. This class simply abstracts the conversion of sets of
|
||||
// polygon coordinates into edge objects that are stored in a LocalMinima list.
|
||||
class ClipperBase {
|
||||
public:
|
||||
ClipperBase();
|
||||
|
||||
virtual ~ClipperBase();
|
||||
|
||||
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
|
||||
|
||||
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
|
||||
|
||||
virtual void Clear();
|
||||
|
||||
IntRect GetBounds();
|
||||
|
||||
bool PreserveCollinear() { return m_PreserveCollinear; };
|
||||
|
||||
void PreserveCollinear(bool value) { m_PreserveCollinear = value; };
|
||||
|
||||
protected:
|
||||
void DisposeLocalMinimaList();
|
||||
|
||||
TEdge *AddBoundsToLML(TEdge *e, bool IsClosed);
|
||||
|
||||
virtual void Reset();
|
||||
|
||||
TEdge *ProcessBound(TEdge *E, bool IsClockwise);
|
||||
|
||||
void InsertScanbeam(const cInt Y);
|
||||
|
||||
bool PopScanbeam(cInt &Y);
|
||||
|
||||
bool LocalMinimaPending();
|
||||
|
||||
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
|
||||
|
||||
OutRec *CreateOutRec();
|
||||
|
||||
void DisposeAllOutRecs();
|
||||
|
||||
void DisposeOutRec(PolyOutList::size_type index);
|
||||
|
||||
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
|
||||
|
||||
void DeleteFromAEL(TEdge *e);
|
||||
|
||||
void UpdateEdgeIntoAEL(TEdge *&e);
|
||||
|
||||
typedef std::vector<LocalMinimum> MinimaList;
|
||||
MinimaList::iterator m_CurrentLM;
|
||||
MinimaList m_MinimaList;
|
||||
|
||||
bool m_UseFullRange;
|
||||
EdgeList m_edges;
|
||||
bool m_PreserveCollinear;
|
||||
bool m_HasOpenPaths;
|
||||
PolyOutList m_PolyOuts;
|
||||
TEdge *m_ActiveEdges;
|
||||
|
||||
typedef std::priority_queue<cInt> ScanbeamList;
|
||||
ScanbeamList m_Scanbeam;
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class Clipper : public virtual ClipperBase {
|
||||
public:
|
||||
Clipper(int initOptions = 0);
|
||||
|
||||
bool Execute(ClipType clipType, Paths &solution,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
|
||||
bool Execute(ClipType clipType, PolyTree &polytree,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
|
||||
bool ReverseSolution() { return m_ReverseOutput; };
|
||||
|
||||
void ReverseSolution(bool value) { m_ReverseOutput = value; };
|
||||
|
||||
bool StrictlySimple() { return m_StrictSimple; };
|
||||
|
||||
void StrictlySimple(bool value) { m_StrictSimple = value; };
|
||||
// set the callback function for z value filling on intersections (otherwise Z
|
||||
// is 0)
|
||||
#ifdef use_xyz
|
||||
void ZFillFunction(ZFillCallback zFillFunc);
|
||||
#endif
|
||||
protected:
|
||||
virtual bool ExecuteInternal();
|
||||
|
||||
private:
|
||||
JoinList m_Joins;
|
||||
JoinList m_GhostJoins;
|
||||
IntersectList m_IntersectList;
|
||||
ClipType m_ClipType;
|
||||
typedef std::list<cInt> MaximaList;
|
||||
MaximaList m_Maxima;
|
||||
TEdge *m_SortedEdges;
|
||||
bool m_ExecuteLocked;
|
||||
PolyFillType m_ClipFillType;
|
||||
PolyFillType m_SubjFillType;
|
||||
bool m_ReverseOutput;
|
||||
bool m_UsingPolyTree;
|
||||
bool m_StrictSimple;
|
||||
#ifdef use_xyz
|
||||
ZFillCallback m_ZFill; // custom callback
|
||||
#endif
|
||||
|
||||
void SetWindingCount(TEdge &edge);
|
||||
|
||||
bool IsEvenOddFillType(const TEdge &edge) const;
|
||||
|
||||
bool IsEvenOddAltFillType(const TEdge &edge) const;
|
||||
|
||||
void InsertLocalMinimaIntoAEL(const cInt botY);
|
||||
|
||||
void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge);
|
||||
|
||||
void AddEdgeToSEL(TEdge *edge);
|
||||
|
||||
bool PopEdgeFromSEL(TEdge *&edge);
|
||||
|
||||
void CopyAELToSEL();
|
||||
|
||||
void DeleteFromSEL(TEdge *e);
|
||||
|
||||
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
|
||||
|
||||
bool IsContributing(const TEdge &edge) const;
|
||||
|
||||
bool IsTopHorz(const cInt XPos);
|
||||
|
||||
void DoMaxima(TEdge *e);
|
||||
|
||||
void ProcessHorizontals();
|
||||
|
||||
void ProcessHorizontal(TEdge *horzEdge);
|
||||
|
||||
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
|
||||
OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
|
||||
OutRec *GetOutRec(int idx);
|
||||
|
||||
void AppendPolygon(TEdge *e1, TEdge *e2);
|
||||
|
||||
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
|
||||
|
||||
OutPt *AddOutPt(TEdge *e, const IntPoint &pt);
|
||||
|
||||
OutPt *GetLastOutPt(TEdge *e);
|
||||
|
||||
bool ProcessIntersections(const cInt topY);
|
||||
|
||||
void BuildIntersectList(const cInt topY);
|
||||
|
||||
void ProcessIntersectList();
|
||||
|
||||
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
|
||||
|
||||
void BuildResult(Paths &polys);
|
||||
|
||||
void BuildResult2(PolyTree &polytree);
|
||||
|
||||
void SetHoleState(TEdge *e, OutRec *outrec);
|
||||
|
||||
void DisposeIntersectNodes();
|
||||
|
||||
bool FixupIntersectionOrder();
|
||||
|
||||
void FixupOutPolygon(OutRec &outrec);
|
||||
|
||||
void FixupOutPolyline(OutRec &outrec);
|
||||
|
||||
bool IsHole(TEdge *e);
|
||||
|
||||
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
|
||||
|
||||
void FixHoleLinkage(OutRec &outrec);
|
||||
|
||||
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
|
||||
|
||||
void ClearJoins();
|
||||
|
||||
void ClearGhostJoins();
|
||||
|
||||
void AddGhostJoin(OutPt *op, const IntPoint offPt);
|
||||
|
||||
bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2);
|
||||
|
||||
void JoinCommonEdges();
|
||||
|
||||
void DoSimplePolygons();
|
||||
|
||||
void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec);
|
||||
|
||||
void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec);
|
||||
|
||||
void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec);
|
||||
|
||||
#ifdef use_xyz
|
||||
void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2);
|
||||
#endif
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class ClipperOffset {
|
||||
public:
|
||||
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
|
||||
|
||||
~ClipperOffset();
|
||||
|
||||
void AddPath(const Path &path, JoinType joinType, EndType endType);
|
||||
|
||||
void AddPaths(const Paths &paths, JoinType joinType, EndType endType);
|
||||
|
||||
void Execute(Paths &solution, double delta);
|
||||
|
||||
void Execute(PolyTree &solution, double delta);
|
||||
|
||||
void Clear();
|
||||
|
||||
double MiterLimit;
|
||||
double ArcTolerance;
|
||||
|
||||
private:
|
||||
Paths m_destPolys;
|
||||
Path m_srcPoly;
|
||||
Path m_destPoly;
|
||||
std::vector<DoublePoint> m_normals;
|
||||
double m_delta, m_sinA, m_sin, m_cos;
|
||||
double m_miterLim, m_StepsPerRad;
|
||||
IntPoint m_lowest;
|
||||
PolyNode m_polyNodes;
|
||||
|
||||
void FixOrientations();
|
||||
|
||||
void DoOffset(double delta);
|
||||
|
||||
void OffsetPoint(int j, int &k, JoinType jointype);
|
||||
|
||||
void DoSquare(int j, int k);
|
||||
|
||||
void DoMiter(int j, int k, double r);
|
||||
|
||||
void DoRound(int j, int k);
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Exception type carrying a description string, exposed through what().
class clipperException : public std::exception {
public:
  // Stores a copy of `description`.
  clipperException(const char *description) : m_descr(description) {}

  virtual ~clipperException() throw() {}

  // Returns the stored description as a C string owned by this object.
  virtual const char *what() const throw() {
    const char *text = m_descr.c_str();
    return text;
  }

private:
  std::string m_descr;
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
} // ClipperLib namespace
|
||||
|
||||
#endif // clipper_hpp
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_cls_process.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
const std::vector<int> CLS_IMAGE_SHAPE = {3, 48, 192};
|
||||
|
||||
// Resizes `img` to the classifier input height (CLS_IMAGE_SHAPE[1]) with
// cubic interpolation, keeping the aspect ratio but capping the width at
// CLS_IMAGE_SHAPE[2]; narrower results are padded on the right with zeros up
// to that width.
cv::Mat cls_resize_img(const cv::Mat &img) {
  const int target_h = CLS_IMAGE_SHAPE[1];
  const int target_w = CLS_IMAGE_SHAPE[2];

  const float aspect = float(img.cols) / float(img.rows);

  // Width that preserves the aspect ratio at the target height, capped.
  int resize_w = 0;
  if (ceilf(target_h * aspect) > target_w) {
    resize_w = target_w;
  } else {
    resize_w = int(ceilf(target_h * aspect));
  }

  cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(resize_w, target_h), 0.f, 0.f,
             cv::INTER_CUBIC);

  const int pad_right = int(target_w - resize_w);
  if (pad_right > 0) {
    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, pad_right,
                       cv::BORDER_CONSTANT, {0, 0, 0});
  }
  return resize_img;
}
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
extern const std::vector<int> CLS_IMAGE_SHAPE;
|
||||
|
||||
cv::Mat cls_resize_img(const cv::Mat &img);
|
|
@ -0,0 +1,142 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_crnn_process.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
const std::string CHARACTER_TYPE = "ch";
|
||||
const int MAX_DICT_LENGTH = 6624;
|
||||
const std::vector<int> REC_IMAGE_SHAPE = {3, 32, 320};
|
||||
|
||||
// Resizes `img` to the recognizer input height (REC_IMAGE_SHAPE[1]) with
// cubic interpolation, converts it to float scaled by 1/255, maps every
// channel value v to (v - 0.5) * 2, and pads the right side with zeros up to
// the target width. When CHARACTER_TYPE is "ch" the target width is
// int(32 * wh_ratio) instead of REC_IMAGE_SHAPE[2].
static cv::Mat crnn_resize_norm_img(cv::Mat img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  int target_w = REC_IMAGE_SHAPE[2];
  if (CHARACTER_TYPE == "ch") {
    target_w = int(32 * wh_ratio);
  }

  const float aspect = float(img.cols) / float(img.rows);

  // Width that preserves the aspect ratio at the target height, capped.
  int resize_w = 0;
  if (ceilf(target_h * aspect) > target_w) {
    resize_w = target_w;
  } else {
    resize_w = int(ceilf(target_h * aspect));
  }

  cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(resize_w, target_h), 0.f, 0.f,
             cv::INTER_CUBIC);

  resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);

  // Normalize each of the three channels of every pixel in place.
  for (int row = 0; row < resize_img.rows; row++) {
    for (int col = 0; col < resize_img.cols; col++) {
      cv::Vec3f &pixel = resize_img.at<cv::Vec3f>(row, col);
      for (int ch = 0; ch < 3; ch++) {
        pixel[ch] = (pixel[ch] - 0.5) * 2;
      }
    }
  }

  cv::Mat dist;
  cv::copyMakeBorder(resize_img, dist, 0, 0, 0, int(target_w - resize_w),
                     cv::BORDER_CONSTANT, {0, 0, 0});
  return dist;
}
|
||||
|
||||
// Resizes `img` to the recognizer input height (REC_IMAGE_SHAPE[1]), keeping
// the aspect ratio but capping the width. The cap is REC_IMAGE_SHAPE[2], or
// int(32 * wh_ratio) when CHARACTER_TYPE is "ch". No padding or
// normalization is applied.
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  int max_w = REC_IMAGE_SHAPE[2];
  if (CHARACTER_TYPE == "ch") {
    max_w = int(32 * wh_ratio);
  }

  const float aspect = float(img.cols) / float(img.rows);

  int resize_w = 0;
  if (ceilf(target_h * aspect) > max_w) {
    resize_w = max_w;
  } else {
    resize_w = int(ceilf(target_h * aspect));
  }

  cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(resize_w, target_h));
  return resize_img;
}
|
||||
|
||||
// Extracts the quadrilateral `box` from `srcimage` and warps it into an
// upright rectangle.
//
// srcimage: source image.
// box:      four corner points as {x, y}; the code indexes box[0..3][0..1],
//           so at least four points with two coordinates each are required.
//           Corner 0->1 defines the output width and 0->3 the output height.
//
// Returns the rectified crop. When the result is at least 1.5 times taller
// than wide it is transposed and flipped vertically before being returned.
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
                              const std::vector<std::vector<int>> &box) {
  std::vector<std::vector<int>> points = box;

  // Axis-aligned bounding box of the four corners.
  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
  const int left = *std::min_element(x_collect, x_collect + 4);
  const int right = *std::max_element(x_collect, x_collect + 4);
  const int top = *std::min_element(y_collect, y_collect + 4);
  const int bottom = *std::max_element(y_collect, y_collect + 4);

  cv::Mat img_crop;
  srcimage(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);

  // Express the corners relative to the cropped region.
  for (size_t i = 0; i < points.size(); i++) {
    points[i][0] -= left;
    points[i][1] -= top;
  }

  const int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
                                      pow(points[0][1] - points[1][1], 2)));
  const int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
                                       pow(points[0][1] - points[3][1], 2)));

  cv::Point2f pts_std[4];
  pts_std[0] = cv::Point2f(0., 0.);
  pts_std[1] = cv::Point2f(img_crop_width, 0.);
  pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
  pts_std[3] = cv::Point2f(0.f, img_crop_height);

  cv::Point2f pointsf[4];
  for (int i = 0; i < 4; i++) {
    pointsf[i] = cv::Point2f(points[i][0], points[i][1]);
  }

  cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);

  cv::Mat dst_img;
  // The fifth argument of warpPerspective is the interpolation flag and the
  // sixth is the border mode; BORDER_REPLICATE must go in the sixth slot to
  // take effect.
  cv::warpPerspective(img_crop, dst_img, M,
                      cv::Size(img_crop_width, img_crop_height),
                      cv::INTER_LINEAR, cv::BORDER_REPLICATE);

  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
    cv::transpose(dst_img, dst_img);
    cv::flip(dst_img, dst_img, 0);
  }
  return dst_img;
}
|
|
@ -0,0 +1,20 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/3.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
extern const std::vector<int> REC_IMAGE_SHAPE;
|
||||
|
||||
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
|
||||
const std::vector<std::vector<int>> &box);
|
||||
|
||||
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio);
|
||||
|
||||
// Returns the zero-based offset from `first` of the largest element in
// [first, last), as located by std::max_element.
template <class ForwardIterator>
inline size_t argmax(ForwardIterator first, ForwardIterator last) {
  const ForwardIterator largest = std::max_element(first, last);
  return std::distance(first, largest);
}
|
|
@ -0,0 +1,342 @@
|
|||
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ocr_clipper.hpp"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgcodecs.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
|
||||
// Computes the offset distance used to expand a 4-point box:
//   distance = area * unclip_ratio / perimeter
// where area is the absolute shoelace area of the quadrilateral.
//
// box:          four points, box[i][0] = x and box[i][1] = y.
// unclip_ratio: expansion factor.
// distance:     output; set to 0 when the perimeter is 0 (all four points
//               coincide), which would otherwise produce a NaN.
static void getcontourarea(float **box, float unclip_ratio, float &distance) {
  const int pts_num = 4;
  float area = 0.0f;
  float dist = 0.0f;
  for (int i = 0; i < pts_num; i++) {
    const float *cur = box[i];
    const float *next = box[(i + 1) % pts_num];
    const float dx = cur[0] - next[0];
    const float dy = cur[1] - next[1];
    area += cur[0] * next[1] - cur[1] * next[0];
    dist += sqrtf(dx * dx + dy * dy);
  }
  area = fabsf(area / 2.0f);

  if (dist == 0.0f) {
    // Degenerate box: there is no perimeter to divide by.
    distance = 0.0f;
    return;
  }
  distance = area * unclip_ratio / dist;
}
|
||||
|
||||
// Expands a 4-point box outward with ClipperLib and returns the minimum-area
// rotated rectangle enclosing the expanded outline.
//
// box: four points, box[i][0] = x and box[i][1] = y; coordinates are
//      truncated to int before being handed to ClipperLib.
//
// The offset distance comes from getcontourarea() with an unclip ratio of 2.
static cv::RotatedRect unclip(float **box) {
  const float unclip_ratio = 2.0;
  float distance = 1.0;

  getcontourarea(box, unclip_ratio, distance);

  ClipperLib::ClipperOffset offset;
  ClipperLib::Path p;
  p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1]))
    << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1]))
    << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1]))
    << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1]));
  offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);

  ClipperLib::Paths soln;
  offset.Execute(soln, distance);

  // Collect every vertex of every returned path. Each path is walked over
  // its own length, so paths of different sizes are neither read past their
  // end nor truncated.
  std::vector<cv::Point2f> points;
  for (const ClipperLib::Path &path : soln) {
    for (const ClipperLib::IntPoint &pt : path) {
      points.emplace_back(pt.X, pt.Y);
    }
  }

  return cv::minAreaRect(points);
}
|
||||
|
||||
// Copies a float matrix into a freshly allocated array of row pointers:
// result[r][c] == mat.at<float>(r, c).
//
// Both the outer array and every row are allocated with new[]; nothing in
// this function releases them, so ownership passes to the caller.
static float **Mat2Vec(cv::Mat mat) {
  float **rows = new float *[mat.rows];
  for (int r = 0; r < mat.rows; ++r) {
    rows[r] = new float[mat.cols];
  }

  for (int r = 0; r < mat.rows; ++r) {
    float *dst = rows[r];
    for (int c = 0; c < mat.cols; ++c) {
      dst[c] = mat.at<float>(r, c);
    }
  }
  return rows;
}
|
||||
|
||||
// In-place recursive quicksort of the row pointers s[l..r] (inclusive) in
// ascending order of each row's first element. Only the pointers are
// rearranged; the rows themselves are not modified. Not a stable sort.
static void quickSort(float **s, int l, int r) {
  if (l >= r) {
    return;
  }

  int lo = l;
  int hi = r;
  float *const pivot_row = s[l];
  const float pivot = pivot_row[0];

  while (lo < hi) {
    // Scan from the right for an element smaller than the pivot.
    while (lo < hi && s[hi][0] >= pivot) {
      --hi;
    }
    if (lo < hi) {
      std::swap(s[lo], s[hi]);
      ++lo;
    }
    // Scan from the left for an element not smaller than the pivot.
    while (lo < hi && s[lo][0] < pivot) {
      ++lo;
    }
    if (lo < hi) {
      std::swap(s[hi], s[lo]);
      --hi;
    }
  }

  // lo == hi is the pivot's final slot.
  s[lo] = pivot_row;
  quickSort(s, l, lo - 1);
  quickSort(s, lo + 1, r);
}
|
||||
|
||||
// In-place recursive quicksort of box[l..r] (inclusive) in ascending order
// of element `axis` of each inner vector. Not a stable sort.
static void quickSort_vector(std::vector<std::vector<int>> &box, int l, int r,
                             int axis) {
  if (l >= r) {
    return;
  }

  int lo = l;
  int hi = r;
  const int pivot = box[l][axis];
  std::vector<int> pivot_item(box[l]);

  while (lo < hi) {
    // Scan from the right for an element smaller than the pivot.
    while (lo < hi && box[hi][axis] >= pivot) {
      --hi;
    }
    if (lo < hi) {
      std::swap(box[lo], box[hi]);
      ++lo;
    }
    // Scan from the left for an element not smaller than the pivot.
    while (lo < hi && box[lo][axis] < pivot) {
      ++lo;
    }
    if (lo < hi) {
      std::swap(box[hi], box[lo]);
      --hi;
    }
  }

  // lo == hi is the pivot's final slot.
  box[lo] = pivot_item;
  quickSort_vector(box, l, lo - 1, axis);
  quickSort_vector(box, lo + 1, r, axis);
}
|
||||
|
||||
static std::vector<std::vector<int>>
|
||||
order_points_clockwise(std::vector<std::vector<int>> pts) {
|
||||
std::vector<std::vector<int>> box = pts;
|
||||
quickSort_vector(box, 0, int(box.size() - 1), 0);
|
||||
std::vector<std::vector<int>> leftmost = {box[0], box[1]};
|
||||
std::vector<std::vector<int>> rightmost = {box[2], box[3]};
|
||||
|
||||
if (leftmost[0][1] > leftmost[1][1]) {
|
||||
std::swap(leftmost[0], leftmost[1]);
|
||||
}
|
||||
|
||||
if (rightmost[0][1] > rightmost[1][1]) {
|
||||
std::swap(rightmost[0], rightmost[1]);
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>> rect = {leftmost[0], rightmost[0], rightmost[1],
|
||||
leftmost[1]};
|
||||
return rect;
|
||||
}
|
||||
|
||||
// Extracts the four corners of a rotated rectangle in a fixed order.
//
// The corners are sorted by x; within the two leftmost and the two rightmost
// corners the one with the smaller y is placed first. The returned array is
//   [0] left/smaller-y, [1] right/smaller-y,
//   [2] right/larger-y, [3] left/larger-y.
//
// @param box   rectangle whose corners are extracted.
// @param ssid  output: the shorter of the rectangle's two side lengths.
// @return the array produced by Mat2Vec with its row pointers reordered.
//         NOTE(review): ownership of this array is not visible here —
//         confirm who releases it.
static float **get_mini_boxes(cv::RotatedRect box, float &ssid) {
  if (box.size.width >= box.size.height) {
    ssid = box.size.height;
  } else {
    ssid = box.size.width;
  }

  cv::Mat corners;
  cv::boxPoints(box, corners);

  // Row pointers sorted by x coordinate.
  auto array = Mat2Vec(corners);
  quickSort(array, 0, 3);

  float *const left_a = array[0];
  float *const left_b = array[1];
  float *const right_a = array[2];
  float *const right_b = array[3];

  const bool left_b_first = left_b[1] <= left_a[1];
  const bool right_b_first = right_b[1] <= right_a[1];

  array[0] = left_b_first ? left_b : left_a;
  array[1] = right_b_first ? right_b : right_a;
  array[2] = right_b_first ? right_a : right_b;
  array[3] = left_b_first ? left_a : left_b;

  return array;
}
|
||||
|
||||
// Limits x to the range [min, max]. The upper bound is checked first, so
// max is returned whenever x > max.
template <class T> T clamp(T x, T min, T max) {
  return x > max ? max : (x < min ? min : x);
}
|
||||
|
||||
// Limits x to the range [min, max]. The upper bound is checked first, so
// max is returned whenever x > max.
static float clampf(float x, float min, float max) {
  return x > max ? max : (x < min ? min : x);
}
|
||||
|
||||
float box_score_fast(float **box_array, cv::Mat pred) {
|
||||
auto array = box_array;
|
||||
int width = pred.cols;
|
||||
int height = pred.rows;
|
||||
|
||||
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
|
||||
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
|
||||
|
||||
int xmin = clamp(int(std::floorf(*(std::min_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int xmax = clamp(int(std::ceilf(*(std::max_element(box_x, box_x + 4)))), 0,
|
||||
width - 1);
|
||||
int ymin = clamp(int(std::floorf(*(std::min_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
int ymax = clamp(int(std::ceilf(*(std::max_element(box_y, box_y + 4)))), 0,
|
||||
height - 1);
|
||||
|
||||
cv::Mat mask;
|
||||
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
|
||||
|
||||
cv::Point root_point[4];
|
||||
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
|
||||
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
|
||||
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
|
||||
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
|
||||
const cv::Point *ppt[1] = {root_point};
|
||||
int npt[] = {4};
|
||||
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
|
||||
|
||||
cv::Mat croppedImg;
|
||||
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
|
||||
.copyTo(croppedImg);
|
||||
|
||||
auto score = cv::mean(croppedImg, mask)[0];
|
||||
return score;
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap) {
|
||||
const int min_size = 3;
|
||||
const int max_candidates = 1000;
|
||||
const float box_thresh = 0.5;
|
||||
|
||||
int width = bitmap.cols;
|
||||
int height = bitmap.rows;
|
||||
|
||||
std::vector<std::vector<cv::Point>> contours;
|
||||
std::vector<cv::Vec4i> hierarchy;
|
||||
|
||||
cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
|
||||
cv::CHAIN_APPROX_SIMPLE);
|
||||
|
||||
int num_contours =
|
||||
contours.size() >= max_candidates ? max_candidates : contours.size();
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> boxes;
|
||||
|
||||
for (int _i = 0; _i < num_contours; _i++) {
|
||||
float ssid;
|
||||
cv::RotatedRect box = cv::minAreaRect(contours[_i]);
|
||||
auto array = get_mini_boxes(box, ssid);
|
||||
|
||||
auto box_for_unclip = array;
|
||||
// end get_mini_box
|
||||
|
||||
if (ssid < min_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float score;
|
||||
score = box_score_fast(array, pred);
|
||||
// end box_score_fast
|
||||
if (score < box_thresh) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// start for unclip
|
||||
cv::RotatedRect points = unclip(box_for_unclip);
|
||||
// end for unclip
|
||||
|
||||
cv::RotatedRect clipbox = points;
|
||||
auto cliparray = get_mini_boxes(clipbox, ssid);
|
||||
|
||||
if (ssid < min_size + 2)
|
||||
continue;
|
||||
|
||||
int dest_width = pred.cols;
|
||||
int dest_height = pred.rows;
|
||||
std::vector<std::vector<int>> intcliparray;
|
||||
|
||||
for (int num_pt = 0; num_pt < 4; num_pt++) {
|
||||
std::vector<int> a{int(clampf(roundf(cliparray[num_pt][0] / float(width) *
|
||||
float(dest_width)),
|
||||
0, float(dest_width))),
|
||||
int(clampf(roundf(cliparray[num_pt][1] /
|
||||
float(height) * float(dest_height)),
|
||||
0, float(dest_height)))};
|
||||
intcliparray.emplace_back(std::move(a));
|
||||
}
|
||||
boxes.emplace_back(std::move(intcliparray));
|
||||
|
||||
} // end for
|
||||
return boxes;
|
||||
}
|
||||
|
||||
// Returns the larger of a and b.
int _max(int a, int b) {
  if (a >= b) {
    return a;
  }
  return b;
}
|
||||
|
||||
// Returns the smaller of a and b.
int _min(int a, int b) {
  if (a >= b) {
    return b;
  }
  return a;
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
|
||||
float ratio_h, float ratio_w, const cv::Mat &srcimg) {
|
||||
int oriimg_h = srcimg.rows;
|
||||
int oriimg_w = srcimg.cols;
|
||||
std::vector<std::vector<std::vector<int>>> boxes{o_boxes};
|
||||
std::vector<std::vector<std::vector<int>>> root_points;
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
boxes[n] = order_points_clockwise(boxes[n]);
|
||||
for (int m = 0; m < boxes[0].size(); m++) {
|
||||
boxes[n][m][0] /= ratio_w;
|
||||
boxes[n][m][1] /= ratio_h;
|
||||
|
||||
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
|
||||
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
int rect_width, rect_height;
|
||||
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
|
||||
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
|
||||
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
|
||||
if (rect_width <= 10 || rect_height <= 10)
|
||||
continue;
|
||||
root_points.push_back(boxes[n]);
|
||||
}
|
||||
return root_points;
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/2.
|
||||
//
|
||||
#pragma once
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
|
||||
float ratio_h, float ratio_w, const cv::Mat &srcimg);
|
|
@ -0,0 +1,261 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/1.
|
||||
//
|
||||
|
||||
#include "ocr_ppredictor.h"
|
||||
#include "common.h"
|
||||
#include "ocr_cls_process.h"
|
||||
#include "ocr_crnn_process.h"
|
||||
#include "ocr_db_post_process.h"
|
||||
#include "preprocess.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
// Stores a copy of the configuration. The predictors themselves are created
// by init() or init_from_file().
OCR_PPredictor::OCR_PPredictor(const OCR_Config &config)
    : _config(config) {
}
|
||||
|
||||
int OCR_PPredictor::init(const std::string &det_model_content,
|
||||
const std::string &rec_model_content,
|
||||
const std::string &cls_model_content) {
|
||||
_det_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR, _config.mode});
|
||||
_det_predictor->init_nb(det_model_content);
|
||||
|
||||
_rec_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_rec_predictor->init_nb(rec_model_content);
|
||||
|
||||
_cls_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_cls_predictor->init_nb(cls_model_content);
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
int OCR_PPredictor::init_from_file(const std::string &det_model_path,
|
||||
const std::string &rec_model_path,
|
||||
const std::string &cls_model_path) {
|
||||
_det_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR, _config.mode});
|
||||
_det_predictor->init_from_file(det_model_path);
|
||||
|
||||
_rec_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_rec_predictor->init_from_file(rec_model_path);
|
||||
|
||||
_cls_predictor = std::unique_ptr<PPredictor>(
|
||||
new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
|
||||
_cls_predictor->init_from_file(cls_model_path);
|
||||
return RETURN_OK;
|
||||
}
|
||||
/**
|
||||
* for debug use, show result of First Step
|
||||
* @param filter_boxes
|
||||
* @param boxes
|
||||
* @param srcimg
|
||||
*/
|
||||
static void
|
||||
visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes,
|
||||
const std::vector<std::vector<std::vector<int>>> &boxes,
|
||||
const cv::Mat &srcimg) {
|
||||
// visualization
|
||||
cv::Point rook_points[filter_boxes.size()][4];
|
||||
for (int n = 0; n < filter_boxes.size(); n++) {
|
||||
for (int m = 0; m < filter_boxes[0].size(); m++) {
|
||||
rook_points[n][m] =
|
||||
cv::Point(int(filter_boxes[n][m][0]), int(filter_boxes[n][m][1]));
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat img_vis;
|
||||
srcimg.copyTo(img_vis);
|
||||
for (int n = 0; n < boxes.size(); n++) {
|
||||
const cv::Point *ppt[1] = {rook_points[n]};
|
||||
int npt[] = {4};
|
||||
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
||||
}
|
||||
// 调试用,自行替换需要修改的路径
|
||||
cv::imwrite("/sdcard/1/vis.png", img_vis);
|
||||
}
|
||||
|
||||
std::vector<OCRPredictResult>
|
||||
OCR_PPredictor::infer_ocr(const std::vector<int64_t> &dims,
|
||||
const float *input_data, int input_len, int net_flag,
|
||||
cv::Mat &origin) {
|
||||
PredictorInput input = _det_predictor->get_first_input();
|
||||
input.set_dims(dims);
|
||||
input.set_data(input_data, input_len);
|
||||
std::vector<PredictorOutput> results = _det_predictor->infer();
|
||||
PredictorOutput &res = results.at(0);
|
||||
std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes(
|
||||
res.get_float_data(), res.get_size(), (int)dims[2], (int)dims[3], origin);
|
||||
LOGI("Filter_box size %ld", filtered_box.size());
|
||||
return infer_rec(filtered_box, origin);
|
||||
}
|
||||
|
||||
std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
|
||||
const std::vector<std::vector<std::vector<int>>> &boxes,
|
||||
const cv::Mat &origin_img) {
|
||||
std::vector<float> mean = {0.5f, 0.5f, 0.5f};
|
||||
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
|
||||
std::vector<int64_t> dims = {1, 3, 0, 0};
|
||||
std::vector<OCRPredictResult> ocr_results;
|
||||
|
||||
PredictorInput input = _rec_predictor->get_first_input();
|
||||
for (auto bp = boxes.crbegin(); bp != boxes.crend(); ++bp) {
|
||||
const std::vector<std::vector<int>> &box = *bp;
|
||||
cv::Mat crop_img = get_rotate_crop_image(origin_img, box);
|
||||
crop_img = infer_cls(crop_img);
|
||||
|
||||
float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
|
||||
cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio);
|
||||
input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
|
||||
const float *dimg = reinterpret_cast<const float *>(input_image.data);
|
||||
int input_size = input_image.rows * input_image.cols;
|
||||
|
||||
dims[2] = input_image.rows;
|
||||
dims[3] = input_image.cols;
|
||||
input.set_dims(dims);
|
||||
|
||||
neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean,
|
||||
scale);
|
||||
|
||||
std::vector<PredictorOutput> results = _rec_predictor->infer();
|
||||
const float *predict_batch = results.at(0).get_float_data();
|
||||
const std::vector<int64_t> predict_shape = results.at(0).get_shape();
|
||||
|
||||
OCRPredictResult res;
|
||||
|
||||
// ctc decode
|
||||
int argmax_idx;
|
||||
int last_index = 0;
|
||||
float score = 0.f;
|
||||
int count = 0;
|
||||
float max_value = 0.0f;
|
||||
|
||||
for (int n = 0; n < predict_shape[1]; n++) {
|
||||
argmax_idx = int(argmax(&predict_batch[n * predict_shape[2]],
|
||||
&predict_batch[(n + 1) * predict_shape[2]]));
|
||||
max_value =
|
||||
float(*std::max_element(&predict_batch[n * predict_shape[2]],
|
||||
&predict_batch[(n + 1) * predict_shape[2]]));
|
||||
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
|
||||
score += max_value;
|
||||
count += 1;
|
||||
res.word_index.push_back(argmax_idx);
|
||||
}
|
||||
last_index = argmax_idx;
|
||||
}
|
||||
score /= count;
|
||||
if (res.word_index.empty()) {
|
||||
continue;
|
||||
}
|
||||
res.score = score;
|
||||
res.points = box;
|
||||
ocr_results.emplace_back(std::move(res));
|
||||
}
|
||||
LOGI("ocr_results finished %lu", ocr_results.size());
|
||||
return ocr_results;
|
||||
}
|
||||
|
||||
// Runs the classification model on one cropped image and rotates the image
// by 180 degrees when the best-scoring label is odd and its score exceeds
// thresh.
//
// @param img     cropped image; it is not modified.
// @param thresh  minimum score required for the rotation to be applied.
// @return a copy of img, rotated if the condition above holds.
cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  std::vector<int64_t> dims = {1, 3, 0, 0};

  PredictorInput input = _cls_predictor->get_first_input();

  cv::Mat input_image = cls_resize_img(img);
  input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
  const float *dimg = reinterpret_cast<const float *>(input_image.data);
  int input_size = input_image.rows * input_image.cols;

  dims[2] = input_image.rows;
  dims[3] = input_image.cols;
  input.set_dims(dims);

  neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean,
                  scale);

  std::vector<PredictorOutput> results = _cls_predictor->infer();
  const PredictorOutput &cls_out = results.at(0);

  const float *scores = cls_out.get_float_data();
  const int64_t num_scores = cls_out.get_size();
  float score = 0;
  int label = 0;
  for (int64_t i = 0; i < num_scores; i++) {
    LOGI("output scores [%f]", scores[i]);
    if (scores[i] > score) {
      score = scores[i];
      label = int(i);
    }
  }

  cv::Mat srcimg;
  img.copyTo(srcimg);
  if (label % 2 == 1 && score > thresh) {
    // cv::ROTATE_180 is the named constant for the literal 1 used before.
    cv::rotate(srcimg, srcimg, cv::ROTATE_180);
  }
  return srcimg;
}
|
||||
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
OCR_PPredictor::calc_filtered_boxes(const float *pred, int pred_size,
|
||||
int output_height, int output_width,
|
||||
const cv::Mat &origin) {
|
||||
const double threshold = 0.3;
|
||||
const double maxvalue = 1;
|
||||
|
||||
cv::Mat pred_map = cv::Mat::zeros(output_height, output_width, CV_32F);
|
||||
memcpy(pred_map.data, pred, pred_size * sizeof(float));
|
||||
cv::Mat cbuf_map;
|
||||
pred_map.convertTo(cbuf_map, CV_8UC1);
|
||||
|
||||
cv::Mat bit_map;
|
||||
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
|
||||
|
||||
std::vector<std::vector<std::vector<int>>> boxes =
|
||||
boxes_from_bitmap(pred_map, bit_map);
|
||||
float ratio_h = output_height * 1.0f / origin.rows;
|
||||
float ratio_w = output_width * 1.0f / origin.cols;
|
||||
std::vector<std::vector<std::vector<int>>> filter_boxes =
|
||||
filter_tag_det_res(boxes, ratio_h, ratio_w, origin);
|
||||
return filter_boxes;
|
||||
}
|
||||
|
||||
std::vector<int>
|
||||
OCR_PPredictor::postprocess_rec_word_index(const PredictorOutput &res) {
|
||||
const int *rec_idx = res.get_int_data();
|
||||
const std::vector<std::vector<uint64_t>> rec_idx_lod = res.get_lod();
|
||||
|
||||
std::vector<int> pred_idx;
|
||||
for (int n = int(rec_idx_lod[0][0]); n < int(rec_idx_lod[0][1] * 2); n += 2) {
|
||||
pred_idx.emplace_back(rec_idx[n]);
|
||||
}
|
||||
return pred_idx;
|
||||
}
|
||||
|
||||
float OCR_PPredictor::postprocess_rec_score(const PredictorOutput &res) {
|
||||
const float *predict_batch = res.get_float_data();
|
||||
const std::vector<int64_t> predict_shape = res.get_shape();
|
||||
const std::vector<std::vector<uint64_t>> predict_lod = res.get_lod();
|
||||
int blank = predict_shape[1];
|
||||
float score = 0.f;
|
||||
int count = 0;
|
||||
for (int n = predict_lod[0][0]; n < predict_lod[0][1] - 1; n++) {
|
||||
int argmax_idx = argmax(predict_batch + n * predict_shape[1],
|
||||
predict_batch + (n + 1) * predict_shape[1]);
|
||||
float max_value = predict_batch[n * predict_shape[1] + argmax_idx];
|
||||
if (blank - 1 - argmax_idx > 1e-5) {
|
||||
score += max_value;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
if (count == 0) {
|
||||
LOGE("calc score count 0");
|
||||
} else {
|
||||
score /= count;
|
||||
}
|
||||
LOGI("calc score: %f", score);
|
||||
return score;
|
||||
}
|
||||
|
||||
// This predictor always reports the NET_OCR type.
NET_TYPE OCR_PPredictor::get_net_flag() const {
  return NET_OCR;
}
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
//
|
||||
// Created by fujiayi on 2020/7/1.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ppredictor.h"
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <paddle_api.h>
|
||||
#include <string>
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
/**
|
||||
* Config
|
||||
*/
|
||||
struct OCR_Config {
|
||||
int thread_num = 4; // Thread num
|
||||
paddle::lite_api::PowerMode mode =
|
||||
paddle::lite_api::LITE_POWER_HIGH; // PaddleLite Mode
|
||||
};
|
||||
|
||||
/**
 * Result for one recognised text polygon.
 */
struct OCRPredictResult {
  // Indices of the recognised characters, in reading order.
  std::vector<int> word_index;

  // Corner points of the text polygon, each stored as {x, y}.
  std::vector<std::vector<int>> points;

  // Recognition confidence. Initialised so a default-constructed result
  // never exposes an indeterminate value.
  float score = 0.f;
};
|
||||
|
||||
/**
|
||||
* OCR there are 2 models
|
||||
* 1. First model(det),select polygones to show where are the texts
|
||||
* 2. crop from the origin images, use these polygones to infer
|
||||
*/
|
||||
class OCR_PPredictor : public PPredictor_Interface {
|
||||
public:
|
||||
OCR_PPredictor(const OCR_Config &config);
|
||||
|
||||
virtual ~OCR_PPredictor() {}
|
||||
|
||||
/**
|
||||
* 初始化二个模型的Predictor
|
||||
* @param det_model_content
|
||||
* @param rec_model_content
|
||||
* @return
|
||||
*/
|
||||
int init(const std::string &det_model_content,
|
||||
const std::string &rec_model_content,
|
||||
const std::string &cls_model_content);
|
||||
int init_from_file(const std::string &det_model_path,
|
||||
const std::string &rec_model_path,
|
||||
const std::string &cls_model_path);
|
||||
/**
|
||||
* Return OCR result
|
||||
* @param dims
|
||||
* @param input_data
|
||||
* @param input_len
|
||||
* @param net_flag
|
||||
* @param origin
|
||||
* @return
|
||||
*/
|
||||
virtual std::vector<OCRPredictResult>
|
||||
infer_ocr(const std::vector<int64_t> &dims, const float *input_data,
|
||||
int input_len, int net_flag, cv::Mat &origin);
|
||||
|
||||
virtual NET_TYPE get_net_flag() const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* calcul Polygone from the result image of first model
|
||||
* @param pred
|
||||
* @param output_height
|
||||
* @param output_width
|
||||
* @param origin
|
||||
* @return
|
||||
*/
|
||||
std::vector<std::vector<std::vector<int>>>
|
||||
calc_filtered_boxes(const float *pred, int pred_size, int output_height,
|
||||
int output_width, const cv::Mat &origin);
|
||||
|
||||
/**
|
||||
* infer for second model
|
||||
*
|
||||
* @param boxes
|
||||
* @param origin
|
||||
* @return
|
||||
*/
|
||||
std::vector<OCRPredictResult>
|
||||
infer_rec(const std::vector<std::vector<std::vector<int>>> &boxes,
|
||||
const cv::Mat &origin);
|
||||
|
||||
/**
|
||||
* infer for cls model
|
||||
*
|
||||
* @param boxes
|
||||
* @param origin
|
||||
* @return
|
||||
*/
|
||||
cv::Mat infer_cls(const cv::Mat &origin, float thresh = 0.9);
|
||||
|
||||
/**
|
||||
* Postprocess or sencod model to extract text
|
||||
* @param res
|
||||
* @return
|
||||
*/
|
||||
std::vector<int> postprocess_rec_word_index(const PredictorOutput &res);
|
||||
|
||||
/**
|
||||
* calculate confidence of second model text result
|
||||
* @param res
|
||||
* @return
|
||||
*/
|
||||
float postprocess_rec_score(const PredictorOutput &res);
|
||||
|
||||
std::unique_ptr<PPredictor> _det_predictor;
|
||||
std::unique_ptr<PPredictor> _rec_predictor;
|
||||
std::unique_ptr<PPredictor> _cls_predictor;
|
||||
OCR_Config _config;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
#include "ppredictor.h"
|
||||
#include "common.h"
|
||||
|
||||
namespace ppredictor {
|
||||
PPredictor::PPredictor(int thread_num, int net_flag,
|
||||
paddle::lite_api::PowerMode mode)
|
||||
: _thread_num(thread_num), _net_flag(net_flag), _mode(mode) {}
|
||||
|
||||
int PPredictor::init_nb(const std::string &model_content) {
|
||||
paddle::lite_api::MobileConfig config;
|
||||
config.set_model_from_buffer(model_content);
|
||||
return _init(config);
|
||||
}
|
||||
|
||||
int PPredictor::init_from_file(const std::string &model_content) {
|
||||
paddle::lite_api::MobileConfig config;
|
||||
config.set_model_from_file(model_content);
|
||||
return _init(config);
|
||||
}
|
||||
|
||||
template <typename ConfigT> int PPredictor::_init(ConfigT &config) {
|
||||
config.set_threads(_thread_num);
|
||||
config.set_power_mode(_mode);
|
||||
_predictor = paddle::lite_api::CreatePaddlePredictor(config);
|
||||
LOGI("paddle instance created");
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
PredictorInput PPredictor::get_input(int index) {
|
||||
PredictorInput input{_predictor->GetInput(index), index, _net_flag};
|
||||
_is_input_get = true;
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<PredictorInput> PPredictor::get_inputs(int num) {
|
||||
std::vector<PredictorInput> results;
|
||||
for (int i = 0; i < num; i++) {
|
||||
results.emplace_back(get_input(i));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
PredictorInput PPredictor::get_first_input() { return get_input(0); }
|
||||
|
||||
std::vector<PredictorOutput> PPredictor::infer() {
|
||||
LOGI("infer Run start %d", _net_flag);
|
||||
std::vector<PredictorOutput> results;
|
||||
if (!_is_input_get) {
|
||||
return results;
|
||||
}
|
||||
_predictor->Run();
|
||||
LOGI("infer Run end");
|
||||
|
||||
for (int i = 0; i < _predictor->GetOutputNames().size(); i++) {
|
||||
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
|
||||
_predictor->GetOutput(i);
|
||||
LOGI("output tensor[%d] size %ld", i, product(output_tensor->shape()));
|
||||
PredictorOutput result{std::move(output_tensor), i, _net_flag};
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
NET_TYPE PPredictor::get_net_flag() const { return (NET_TYPE)_net_flag; }
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
|
||||
#include "paddle_api.h"
|
||||
#include "predictor_input.h"
|
||||
#include "predictor_output.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
/**
|
||||
* PaddleLite Preditor Common Interface
|
||||
*/
|
||||
class PPredictor_Interface {
|
||||
public:
|
||||
virtual ~PPredictor_Interface() {}
|
||||
|
||||
virtual NET_TYPE get_net_flag() const = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Common Predictor
|
||||
*/
|
||||
class PPredictor : public PPredictor_Interface {
|
||||
public:
|
||||
PPredictor(
|
||||
int thread_num, int net_flag = 0,
|
||||
paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH);
|
||||
|
||||
virtual ~PPredictor() {}
|
||||
|
||||
/**
|
||||
* init paddlitelite opt model,nb format ,or use ini_paddle
|
||||
* @param model_content
|
||||
* @return 0
|
||||
*/
|
||||
virtual int init_nb(const std::string &model_content);
|
||||
|
||||
virtual int init_from_file(const std::string &model_content);
|
||||
|
||||
std::vector<PredictorOutput> infer();
|
||||
|
||||
std::shared_ptr<paddle::lite_api::PaddlePredictor> get_predictor() {
|
||||
return _predictor;
|
||||
}
|
||||
|
||||
virtual std::vector<PredictorInput> get_inputs(int num);
|
||||
|
||||
virtual PredictorInput get_input(int index);
|
||||
|
||||
virtual PredictorInput get_first_input();
|
||||
|
||||
virtual NET_TYPE get_net_flag() const;
|
||||
|
||||
protected:
|
||||
template <typename ConfigT> int _init(ConfigT &config);
|
||||
|
||||
private:
|
||||
int _thread_num;
|
||||
paddle::lite_api::PowerMode _mode;
|
||||
std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor;
|
||||
bool _is_input_get = false;
|
||||
int _net_flag;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
#include "predictor_input.h"
|
||||
|
||||
namespace ppredictor {
|
||||
|
||||
void PredictorInput::set_dims(std::vector<int64_t> dims) {
|
||||
// yolov3
|
||||
if (_net_flag == 101 && _index == 1) {
|
||||
_tensor->Resize({1, 2});
|
||||
_tensor->mutable_data<int>()[0] = (int)dims.at(2);
|
||||
_tensor->mutable_data<int>()[1] = (int)dims.at(3);
|
||||
} else {
|
||||
_tensor->Resize(dims);
|
||||
}
|
||||
_is_dims_set = true;
|
||||
}
|
||||
|
||||
float *PredictorInput::get_mutable_float_data() {
|
||||
if (!_is_dims_set) {
|
||||
LOGE("PredictorInput::set_dims is not called");
|
||||
}
|
||||
return _tensor->mutable_data<float>();
|
||||
}
|
||||
|
||||
void PredictorInput::set_data(const float *input_data, int input_float_len) {
|
||||
float *input_raw_data = get_mutable_float_data();
|
||||
memcpy(input_raw_data, input_data, input_float_len * sizeof(float));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <paddle_api.h>
|
||||
#include <vector>
|
||||
|
||||
namespace ppredictor {
|
||||
class PredictorInput {
|
||||
public:
|
||||
PredictorInput(std::unique_ptr<paddle::lite_api::Tensor> &&tensor, int index,
|
||||
int net_flag)
|
||||
: _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}
|
||||
|
||||
void set_dims(std::vector<int64_t> dims);
|
||||
|
||||
float *get_mutable_float_data();
|
||||
|
||||
void set_data(const float *input_data, int input_float_len);
|
||||
|
||||
private:
|
||||
std::unique_ptr<paddle::lite_api::Tensor> _tensor;
|
||||
bool _is_dims_set = false;
|
||||
int _index;
|
||||
int _net_flag;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
#include "predictor_output.h"
|
||||
namespace ppredictor {
|
||||
const float *PredictorOutput::get_float_data() const {
|
||||
return _tensor->data<float>();
|
||||
}
|
||||
|
||||
const int *PredictorOutput::get_int_data() const {
|
||||
return _tensor->data<int>();
|
||||
}
|
||||
|
||||
const std::vector<std::vector<uint64_t>> PredictorOutput::get_lod() const {
|
||||
return _tensor->lod();
|
||||
}
|
||||
|
||||
int64_t PredictorOutput::get_size() const {
|
||||
if (_net_flag == NET_OCR) {
|
||||
return _tensor->shape().at(2) * _tensor->shape().at(3);
|
||||
} else {
|
||||
return product(_tensor->shape());
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<int64_t> PredictorOutput::get_shape() const {
|
||||
return _tensor->shape();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <paddle_api.h>
|
||||
#include <vector>
|
||||
|
||||
namespace ppredictor {
|
||||
class PredictorOutput {
|
||||
public:
|
||||
PredictorOutput() {}
|
||||
PredictorOutput(std::unique_ptr<const paddle::lite_api::Tensor> &&tensor,
|
||||
int index, int net_flag)
|
||||
: _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}
|
||||
|
||||
const float *get_float_data() const;
|
||||
const int *get_int_data() const;
|
||||
int64_t get_size() const;
|
||||
const std::vector<std::vector<uint64_t>> get_lod() const;
|
||||
const std::vector<int64_t> get_shape() const;
|
||||
|
||||
std::vector<float> data; // return float, or use data_int
|
||||
std::vector<int> data_int; // several layers return int ,or use data
|
||||
std::vector<int64_t> shape; // PaddleLite output shape
|
||||
std::vector<std::vector<uint64_t>> lod; // PaddleLite output lod
|
||||
|
||||
private:
|
||||
std::unique_ptr<const paddle::lite_api::Tensor> _tensor;
|
||||
int _index;
|
||||
int _net_flag;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
#include "preprocess.h"
|
||||
#include <android/bitmap.h>
|
||||
|
||||
// Converts an Android RGBA_8888 bitmap into a BGR cv::Mat.
//
// @param env     JNI environment of the calling thread.
// @param bitmap  android.graphics.Bitmap object.
// @return a newly allocated 3-channel BGR image, or an empty Mat when the
//         bitmap info cannot be read, the format is not RGBA_8888, or the
//         pixels cannot be locked.
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap) {
  AndroidBitmapInfo info;
  int result = AndroidBitmap_getInfo(env, bitmap, &info);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS) {
    LOGE("AndroidBitmap_getInfo failed, result: %d", result);
    return cv::Mat{};
  }
  if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
    LOGE("Bitmap format is not RGBA_8888 !");
    return cv::Mat{};
  }

  void *pixels = NULL;
  result = AndroidBitmap_lockPixels(env, bitmap, &pixels);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS || pixels == NULL) {
    LOGE("AndroidBitmap_lockPixels failed, result: %d", result);
    return cv::Mat{};
  }

  // Wrap the locked pixels without copying. info.stride is the row pitch in
  // bytes and may be larger than width * 4, so it is passed as the Mat step
  // rather than assuming tightly packed rows.
  const cv::Mat rgba(static_cast<int>(info.height),
                     static_cast<int>(info.width), CV_8UC4, pixels,
                     info.stride);

  // cvtColor allocates its own destination, so mat owns its data once the
  // bitmap is unlocked.
  cv::Mat mat;
  cv::cvtColor(rgba, mat, cv::COLOR_RGBA2BGR);
  AndroidBitmap_unlockPixels(env, bitmap);

  return mat;
}
|
||||
|
||||
// Resizes img to `height` rows by `width` columns.
//
// @param img     source image.
// @param height  target number of rows.
// @param width   target number of columns.
// @return img itself (sharing data) when it already has the requested size,
//         otherwise a newly allocated resized image.
cv::Mat resize_img(const cv::Mat &img, int height, int width) {
  if (img.rows == height && img.cols == width) {
    return img;
  }
  cv::Mat new_img;
  // cv::Size takes (width, height); the original passed them swapped.
  cv::resize(img, new_img, cv::Size(width, height));
  return new_img;
}
|
||||
|
||||
// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
|
||||
void neon_mean_scale(const float *din, float *dout, int size,
|
||||
const std::vector<float> &mean,
|
||||
const std::vector<float> &scale) {
|
||||
if (mean.size() != 3 || scale.size() != 3) {
|
||||
LOGE("[ERROR] mean or scale size must equal to 3");
|
||||
return;
|
||||
}
|
||||
|
||||
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
|
||||
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
|
||||
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
|
||||
float32x4_t vscale0 = vdupq_n_f32(scale[0]);
|
||||
float32x4_t vscale1 = vdupq_n_f32(scale[1]);
|
||||
float32x4_t vscale2 = vdupq_n_f32(scale[2]);
|
||||
|
||||
float *dout_c0 = dout;
|
||||
float *dout_c1 = dout + size;
|
||||
float *dout_c2 = dout + size * 2;
|
||||
|
||||
int i = 0;
|
||||
for (; i < size - 3; i += 4) {
|
||||
float32x4x3_t vin3 = vld3q_f32(din);
|
||||
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
|
||||
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
|
||||
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
|
||||
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
|
||||
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
|
||||
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
|
||||
vst1q_f32(dout_c0, vs0);
|
||||
vst1q_f32(dout_c1, vs1);
|
||||
vst1q_f32(dout_c2, vs2);
|
||||
|
||||
din += 12;
|
||||
dout_c0 += 4;
|
||||
dout_c1 += 4;
|
||||
dout_c2 += 4;
|
||||
}
|
||||
for (; i < size; i++) {
|
||||
*(dout_c0++) = (*(din++) - mean[0]) * scale[0];
|
||||
*(dout_c1++) = (*(din++) - mean[1]) * scale[1];
|
||||
*(dout_c2++) = (*(din++) - mean[2]) * scale[2];
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <jni.h>
|
||||
#include <opencv2/opencv.hpp>
|
||||
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap);
|
||||
|
||||
cv::Mat resize_img(const cv::Mat &img, int height, int width);
|
||||
|
||||
void neon_mean_scale(const float *din, float *dout, int size,
|
||||
const std::vector<float> &mean,
|
||||
const std::vector<float> &scale);
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.res.Configuration;
|
||||
import android.os.Bundle;
|
||||
import android.preference.PreferenceActivity;
|
||||
import android.view.MenuInflater;
|
||||
import android.view.View;
|
||||
import android.view.ViewGroup;
|
||||
|
||||
import androidx.annotation.LayoutRes;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.appcompat.app.ActionBar;
|
||||
import androidx.appcompat.app.AppCompatDelegate;
|
||||
import androidx.appcompat.widget.Toolbar;
|
||||
|
||||
/**
|
||||
* A {@link PreferenceActivity} which implements and proxies the necessary calls
|
||||
* to be used with AppCompat.
|
||||
* <p>
|
||||
* This technique can be used with an {@link android.app.Activity} class, not just
|
||||
* {@link PreferenceActivity}.
|
||||
*/
|
||||
public abstract class AppCompatPreferenceActivity extends PreferenceActivity {
|
||||
private AppCompatDelegate mDelegate;
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
getDelegate().installViewFactory();
|
||||
getDelegate().onCreate(savedInstanceState);
|
||||
super.onCreate(savedInstanceState);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPostCreate(Bundle savedInstanceState) {
|
||||
super.onPostCreate(savedInstanceState);
|
||||
getDelegate().onPostCreate(savedInstanceState);
|
||||
}
|
||||
|
||||
public ActionBar getSupportActionBar() {
|
||||
return getDelegate().getSupportActionBar();
|
||||
}
|
||||
|
||||
public void setSupportActionBar(@Nullable Toolbar toolbar) {
|
||||
getDelegate().setSupportActionBar(toolbar);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MenuInflater getMenuInflater() {
|
||||
return getDelegate().getMenuInflater();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(@LayoutRes int layoutResID) {
|
||||
getDelegate().setContentView(layoutResID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(View view) {
|
||||
getDelegate().setContentView(view);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContentView(View view, ViewGroup.LayoutParams params) {
|
||||
getDelegate().setContentView(view, params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addContentView(View view, ViewGroup.LayoutParams params) {
|
||||
getDelegate().addContentView(view, params);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPostResume() {
|
||||
super.onPostResume();
|
||||
getDelegate().onPostResume();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onTitleChanged(CharSequence title, int color) {
|
||||
super.onTitleChanged(title, color);
|
||||
getDelegate().setTitle(title);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onConfigurationChanged(Configuration newConfig) {
|
||||
super.onConfigurationChanged(newConfig);
|
||||
getDelegate().onConfigurationChanged(newConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onStop() {
|
||||
super.onStop();
|
||||
getDelegate().onStop();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
super.onDestroy();
|
||||
getDelegate().onDestroy();
|
||||
}
|
||||
|
||||
public void invalidateOptionsMenu() {
|
||||
getDelegate().invalidateOptionsMenu();
|
||||
}
|
||||
|
||||
private AppCompatDelegate getDelegate() {
|
||||
if (mDelegate == null) {
|
||||
mDelegate = AppCompatDelegate.create(this, null);
|
||||
}
|
||||
return mDelegate;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,505 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.Manifest;
|
||||
import android.app.ProgressDialog;
|
||||
import android.content.ContentResolver;
|
||||
import android.content.Context;
|
||||
import android.content.Intent;
|
||||
import android.content.SharedPreferences;
|
||||
import android.content.pm.PackageManager;
|
||||
import android.database.Cursor;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.BitmapFactory;
|
||||
import android.graphics.drawable.BitmapDrawable;
|
||||
import android.media.ExifInterface;
|
||||
import android.content.res.AssetManager;
|
||||
import android.net.Uri;
|
||||
import android.os.Bundle;
|
||||
import android.os.Environment;
|
||||
import android.os.Handler;
|
||||
import android.os.HandlerThread;
|
||||
import android.os.Message;
|
||||
import android.preference.PreferenceManager;
|
||||
import android.provider.MediaStore;
|
||||
import android.text.method.ScrollingMovementMethod;
|
||||
import android.util.Log;
|
||||
import android.view.Menu;
|
||||
import android.view.MenuInflater;
|
||||
import android.view.MenuItem;
|
||||
import android.view.View;
|
||||
import android.widget.ImageView;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import androidx.core.app.ActivityCompat;
|
||||
import androidx.core.content.ContextCompat;
|
||||
import androidx.core.content.FileProvider;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
private static final String TAG = MainActivity.class.getSimpleName();
|
||||
public static final int OPEN_GALLERY_REQUEST_CODE = 0;
|
||||
public static final int TAKE_PHOTO_REQUEST_CODE = 1;
|
||||
|
||||
public static final int REQUEST_LOAD_MODEL = 0;
|
||||
public static final int REQUEST_RUN_MODEL = 1;
|
||||
public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
|
||||
public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
|
||||
public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
|
||||
public static final int RESPONSE_RUN_MODEL_FAILED = 3;
|
||||
|
||||
protected ProgressDialog pbLoadModel = null;
|
||||
protected ProgressDialog pbRunModel = null;
|
||||
|
||||
protected Handler receiver = null; // Receive messages from worker thread
|
||||
protected Handler sender = null; // Send command to worker thread
|
||||
protected HandlerThread worker = null; // Worker thread to load&run model
|
||||
|
||||
// UI components of object detection
|
||||
protected TextView tvInputSetting;
|
||||
protected TextView tvStatus;
|
||||
protected ImageView ivInputImage;
|
||||
protected TextView tvOutputResult;
|
||||
protected TextView tvInferenceTime;
|
||||
|
||||
// Model settings of object detection
|
||||
protected String modelPath = "";
|
||||
protected String labelPath = "";
|
||||
protected String imagePath = "";
|
||||
protected int cpuThreadNum = 1;
|
||||
protected String cpuPowerMode = "";
|
||||
protected String inputColorFormat = "";
|
||||
protected long[] inputShape = new long[]{};
|
||||
protected float[] inputMean = new float[]{};
|
||||
protected float[] inputStd = new float[]{};
|
||||
protected float scoreThreshold = 0.1f;
|
||||
private String currentPhotoPath;
|
||||
private AssetManager assetManager =null;
|
||||
|
||||
protected Predictor predictor = new Predictor();
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
|
||||
// Clear all setting items to avoid app crashing due to the incorrect settings
|
||||
SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.clear();
|
||||
editor.apply();
|
||||
|
||||
// Setup the UI components
|
||||
tvInputSetting = findViewById(R.id.tv_input_setting);
|
||||
tvStatus = findViewById(R.id.tv_model_img_status);
|
||||
ivInputImage = findViewById(R.id.iv_input_image);
|
||||
tvInferenceTime = findViewById(R.id.tv_inference_time);
|
||||
tvOutputResult = findViewById(R.id.tv_output_result);
|
||||
tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance());
|
||||
tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance());
|
||||
|
||||
// Prepare the worker thread for mode loading and inference
|
||||
receiver = new Handler() {
|
||||
@Override
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case RESPONSE_LOAD_MODEL_SUCCESSED:
|
||||
if(pbLoadModel!=null && pbLoadModel.isShowing()){
|
||||
pbLoadModel.dismiss();
|
||||
}
|
||||
onLoadModelSuccessed();
|
||||
break;
|
||||
case RESPONSE_LOAD_MODEL_FAILED:
|
||||
if(pbLoadModel!=null && pbLoadModel.isShowing()){
|
||||
pbLoadModel.dismiss();
|
||||
}
|
||||
Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
|
||||
onLoadModelFailed();
|
||||
break;
|
||||
case RESPONSE_RUN_MODEL_SUCCESSED:
|
||||
if(pbRunModel!=null && pbRunModel.isShowing()){
|
||||
pbRunModel.dismiss();
|
||||
}
|
||||
onRunModelSuccessed();
|
||||
break;
|
||||
case RESPONSE_RUN_MODEL_FAILED:
|
||||
if(pbRunModel!=null && pbRunModel.isShowing()){
|
||||
pbRunModel.dismiss();
|
||||
}
|
||||
Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
|
||||
onRunModelFailed();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
worker = new HandlerThread("Predictor Worker");
|
||||
worker.start();
|
||||
sender = new Handler(worker.getLooper()) {
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case REQUEST_LOAD_MODEL:
|
||||
// Load model and reload test image
|
||||
if (onLoadModel()) {
|
||||
receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_SUCCESSED);
|
||||
} else {
|
||||
receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_FAILED);
|
||||
}
|
||||
break;
|
||||
case REQUEST_RUN_MODEL:
|
||||
// Run model if model is loaded
|
||||
if (onRunModel()) {
|
||||
receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_SUCCESSED);
|
||||
} else {
|
||||
receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_FAILED);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
boolean settingsChanged = false;
|
||||
String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
String label_path = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY),
|
||||
getString(R.string.LABEL_PATH_DEFAULT));
|
||||
String image_path = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
|
||||
getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
settingsChanged |= !model_path.equalsIgnoreCase(modelPath);
|
||||
settingsChanged |= !label_path.equalsIgnoreCase(labelPath);
|
||||
settingsChanged |= !image_path.equalsIgnoreCase(imagePath);
|
||||
int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
|
||||
getString(R.string.CPU_THREAD_NUM_DEFAULT)));
|
||||
settingsChanged |= cpu_thread_num != cpuThreadNum;
|
||||
String cpu_power_mode =
|
||||
sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
|
||||
getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
settingsChanged |= !cpu_power_mode.equalsIgnoreCase(cpuPowerMode);
|
||||
String input_color_format =
|
||||
sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
settingsChanged |= !input_color_format.equalsIgnoreCase(inputColorFormat);
|
||||
long[] input_shape =
|
||||
Utils.parseLongsFromString(sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
|
||||
getString(R.string.INPUT_SHAPE_DEFAULT)), ",");
|
||||
float[] input_mean =
|
||||
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
|
||||
getString(R.string.INPUT_MEAN_DEFAULT)), ",");
|
||||
float[] input_std =
|
||||
Utils.parseFloatsFromString(sharedPreferences.getString(getString(R.string.INPUT_STD_KEY)
|
||||
, getString(R.string.INPUT_STD_DEFAULT)), ",");
|
||||
settingsChanged |= input_shape.length != inputShape.length;
|
||||
settingsChanged |= input_mean.length != inputMean.length;
|
||||
settingsChanged |= input_std.length != inputStd.length;
|
||||
if (!settingsChanged) {
|
||||
for (int i = 0; i < input_shape.length; i++) {
|
||||
settingsChanged |= input_shape[i] != inputShape[i];
|
||||
}
|
||||
for (int i = 0; i < input_mean.length; i++) {
|
||||
settingsChanged |= input_mean[i] != inputMean[i];
|
||||
}
|
||||
for (int i = 0; i < input_std.length; i++) {
|
||||
settingsChanged |= input_std[i] != inputStd[i];
|
||||
}
|
||||
}
|
||||
float score_threshold =
|
||||
Float.parseFloat(sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
getString(R.string.SCORE_THRESHOLD_DEFAULT)));
|
||||
settingsChanged |= scoreThreshold != score_threshold;
|
||||
if (settingsChanged) {
|
||||
modelPath = model_path;
|
||||
labelPath = label_path;
|
||||
imagePath = image_path;
|
||||
cpuThreadNum = cpu_thread_num;
|
||||
cpuPowerMode = cpu_power_mode;
|
||||
inputColorFormat = input_color_format;
|
||||
inputShape = input_shape;
|
||||
inputMean = input_mean;
|
||||
inputStd = input_std;
|
||||
scoreThreshold = score_threshold;
|
||||
// Update UI
|
||||
tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\n" + "CPU" +
|
||||
" Thread Num: " + Integer.toString(cpuThreadNum) + "\n" + "CPU Power Mode: " + cpuPowerMode);
|
||||
tvInputSetting.scrollTo(0, 0);
|
||||
// Reload model if configure has been changed
|
||||
// loadModel();
|
||||
set_img();
|
||||
}
|
||||
}
|
||||
|
||||
public void loadModel() {
|
||||
pbLoadModel = ProgressDialog.show(this, "", "loading model...", false, false);
|
||||
sender.sendEmptyMessage(REQUEST_LOAD_MODEL);
|
||||
}
|
||||
|
||||
public void runModel() {
|
||||
pbRunModel = ProgressDialog.show(this, "", "running model...", false, false);
|
||||
sender.sendEmptyMessage(REQUEST_RUN_MODEL);
|
||||
}
|
||||
|
||||
public boolean onLoadModel() {
|
||||
return predictor.init(MainActivity.this, modelPath, labelPath, cpuThreadNum,
|
||||
cpuPowerMode,
|
||||
inputColorFormat,
|
||||
inputShape, inputMean,
|
||||
inputStd, scoreThreshold);
|
||||
}
|
||||
|
||||
public boolean onRunModel() {
|
||||
return predictor.isLoaded() && predictor.runModel();
|
||||
}
|
||||
|
||||
public void onLoadModelSuccessed() {
|
||||
// Load test image from path and run model
|
||||
tvStatus.setText("STATUS: load model successed");
|
||||
}
|
||||
|
||||
public void onLoadModelFailed() {
|
||||
tvStatus.setText("STATUS: load model failed");
|
||||
}
|
||||
|
||||
public void onRunModelSuccessed() {
|
||||
tvStatus.setText("STATUS: run model successed");
|
||||
// Obtain results and update UI
|
||||
tvInferenceTime.setText("Inference time: " + predictor.inferenceTime() + " ms");
|
||||
Bitmap outputImage = predictor.outputImage();
|
||||
if (outputImage != null) {
|
||||
ivInputImage.setImageBitmap(outputImage);
|
||||
}
|
||||
tvOutputResult.setText(predictor.outputResult());
|
||||
tvOutputResult.scrollTo(0, 0);
|
||||
}
|
||||
|
||||
public void onRunModelFailed() {
|
||||
tvStatus.setText("STATUS: run model failed");
|
||||
}
|
||||
|
||||
public void onImageChanged(Bitmap image) {
|
||||
// Rerun model if users pick test image from gallery or camera
|
||||
if (image != null && predictor.isLoaded()) {
|
||||
predictor.setInputImage(image);
|
||||
runModel();
|
||||
}
|
||||
}
|
||||
|
||||
public void set_img() {
|
||||
// Load test image from path and run model
|
||||
try {
|
||||
assetManager= getAssets();
|
||||
InputStream in=assetManager.open(imagePath);
|
||||
Bitmap bmp=BitmapFactory.decodeStream(in);
|
||||
ivInputImage.setImageBitmap(bmp);
|
||||
} catch (IOException e) {
|
||||
Toast.makeText(MainActivity.this, "Load image failed!", Toast.LENGTH_SHORT).show();
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public void onSettingsClicked() {
|
||||
startActivity(new Intent(MainActivity.this, SettingsActivity.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onCreateOptionsMenu(Menu menu) {
|
||||
MenuInflater inflater = getMenuInflater();
|
||||
inflater.inflate(R.menu.menu_action_options, menu);
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean onPrepareOptionsMenu(Menu menu) {
|
||||
boolean isLoaded = predictor.isLoaded();
|
||||
return super.onPrepareOptionsMenu(menu);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onOptionsItemSelected(MenuItem item) {
|
||||
switch (item.getItemId()) {
|
||||
case android.R.id.home:
|
||||
finish();
|
||||
break;
|
||||
case R.id.settings:
|
||||
if (requestAllPermissions()) {
|
||||
// Make sure we have SDCard r&w permissions to load model from SDCard
|
||||
onSettingsClicked();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return super.onOptionsItemSelected(item);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
|
||||
@NonNull int[] grantResults) {
|
||||
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
if (grantResults[0] != PackageManager.PERMISSION_GRANTED || grantResults[1] != PackageManager.PERMISSION_GRANTED) {
|
||||
Toast.makeText(this, "Permission Denied", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean requestAllPermissions() {
|
||||
if (ContextCompat.checkSelfPermission(this, Manifest.permission.WRITE_EXTERNAL_STORAGE)
|
||||
!= PackageManager.PERMISSION_GRANTED || ContextCompat.checkSelfPermission(this,
|
||||
Manifest.permission.CAMERA)
|
||||
!= PackageManager.PERMISSION_GRANTED) {
|
||||
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE,
|
||||
Manifest.permission.CAMERA},
|
||||
0);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void openGallery() {
|
||||
Intent intent = new Intent(Intent.ACTION_PICK, null);
|
||||
intent.setDataAndType(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, "image/*");
|
||||
startActivityForResult(intent, OPEN_GALLERY_REQUEST_CODE);
|
||||
}
|
||||
|
||||
private void takePhoto() {
|
||||
Intent takePictureIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
|
||||
// Ensure that there's a camera activity to handle the intent
|
||||
if (takePictureIntent.resolveActivity(getPackageManager()) != null) {
|
||||
// Create the File where the photo should go
|
||||
File photoFile = null;
|
||||
try {
|
||||
photoFile = createImageFile();
|
||||
} catch (IOException ex) {
|
||||
Log.e("MainActitity", ex.getMessage(), ex);
|
||||
Toast.makeText(MainActivity.this,
|
||||
"Create Camera temp file failed: " + ex.getMessage(), Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
// Continue only if the File was successfully created
|
||||
if (photoFile != null) {
|
||||
Log.i(TAG, "FILEPATH " + getExternalFilesDir("Pictures").getAbsolutePath());
|
||||
Uri photoURI = FileProvider.getUriForFile(this,
|
||||
"com.baidu.paddle.lite.demo.ocr.fileprovider",
|
||||
photoFile);
|
||||
currentPhotoPath = photoFile.getAbsolutePath();
|
||||
takePictureIntent.putExtra(MediaStore.EXTRA_OUTPUT, photoURI);
|
||||
startActivityForResult(takePictureIntent, TAKE_PHOTO_REQUEST_CODE);
|
||||
Log.i(TAG, "startActivityForResult finished");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private File createImageFile() throws IOException {
|
||||
// Create an image file name
|
||||
String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
|
||||
String imageFileName = "JPEG_" + timeStamp + "_";
|
||||
File storageDir = getExternalFilesDir(Environment.DIRECTORY_PICTURES);
|
||||
File image = File.createTempFile(
|
||||
imageFileName, /* prefix */
|
||||
".bmp", /* suffix */
|
||||
storageDir /* directory */
|
||||
);
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
|
||||
super.onActivityResult(requestCode, resultCode, data);
|
||||
if (resultCode == RESULT_OK) {
|
||||
switch (requestCode) {
|
||||
case OPEN_GALLERY_REQUEST_CODE:
|
||||
if (data == null) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
ContentResolver resolver = getContentResolver();
|
||||
Uri uri = data.getData();
|
||||
Bitmap image = MediaStore.Images.Media.getBitmap(resolver, uri);
|
||||
String[] proj = {MediaStore.Images.Media.DATA};
|
||||
Cursor cursor = managedQuery(uri, proj, null, null, null);
|
||||
cursor.moveToFirst();
|
||||
if (image != null) {
|
||||
// onImageChanged(image);
|
||||
ivInputImage.setImageBitmap(image);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, e.toString());
|
||||
}
|
||||
break;
|
||||
case TAKE_PHOTO_REQUEST_CODE:
|
||||
if (currentPhotoPath != null) {
|
||||
ExifInterface exif = null;
|
||||
try {
|
||||
exif = new ExifInterface(currentPhotoPath);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
int orientation = exif.getAttributeInt(ExifInterface.TAG_ORIENTATION,
|
||||
ExifInterface.ORIENTATION_UNDEFINED);
|
||||
Log.i(TAG, "rotation " + orientation);
|
||||
Bitmap image = BitmapFactory.decodeFile(currentPhotoPath);
|
||||
image = Utils.rotateBitmap(image, orientation);
|
||||
if (image != null) {
|
||||
// onImageChanged(image);
|
||||
ivInputImage.setImageBitmap(image);
|
||||
}
|
||||
} else {
|
||||
Log.e(TAG, "currentPhotoPath is null");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void btn_load_model_click(View view) {
|
||||
tvStatus.setText("STATUS: load model ......");
|
||||
loadModel();
|
||||
}
|
||||
|
||||
public void btn_run_model_click(View view) {
|
||||
Bitmap image =((BitmapDrawable)ivInputImage.getDrawable()).getBitmap();
|
||||
if(image == null) {
|
||||
tvStatus.setText("STATUS: image is not exists");
|
||||
}
|
||||
else if (!predictor.isLoaded()){
|
||||
tvStatus.setText("STATUS: model is not loaded");
|
||||
}else{
|
||||
tvStatus.setText("STATUS: run model ...... ");
|
||||
predictor.setInputImage(image);
|
||||
runModel();
|
||||
}
|
||||
}
|
||||
public void btn_choice_img_click(View view) {
|
||||
if (requestAllPermissions()) {
|
||||
openGallery();
|
||||
}
|
||||
}
|
||||
|
||||
public void btn_take_photo_click(View view) {
|
||||
if (requestAllPermissions()) {
|
||||
takePhoto();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
if (predictor != null) {
|
||||
predictor.releaseModel();
|
||||
}
|
||||
worker.quit();
|
||||
super.onDestroy();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.BitmapFactory;
|
||||
import android.os.Build;
|
||||
import android.os.Bundle;
|
||||
import android.os.Handler;
|
||||
import android.os.HandlerThread;
|
||||
import android.os.Message;
|
||||
import android.util.Log;
|
||||
import android.view.View;
|
||||
import android.widget.Button;
|
||||
import android.widget.ImageView;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
public class MiniActivity extends AppCompatActivity {
|
||||
|
||||
|
||||
public static final int REQUEST_LOAD_MODEL = 0;
|
||||
public static final int REQUEST_RUN_MODEL = 1;
|
||||
public static final int REQUEST_UNLOAD_MODEL = 2;
|
||||
public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
|
||||
public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
|
||||
public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
|
||||
public static final int RESPONSE_RUN_MODEL_FAILED = 3;
|
||||
|
||||
private static final String TAG = "MiniActivity";
|
||||
|
||||
protected Handler receiver = null; // Receive messages from worker thread
|
||||
protected Handler sender = null; // Send command to worker thread
|
||||
protected HandlerThread worker = null; // Worker thread to load&run model
|
||||
protected volatile Predictor predictor = null;
|
||||
|
||||
private String assetModelDirPath = "models/ocr_v2_for_cpu";
|
||||
private String assetlabelFilePath = "labels/ppocr_keys_v1.txt";
|
||||
|
||||
private Button button;
|
||||
private ImageView imageView; // image result
|
||||
private TextView textView; // text result
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_mini);
|
||||
|
||||
Log.i(TAG, "SHOW in Logcat");
|
||||
|
||||
// Prepare the worker thread for mode loading and inference
|
||||
worker = new HandlerThread("Predictor Worker");
|
||||
worker.start();
|
||||
sender = new Handler(worker.getLooper()) {
|
||||
public void handleMessage(Message msg) {
|
||||
switch (msg.what) {
|
||||
case REQUEST_LOAD_MODEL:
|
||||
// Load model and reload test image
|
||||
if (!onLoadModel()) {
|
||||
runOnUiThread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
Toast.makeText(MiniActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
});
|
||||
}
|
||||
break;
|
||||
case REQUEST_RUN_MODEL:
|
||||
// Run model if model is loaded
|
||||
final boolean isSuccessed = onRunModel();
|
||||
runOnUiThread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
if (isSuccessed){
|
||||
onRunModelSuccessed();
|
||||
}else{
|
||||
Toast.makeText(MiniActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
sender.sendEmptyMessage(REQUEST_LOAD_MODEL); // corresponding to REQUEST_LOAD_MODEL, to call onLoadModel()
|
||||
|
||||
imageView = findViewById(R.id.imageView);
|
||||
textView = findViewById(R.id.sample_text);
|
||||
button = findViewById(R.id.button);
|
||||
button.setOnClickListener(new View.OnClickListener() {
|
||||
@Override
|
||||
public void onClick(View v) {
|
||||
sender.sendEmptyMessage(REQUEST_RUN_MODEL);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
onUnloadModel();
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR2) {
|
||||
worker.quitSafely();
|
||||
} else {
|
||||
worker.quit();
|
||||
}
|
||||
super.onDestroy();
|
||||
}
|
||||
|
||||
/**
|
||||
* call in onCreate, model init
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean onLoadModel() {
|
||||
if (predictor == null) {
|
||||
predictor = new Predictor();
|
||||
}
|
||||
return predictor.init(this, assetModelDirPath, assetlabelFilePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* init engine
|
||||
* call in onCreate
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private boolean onRunModel() {
|
||||
try {
|
||||
String assetImagePath = "images/0.jpg";
|
||||
InputStream imageStream = getAssets().open(assetImagePath);
|
||||
Bitmap image = BitmapFactory.decodeStream(imageStream);
|
||||
// Input is Bitmap
|
||||
predictor.setInputImage(image);
|
||||
return predictor.isLoaded() && predictor.runModel();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private void onRunModelSuccessed() {
|
||||
Log.i(TAG, "onRunModelSuccessed");
|
||||
textView.setText(predictor.outputResult);
|
||||
imageView.setImageBitmap(predictor.outputImage);
|
||||
}
|
||||
|
||||
private void onUnloadModel() {
|
||||
if (predictor != null) {
|
||||
predictor.releaseModel();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Bitmap;
|
||||
import android.util.Log;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class OCRPredictorNative {
|
||||
|
||||
private static final AtomicBoolean isSOLoaded = new AtomicBoolean();
|
||||
|
||||
public static void loadLibrary() throws RuntimeException {
|
||||
if (!isSOLoaded.get() && isSOLoaded.compareAndSet(false, true)) {
|
||||
try {
|
||||
System.loadLibrary("Native");
|
||||
} catch (Throwable e) {
|
||||
RuntimeException exception = new RuntimeException(
|
||||
"Load libNative.so failed, please check it exists in apk file.", e);
|
||||
throw exception;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Config config;
|
||||
|
||||
private long nativePointer = 0;
|
||||
|
||||
public OCRPredictorNative(Config config) {
|
||||
this.config = config;
|
||||
loadLibrary();
|
||||
nativePointer = init(config.detModelFilename, config.recModelFilename,config.clsModelFilename,
|
||||
config.cpuThreadNum, config.cpuPower);
|
||||
Log.i("OCRPredictorNative", "load success " + nativePointer);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public ArrayList<OcrResultModel> runImage(float[] inputData, int width, int height, int channels, Bitmap originalImage) {
|
||||
Log.i("OCRPredictorNative", "begin to run image " + inputData.length + " " + width + " " + height);
|
||||
float[] dims = new float[]{1, channels, height, width};
|
||||
float[] rawResults = forward(nativePointer, inputData, dims, originalImage);
|
||||
ArrayList<OcrResultModel> results = postprocess(rawResults);
|
||||
return results;
|
||||
}
|
||||
|
||||
public static class Config {
|
||||
public int cpuThreadNum;
|
||||
public String cpuPower;
|
||||
public String detModelFilename;
|
||||
public String recModelFilename;
|
||||
public String clsModelFilename;
|
||||
|
||||
}
|
||||
|
||||
public void destory(){
|
||||
if (nativePointer > 0) {
|
||||
release(nativePointer);
|
||||
nativePointer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
protected native long init(String detModelPath, String recModelPath,String clsModelPath, int threadNum, String cpuMode);
|
||||
|
||||
protected native float[] forward(long pointer, float[] buf, float[] ddims, Bitmap originalImage);
|
||||
|
||||
protected native void release(long pointer);
|
||||
|
||||
private ArrayList<OcrResultModel> postprocess(float[] raw) {
|
||||
ArrayList<OcrResultModel> results = new ArrayList<OcrResultModel>();
|
||||
int begin = 0;
|
||||
|
||||
while (begin < raw.length) {
|
||||
int point_num = Math.round(raw[begin]);
|
||||
int word_num = Math.round(raw[begin + 1]);
|
||||
OcrResultModel model = parse(raw, begin + 2, point_num, word_num);
|
||||
begin += 2 + 1 + point_num * 2 + word_num;
|
||||
results.add(model);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private OcrResultModel parse(float[] raw, int begin, int pointNum, int wordNum) {
|
||||
int current = begin;
|
||||
OcrResultModel model = new OcrResultModel();
|
||||
model.setConfidence(raw[current]);
|
||||
current++;
|
||||
for (int i = 0; i < pointNum; i++) {
|
||||
model.addPoints(Math.round(raw[current + i * 2]), Math.round(raw[current + i * 2 + 1]));
|
||||
}
|
||||
current += (pointNum * 2);
|
||||
for (int i = 0; i < wordNum; i++) {
|
||||
int index = Math.round(raw[current + i]);
|
||||
model.addWordIndex(index);
|
||||
}
|
||||
Log.i("OCRPredictorNative", "word finished " + wordNum);
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.graphics.Point;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class OcrResultModel {
|
||||
private List<Point> points;
|
||||
private List<Integer> wordIndex;
|
||||
private String label;
|
||||
private float confidence;
|
||||
|
||||
public OcrResultModel() {
|
||||
super();
|
||||
points = new ArrayList<>();
|
||||
wordIndex = new ArrayList<>();
|
||||
}
|
||||
|
||||
public void addPoints(int x, int y) {
|
||||
Point point = new Point(x, y);
|
||||
points.add(point);
|
||||
}
|
||||
|
||||
public void addWordIndex(int index) {
|
||||
wordIndex.add(index);
|
||||
}
|
||||
|
||||
public List<Point> getPoints() {
|
||||
return points;
|
||||
}
|
||||
|
||||
public List<Integer> getWordIndex() {
|
||||
return wordIndex;
|
||||
}
|
||||
|
||||
public String getLabel() {
|
||||
return label;
|
||||
}
|
||||
|
||||
public void setLabel(String label) {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public float getConfidence() {
|
||||
return confidence;
|
||||
}
|
||||
|
||||
public void setConfidence(float confidence) {
|
||||
this.confidence = confidence;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,357 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.Canvas;
|
||||
import android.graphics.Color;
|
||||
import android.graphics.Paint;
|
||||
import android.graphics.Path;
|
||||
import android.graphics.Point;
|
||||
import android.util.Log;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Vector;
|
||||
|
||||
import static android.graphics.Color.*;
|
||||
|
||||
public class Predictor {
|
||||
private static final String TAG = Predictor.class.getSimpleName();
|
||||
public boolean isLoaded = false;
|
||||
public int warmupIterNum = 1;
|
||||
public int inferIterNum = 1;
|
||||
public int cpuThreadNum = 4;
|
||||
public String cpuPowerMode = "LITE_POWER_HIGH";
|
||||
public String modelPath = "";
|
||||
public String modelName = "";
|
||||
protected OCRPredictorNative paddlePredictor = null;
|
||||
protected float inferenceTime = 0;
|
||||
// Only for object detection
|
||||
protected Vector<String> wordLabels = new Vector<String>();
|
||||
protected String inputColorFormat = "BGR";
|
||||
protected long[] inputShape = new long[]{1, 3, 960};
|
||||
protected float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
|
||||
protected float[] inputStd = new float[]{1.0f / 0.229f, 1.0f / 0.224f, 1.0f / 0.225f};
|
||||
protected float scoreThreshold = 0.1f;
|
||||
protected Bitmap inputImage = null;
|
||||
protected Bitmap outputImage = null;
|
||||
protected volatile String outputResult = "";
|
||||
protected float preprocessTime = 0;
|
||||
protected float postprocessTime = 0;
|
||||
|
||||
|
||||
public Predictor() {
|
||||
}
|
||||
|
||||
public boolean init(Context appCtx, String modelPath, String labelPath) {
|
||||
isLoaded = loadModel(appCtx, modelPath, cpuThreadNum, cpuPowerMode);
|
||||
if (!isLoaded) {
|
||||
return false;
|
||||
}
|
||||
isLoaded = loadLabel(appCtx, labelPath);
|
||||
return isLoaded;
|
||||
}
|
||||
|
||||
|
||||
public boolean init(Context appCtx, String modelPath, String labelPath, int cpuThreadNum, String cpuPowerMode,
|
||||
String inputColorFormat,
|
||||
long[] inputShape, float[] inputMean,
|
||||
float[] inputStd, float scoreThreshold) {
|
||||
if (inputShape.length != 3) {
|
||||
Log.e(TAG, "Size of input shape should be: 3");
|
||||
return false;
|
||||
}
|
||||
if (inputMean.length != inputShape[1]) {
|
||||
Log.e(TAG, "Size of input mean should be: " + Long.toString(inputShape[1]));
|
||||
return false;
|
||||
}
|
||||
if (inputStd.length != inputShape[1]) {
|
||||
Log.e(TAG, "Size of input std should be: " + Long.toString(inputShape[1]));
|
||||
return false;
|
||||
}
|
||||
if (inputShape[0] != 1) {
|
||||
Log.e(TAG, "Only one batch is supported in the image classification demo, you can use any batch size in " +
|
||||
"your Apps!");
|
||||
return false;
|
||||
}
|
||||
if (inputShape[1] != 1 && inputShape[1] != 3) {
|
||||
Log.e(TAG, "Only one/three channels are supported in the image classification demo, you can use any " +
|
||||
"channel size in your Apps!");
|
||||
return false;
|
||||
}
|
||||
if (!inputColorFormat.equalsIgnoreCase("BGR")) {
|
||||
Log.e(TAG, "Only BGR color format is supported.");
|
||||
return false;
|
||||
}
|
||||
boolean isLoaded = init(appCtx, modelPath, labelPath);
|
||||
if (!isLoaded) {
|
||||
return false;
|
||||
}
|
||||
this.inputColorFormat = inputColorFormat;
|
||||
this.inputShape = inputShape;
|
||||
this.inputMean = inputMean;
|
||||
this.inputStd = inputStd;
|
||||
this.scoreThreshold = scoreThreshold;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean loadModel(Context appCtx, String modelPath, int cpuThreadNum, String cpuPowerMode) {
|
||||
// Release model if exists
|
||||
releaseModel();
|
||||
|
||||
// Load model
|
||||
if (modelPath.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
String realPath = modelPath;
|
||||
if (!modelPath.substring(0, 1).equals("/")) {
|
||||
// Read model files from custom path if the first character of mode path is '/'
|
||||
// otherwise copy model to cache from assets
|
||||
realPath = appCtx.getCacheDir() + "/" + modelPath;
|
||||
Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath);
|
||||
}
|
||||
if (realPath.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OCRPredictorNative.Config config = new OCRPredictorNative.Config();
|
||||
config.cpuThreadNum = cpuThreadNum;
|
||||
config.detModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_det_opt.nb";
|
||||
config.recModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_rec_opt.nb";
|
||||
config.clsModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_cls_opt.nb";
|
||||
Log.e("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
|
||||
config.cpuPower = cpuPowerMode;
|
||||
paddlePredictor = new OCRPredictorNative(config);
|
||||
|
||||
this.cpuThreadNum = cpuThreadNum;
|
||||
this.cpuPowerMode = cpuPowerMode;
|
||||
this.modelPath = realPath;
|
||||
this.modelName = realPath.substring(realPath.lastIndexOf("/") + 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
public void releaseModel() {
|
||||
if (paddlePredictor != null) {
|
||||
paddlePredictor.destory();
|
||||
paddlePredictor = null;
|
||||
}
|
||||
isLoaded = false;
|
||||
cpuThreadNum = 1;
|
||||
cpuPowerMode = "LITE_POWER_HIGH";
|
||||
modelPath = "";
|
||||
modelName = "";
|
||||
}
|
||||
|
||||
protected boolean loadLabel(Context appCtx, String labelPath) {
|
||||
wordLabels.clear();
|
||||
wordLabels.add("black");
|
||||
// Load word labels from file
|
||||
try {
|
||||
InputStream assetsInputStream = appCtx.getAssets().open(labelPath);
|
||||
int available = assetsInputStream.available();
|
||||
byte[] lines = new byte[available];
|
||||
assetsInputStream.read(lines);
|
||||
assetsInputStream.close();
|
||||
String words = new String(lines);
|
||||
String[] contents = words.split("\n");
|
||||
for (String content : contents) {
|
||||
wordLabels.add(content);
|
||||
}
|
||||
Log.i(TAG, "Word label size: " + wordLabels.size());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, e.getMessage());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean runModel() {
|
||||
if (inputImage == null || !isLoaded()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pre-process image, and feed input tensor with pre-processed data
|
||||
|
||||
Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);
|
||||
|
||||
Date start = new Date();
|
||||
int channels = (int) inputShape[1];
|
||||
int width = scaleImage.getWidth();
|
||||
int height = scaleImage.getHeight();
|
||||
float[] inputData = new float[channels * width * height];
|
||||
if (channels == 3) {
|
||||
int[] channelIdx = null;
|
||||
if (inputColorFormat.equalsIgnoreCase("RGB")) {
|
||||
channelIdx = new int[]{0, 1, 2};
|
||||
} else if (inputColorFormat.equalsIgnoreCase("BGR")) {
|
||||
channelIdx = new int[]{2, 1, 0};
|
||||
} else {
|
||||
Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
|
||||
"supported!");
|
||||
return false;
|
||||
}
|
||||
int[] channelStride = new int[]{width * height, width * height * 2};
|
||||
int p = scaleImage.getPixel(scaleImage.getWidth() - 1, scaleImage.getHeight() - 1);
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int color = scaleImage.getPixel(x, y);
|
||||
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
|
||||
(float) blue(color) / 255.0f};
|
||||
inputData[y * width + x] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
|
||||
inputData[y * width + x + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
|
||||
inputData[y * width + x + channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
|
||||
|
||||
}
|
||||
}
|
||||
} else if (channels == 1) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int color = inputImage.getPixel(x, y);
|
||||
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
|
||||
inputData[y * width + x] = (gray - inputMean[0]) / inputStd[0];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
|
||||
"supported!");
|
||||
return false;
|
||||
}
|
||||
float[] pixels = inputData;
|
||||
Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
|
||||
+ " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
|
||||
Date end = new Date();
|
||||
preprocessTime = (float) (end.getTime() - start.getTime());
|
||||
|
||||
// Warm up
|
||||
for (int i = 0; i < warmupIterNum; i++) {
|
||||
paddlePredictor.runImage(inputData, width, height, channels, inputImage);
|
||||
}
|
||||
warmupIterNum = 0; // do not need warm
|
||||
// Run inference
|
||||
start = new Date();
|
||||
ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage);
|
||||
end = new Date();
|
||||
inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;
|
||||
|
||||
results = postprocess(results);
|
||||
Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime
|
||||
+ " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
|
||||
drawResults(results);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public boolean isLoaded() {
|
||||
return paddlePredictor != null && isLoaded;
|
||||
}
|
||||
|
||||
public String modelPath() {
|
||||
return modelPath;
|
||||
}
|
||||
|
||||
public String modelName() {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
public int cpuThreadNum() {
|
||||
return cpuThreadNum;
|
||||
}
|
||||
|
||||
public String cpuPowerMode() {
|
||||
return cpuPowerMode;
|
||||
}
|
||||
|
||||
public float inferenceTime() {
|
||||
return inferenceTime;
|
||||
}
|
||||
|
||||
public Bitmap inputImage() {
|
||||
return inputImage;
|
||||
}
|
||||
|
||||
public Bitmap outputImage() {
|
||||
return outputImage;
|
||||
}
|
||||
|
||||
public String outputResult() {
|
||||
return outputResult;
|
||||
}
|
||||
|
||||
public float preprocessTime() {
|
||||
return preprocessTime;
|
||||
}
|
||||
|
||||
public float postprocessTime() {
|
||||
return postprocessTime;
|
||||
}
|
||||
|
||||
|
||||
public void setInputImage(Bitmap image) {
|
||||
if (image == null) {
|
||||
return;
|
||||
}
|
||||
this.inputImage = image.copy(Bitmap.Config.ARGB_8888, true);
|
||||
}
|
||||
|
||||
private ArrayList<OcrResultModel> postprocess(ArrayList<OcrResultModel> results) {
|
||||
for (OcrResultModel r : results) {
|
||||
StringBuffer word = new StringBuffer();
|
||||
for (int index : r.getWordIndex()) {
|
||||
if (index >= 0 && index < wordLabels.size()) {
|
||||
word.append(wordLabels.get(index));
|
||||
} else {
|
||||
Log.e(TAG, "Word index is not in label list:" + index);
|
||||
word.append("×");
|
||||
}
|
||||
}
|
||||
r.setLabel(word.toString());
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private void drawResults(ArrayList<OcrResultModel> results) {
|
||||
StringBuffer outputResultSb = new StringBuffer("");
|
||||
for (int i = 0; i < results.size(); i++) {
|
||||
OcrResultModel result = results.get(i);
|
||||
StringBuilder sb = new StringBuilder("");
|
||||
sb.append(result.getLabel());
|
||||
sb.append(" ").append(result.getConfidence());
|
||||
sb.append("; Points: ");
|
||||
for (Point p : result.getPoints()) {
|
||||
sb.append("(").append(p.x).append(",").append(p.y).append(") ");
|
||||
}
|
||||
Log.i(TAG, sb.toString()); // show LOG in Logcat panel
|
||||
outputResultSb.append(i + 1).append(": ").append(result.getLabel()).append("\n");
|
||||
}
|
||||
outputResult = outputResultSb.toString();
|
||||
outputImage = inputImage;
|
||||
Canvas canvas = new Canvas(outputImage);
|
||||
Paint paintFillAlpha = new Paint();
|
||||
paintFillAlpha.setStyle(Paint.Style.FILL);
|
||||
paintFillAlpha.setColor(Color.parseColor("#3B85F5"));
|
||||
paintFillAlpha.setAlpha(50);
|
||||
|
||||
Paint paint = new Paint();
|
||||
paint.setColor(Color.parseColor("#3B85F5"));
|
||||
paint.setStrokeWidth(5);
|
||||
paint.setStyle(Paint.Style.STROKE);
|
||||
|
||||
for (OcrResultModel result : results) {
|
||||
Path path = new Path();
|
||||
List<Point> points = result.getPoints();
|
||||
path.moveTo(points.get(0).x, points.get(0).y);
|
||||
for (int i = points.size() - 1; i >= 0; i--) {
|
||||
Point p = points.get(i);
|
||||
path.lineTo(p.x, p.y);
|
||||
}
|
||||
canvas.drawPath(path, paint);
|
||||
canvas.drawPath(path, paintFillAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.Bundle;
|
||||
import android.preference.CheckBoxPreference;
|
||||
import android.preference.EditTextPreference;
|
||||
import android.preference.ListPreference;
|
||||
|
||||
import androidx.appcompat.app.ActionBar;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class SettingsActivity extends AppCompatPreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener {
|
||||
ListPreference lpChoosePreInstalledModel = null;
|
||||
CheckBoxPreference cbEnableCustomSettings = null;
|
||||
EditTextPreference etModelPath = null;
|
||||
EditTextPreference etLabelPath = null;
|
||||
ListPreference etImagePath = null;
|
||||
ListPreference lpCPUThreadNum = null;
|
||||
ListPreference lpCPUPowerMode = null;
|
||||
ListPreference lpInputColorFormat = null;
|
||||
EditTextPreference etInputShape = null;
|
||||
EditTextPreference etInputMean = null;
|
||||
EditTextPreference etInputStd = null;
|
||||
EditTextPreference etScoreThreshold = null;
|
||||
|
||||
List<String> preInstalledModelPaths = null;
|
||||
List<String> preInstalledLabelPaths = null;
|
||||
List<String> preInstalledImagePaths = null;
|
||||
List<String> preInstalledInputShapes = null;
|
||||
List<String> preInstalledCPUThreadNums = null;
|
||||
List<String> preInstalledCPUPowerModes = null;
|
||||
List<String> preInstalledInputColorFormats = null;
|
||||
List<String> preInstalledInputMeans = null;
|
||||
List<String> preInstalledInputStds = null;
|
||||
List<String> preInstalledScoreThresholds = null;
|
||||
|
||||
@Override
|
||||
public void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
addPreferencesFromResource(R.xml.settings);
|
||||
ActionBar supportActionBar = getSupportActionBar();
|
||||
if (supportActionBar != null) {
|
||||
supportActionBar.setDisplayHomeAsUpEnabled(true);
|
||||
}
|
||||
|
||||
// Initialized pre-installed models
|
||||
preInstalledModelPaths = new ArrayList<String>();
|
||||
preInstalledLabelPaths = new ArrayList<String>();
|
||||
preInstalledImagePaths = new ArrayList<String>();
|
||||
preInstalledInputShapes = new ArrayList<String>();
|
||||
preInstalledCPUThreadNums = new ArrayList<String>();
|
||||
preInstalledCPUPowerModes = new ArrayList<String>();
|
||||
preInstalledInputColorFormats = new ArrayList<String>();
|
||||
preInstalledInputMeans = new ArrayList<String>();
|
||||
preInstalledInputStds = new ArrayList<String>();
|
||||
preInstalledScoreThresholds = new ArrayList<String>();
|
||||
// Add ssd_mobilenet_v1_pascalvoc_for_cpu
|
||||
preInstalledModelPaths.add(getString(R.string.MODEL_PATH_DEFAULT));
|
||||
preInstalledLabelPaths.add(getString(R.string.LABEL_PATH_DEFAULT));
|
||||
preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT));
|
||||
preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
preInstalledInputColorFormats.add(getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
preInstalledInputShapes.add(getString(R.string.INPUT_SHAPE_DEFAULT));
|
||||
preInstalledInputMeans.add(getString(R.string.INPUT_MEAN_DEFAULT));
|
||||
preInstalledInputStds.add(getString(R.string.INPUT_STD_DEFAULT));
|
||||
preInstalledScoreThresholds.add(getString(R.string.SCORE_THRESHOLD_DEFAULT));
|
||||
|
||||
// Setup UI components
|
||||
lpChoosePreInstalledModel =
|
||||
(ListPreference) findPreference(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY));
|
||||
String[] preInstalledModelNames = new String[preInstalledModelPaths.size()];
|
||||
for (int i = 0; i < preInstalledModelPaths.size(); i++) {
|
||||
preInstalledModelNames[i] =
|
||||
preInstalledModelPaths.get(i).substring(preInstalledModelPaths.get(i).lastIndexOf("/") + 1);
|
||||
}
|
||||
lpChoosePreInstalledModel.setEntries(preInstalledModelNames);
|
||||
lpChoosePreInstalledModel.setEntryValues(preInstalledModelPaths.toArray(new String[preInstalledModelPaths.size()]));
|
||||
cbEnableCustomSettings =
|
||||
(CheckBoxPreference) findPreference(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY));
|
||||
etModelPath = (EditTextPreference) findPreference(getString(R.string.MODEL_PATH_KEY));
|
||||
etModelPath.setTitle("Model Path (SDCard: " + Utils.getSDCardDirectory() + ")");
|
||||
etLabelPath = (EditTextPreference) findPreference(getString(R.string.LABEL_PATH_KEY));
|
||||
etImagePath = (ListPreference) findPreference(getString(R.string.IMAGE_PATH_KEY));
|
||||
lpCPUThreadNum =
|
||||
(ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY));
|
||||
lpCPUPowerMode =
|
||||
(ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY));
|
||||
lpInputColorFormat =
|
||||
(ListPreference) findPreference(getString(R.string.INPUT_COLOR_FORMAT_KEY));
|
||||
etInputShape = (EditTextPreference) findPreference(getString(R.string.INPUT_SHAPE_KEY));
|
||||
etInputMean = (EditTextPreference) findPreference(getString(R.string.INPUT_MEAN_KEY));
|
||||
etInputStd = (EditTextPreference) findPreference(getString(R.string.INPUT_STD_KEY));
|
||||
etScoreThreshold = (EditTextPreference) findPreference(getString(R.string.SCORE_THRESHOLD_KEY));
|
||||
}
|
||||
|
||||
private void reloadPreferenceAndUpdateUI() {
|
||||
SharedPreferences sharedPreferences = getPreferenceScreen().getSharedPreferences();
|
||||
boolean enableCustomSettings =
|
||||
sharedPreferences.getBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
|
||||
String modelPath = sharedPreferences.getString(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
int modelIdx = lpChoosePreInstalledModel.findIndexOfValue(modelPath);
|
||||
if (modelIdx >= 0 && modelIdx < preInstalledModelPaths.size()) {
|
||||
if (!enableCustomSettings) {
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.putString(getString(R.string.MODEL_PATH_KEY), preInstalledModelPaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.LABEL_PATH_KEY), preInstalledLabelPaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx));
|
||||
editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx));
|
||||
editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
preInstalledInputColorFormats.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_SHAPE_KEY), preInstalledInputShapes.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_MEAN_KEY), preInstalledInputMeans.get(modelIdx));
|
||||
editor.putString(getString(R.string.INPUT_STD_KEY), preInstalledInputStds.get(modelIdx));
|
||||
editor.putString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
preInstalledScoreThresholds.get(modelIdx));
|
||||
editor.apply();
|
||||
}
|
||||
lpChoosePreInstalledModel.setSummary(modelPath);
|
||||
}
|
||||
cbEnableCustomSettings.setChecked(enableCustomSettings);
|
||||
etModelPath.setEnabled(enableCustomSettings);
|
||||
etLabelPath.setEnabled(enableCustomSettings);
|
||||
etImagePath.setEnabled(enableCustomSettings);
|
||||
lpCPUThreadNum.setEnabled(enableCustomSettings);
|
||||
lpCPUPowerMode.setEnabled(enableCustomSettings);
|
||||
lpInputColorFormat.setEnabled(enableCustomSettings);
|
||||
etInputShape.setEnabled(enableCustomSettings);
|
||||
etInputMean.setEnabled(enableCustomSettings);
|
||||
etInputStd.setEnabled(enableCustomSettings);
|
||||
etScoreThreshold.setEnabled(enableCustomSettings);
|
||||
modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
|
||||
getString(R.string.MODEL_PATH_DEFAULT));
|
||||
String labelPath = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY),
|
||||
getString(R.string.LABEL_PATH_DEFAULT));
|
||||
String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
|
||||
getString(R.string.IMAGE_PATH_DEFAULT));
|
||||
String cpuThreadNum = sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
|
||||
getString(R.string.CPU_THREAD_NUM_DEFAULT));
|
||||
String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
|
||||
getString(R.string.CPU_POWER_MODE_DEFAULT));
|
||||
String inputColorFormat = sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY),
|
||||
getString(R.string.INPUT_COLOR_FORMAT_DEFAULT));
|
||||
String inputShape = sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY),
|
||||
getString(R.string.INPUT_SHAPE_DEFAULT));
|
||||
String inputMean = sharedPreferences.getString(getString(R.string.INPUT_MEAN_KEY),
|
||||
getString(R.string.INPUT_MEAN_DEFAULT));
|
||||
String inputStd = sharedPreferences.getString(getString(R.string.INPUT_STD_KEY),
|
||||
getString(R.string.INPUT_STD_DEFAULT));
|
||||
String scoreThreshold = sharedPreferences.getString(getString(R.string.SCORE_THRESHOLD_KEY),
|
||||
getString(R.string.SCORE_THRESHOLD_DEFAULT));
|
||||
etModelPath.setSummary(modelPath);
|
||||
etModelPath.setText(modelPath);
|
||||
etLabelPath.setSummary(labelPath);
|
||||
etLabelPath.setText(labelPath);
|
||||
etImagePath.setSummary(imagePath);
|
||||
etImagePath.setValue(imagePath);
|
||||
lpCPUThreadNum.setValue(cpuThreadNum);
|
||||
lpCPUThreadNum.setSummary(cpuThreadNum);
|
||||
lpCPUPowerMode.setValue(cpuPowerMode);
|
||||
lpCPUPowerMode.setSummary(cpuPowerMode);
|
||||
lpInputColorFormat.setValue(inputColorFormat);
|
||||
lpInputColorFormat.setSummary(inputColorFormat);
|
||||
etInputShape.setSummary(inputShape);
|
||||
etInputShape.setText(inputShape);
|
||||
etInputMean.setSummary(inputMean);
|
||||
etInputMean.setText(inputMean);
|
||||
etInputStd.setSummary(inputStd);
|
||||
etInputStd.setText(inputStd);
|
||||
etScoreThreshold.setText(scoreThreshold);
|
||||
etScoreThreshold.setSummary(scoreThreshold);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
getPreferenceScreen().getSharedPreferences().registerOnSharedPreferenceChangeListener(this);
|
||||
reloadPreferenceAndUpdateUI();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
getPreferenceScreen().getSharedPreferences().unregisterOnSharedPreferenceChangeListener(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) {
|
||||
if (key.equals(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY))) {
|
||||
SharedPreferences.Editor editor = sharedPreferences.edit();
|
||||
editor.putBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
|
||||
editor.commit();
|
||||
}
|
||||
reloadPreferenceAndUpdateUI();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
package com.baidu.paddle.lite.demo.ocr;
|
||||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.Matrix;
|
||||
import android.media.ExifInterface;
|
||||
import android.os.Environment;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class Utils {
|
||||
private static final String TAG = Utils.class.getSimpleName();
|
||||
|
||||
public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) {
|
||||
if (srcPath.isEmpty() || dstPath.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
InputStream is = null;
|
||||
OutputStream os = null;
|
||||
try {
|
||||
is = new BufferedInputStream(appCtx.getAssets().open(srcPath));
|
||||
os = new BufferedOutputStream(new FileOutputStream(new File(dstPath)));
|
||||
byte[] buffer = new byte[1024];
|
||||
int length = 0;
|
||||
while ((length = is.read(buffer)) != -1) {
|
||||
os.write(buffer, 0, length);
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
os.close();
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) {
|
||||
if (srcDir.isEmpty() || dstDir.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if (!new File(dstDir).exists()) {
|
||||
new File(dstDir).mkdirs();
|
||||
}
|
||||
for (String fileName : appCtx.getAssets().list(srcDir)) {
|
||||
String srcSubPath = srcDir + File.separator + fileName;
|
||||
String dstSubPath = dstDir + File.separator + fileName;
|
||||
if (new File(srcSubPath).isDirectory()) {
|
||||
copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
} else {
|
||||
copyFileFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static float[] parseFloatsFromString(String string, String delimiter) {
|
||||
String[] pieces = string.trim().toLowerCase().split(delimiter);
|
||||
float[] floats = new float[pieces.length];
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
floats[i] = Float.parseFloat(pieces[i].trim());
|
||||
}
|
||||
return floats;
|
||||
}
|
||||
|
||||
public static long[] parseLongsFromString(String string, String delimiter) {
|
||||
String[] pieces = string.trim().toLowerCase().split(delimiter);
|
||||
long[] longs = new long[pieces.length];
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
longs[i] = Long.parseLong(pieces[i].trim());
|
||||
}
|
||||
return longs;
|
||||
}
|
||||
|
||||
public static String getSDCardDirectory() {
|
||||
return Environment.getExternalStorageDirectory().getAbsolutePath();
|
||||
}
|
||||
|
||||
public static boolean isSupportedNPU() {
|
||||
return false;
|
||||
// String hardware = android.os.Build.HARDWARE;
|
||||
// return hardware.equalsIgnoreCase("kirin810") || hardware.equalsIgnoreCase("kirin990");
|
||||
}
|
||||
|
||||
public static Bitmap resizeWithStep(Bitmap bitmap, int maxLength, int step) {
|
||||
int width = bitmap.getWidth();
|
||||
int height = bitmap.getHeight();
|
||||
int maxWH = Math.max(width, height);
|
||||
float ratio = 1;
|
||||
int newWidth = width;
|
||||
int newHeight = height;
|
||||
if (maxWH > maxLength) {
|
||||
ratio = maxLength * 1.0f / maxWH;
|
||||
newWidth = (int) Math.floor(ratio * width);
|
||||
newHeight = (int) Math.floor(ratio * height);
|
||||
}
|
||||
|
||||
newWidth = newWidth - newWidth % step;
|
||||
if (newWidth == 0) {
|
||||
newWidth = step;
|
||||
}
|
||||
newHeight = newHeight - newHeight % step;
|
||||
if (newHeight == 0) {
|
||||
newHeight = step;
|
||||
}
|
||||
return Bitmap.createScaledBitmap(bitmap, newWidth, newHeight, true);
|
||||
}
|
||||
|
||||
public static Bitmap rotateBitmap(Bitmap bitmap, int orientation) {
|
||||
|
||||
Matrix matrix = new Matrix();
|
||||
switch (orientation) {
|
||||
case ExifInterface.ORIENTATION_NORMAL:
|
||||
return bitmap;
|
||||
case ExifInterface.ORIENTATION_FLIP_HORIZONTAL:
|
||||
matrix.setScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_180:
|
||||
matrix.setRotate(180);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_FLIP_VERTICAL:
|
||||
matrix.setRotate(180);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_TRANSPOSE:
|
||||
matrix.setRotate(90);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_90:
|
||||
matrix.setRotate(90);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_TRANSVERSE:
|
||||
matrix.setRotate(-90);
|
||||
matrix.postScale(-1, 1);
|
||||
break;
|
||||
case ExifInterface.ORIENTATION_ROTATE_270:
|
||||
matrix.setRotate(-90);
|
||||
break;
|
||||
default:
|
||||
return bitmap;
|
||||
}
|
||||
try {
|
||||
Bitmap bmRotated = Bitmap.createBitmap(bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
|
||||
bitmap.recycle();
|
||||
return bmRotated;
|
||||
}
|
||||
catch (OutOfMemoryError e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:aapt="http://schemas.android.com/aapt"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillType="evenOdd"
|
||||
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000">
|
||||
<aapt:attr name="android:fillColor">
|
||||
<gradient
|
||||
android:endX="78.5885"
|
||||
android:endY="90.9159"
|
||||
android:startX="48.7653"
|
||||
android:startY="61.0927"
|
||||
android:type="linear">
|
||||
<item
|
||||
android:color="#44000000"
|
||||
android:offset="0.0" />
|
||||
<item
|
||||
android:color="#00000000"
|
||||
android:offset="1.0" />
|
||||
</gradient>
|
||||
</aapt:attr>
|
||||
</path>
|
||||
<path
|
||||
android:fillColor="#FFFFFF"
|
||||
android:fillType="nonZero"
|
||||
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000" />
|
||||
</vector>
|
|
@ -0,0 +1,170 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!--
  108dp x 108dp vector drawable on a 108 x 108 viewport: a solid #008577
  square overlaid with a grid of thin (0.8 wide) #33FFFFFF lines.
-->
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">

    <!-- Solid background covering the whole viewport. -->
    <path android:pathData="M0,0h108v108h-108z"
        android:fillColor="#008577" />

    <!-- Full-height vertical lines at x = 9, 19, ..., 99. -->
    <path android:pathData="M9,0L9,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,0L19,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M29,0L29,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M39,0L39,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M49,0L49,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M59,0L59,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M69,0L69,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M79,0L79,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M89,0L89,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M99,0L99,108" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />

    <!-- Full-width horizontal lines at y = 9, 19, ..., 99. -->
    <path android:pathData="M0,9L108,9" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,19L108,19" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,29L108,29" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,39L108,39" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,49L108,49" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,59L108,59" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,69L108,69" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,79L108,79" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,89L108,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M0,99L108,99" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />

    <!-- Inner horizontal segments from x = 19 to x = 89 at y = 29, ..., 79.
         They overlap the full-width lines above. -->
    <path android:pathData="M19,29L89,29" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,39L89,39" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,49L89,49" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,59L89,59" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,69L89,69" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M19,79L89,79" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />

    <!-- Inner vertical segments from y = 19 to y = 89 at x = 29, ..., 79.
         They overlap the full-height lines above. -->
    <path android:pathData="M29,19L29,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M39,19L39,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M49,19L49,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M59,19L59,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M69,19L69,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
    <path android:pathData="M79,19L79,89" android:fillColor="#00000000"
        android:strokeColor="#33FFFFFF" android:strokeWidth="0.8" />
</vector>
|
|
@ -0,0 +1,148 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!--
  Main screen layout (tools:context is .MainActivity).
  Top:    a row of four action buttons followed by two status text views.
  Middle: the input image, placed between the top and bottom sections.
  Bottom: the output result text and the inference time text.
  The deprecated "fill_parent" size value has been replaced by its
  equivalent "match_parent", which the rest of this file already uses.
-->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <RelativeLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent">

        <!-- Top section, pinned to the top of the screen. -->
        <LinearLayout
            android:id="@+id/v_input_info"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:layout_alignParentTop="true"
            android:orientation="vertical">

            <!-- Four equally weighted buttons; each one invokes the handler
                 method named in its android:onClick attribute. -->
            <LinearLayout
                android:id="@+id/btn_layout"
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:orientation="horizontal">

                <Button
                    android:id="@+id/btn_load_model"
                    android:layout_width="0dp"
                    android:layout_height="wrap_content"
                    android:layout_weight="1"
                    android:onClick="btn_load_model_click"
                    android:text="加载模型" />
                <Button
                    android:id="@+id/btn_run_model"
                    android:layout_width="0dp"
                    android:layout_height="wrap_content"
                    android:layout_weight="1"
                    android:onClick="btn_run_model_click"
                    android:text="运行模型" />
                <Button
                    android:id="@+id/btn_take_photo"
                    android:layout_width="0dp"
                    android:layout_height="wrap_content"
                    android:layout_weight="1"
                    android:onClick="btn_take_photo_click"
                    android:text="拍照识别" />
                <Button
                    android:id="@+id/btn_choice_img"
                    android:layout_width="0dp"
                    android:layout_height="wrap_content"
                    android:layout_weight="1"
                    android:onClick="btn_choice_img_click"
                    android:text="选取图片" />

            </LinearLayout>

            <!-- Multi-line text (at most 6 lines); initially empty. -->
            <TextView
                android:id="@+id/tv_input_setting"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:scrollbars="vertical"
                android:layout_marginLeft="12dp"
                android:layout_marginRight="12dp"
                android:layout_marginTop="10dp"
                android:layout_marginBottom="5dp"
                android:lineSpacingExtra="4dp"
                android:singleLine="false"
                android:maxLines="6"
                android:text=""/>

            <!-- Status line; the negative top margin pulls it closer to the
                 text view above. -->
            <TextView
                android:id="@+id/tv_model_img_status"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:scrollbars="vertical"
                android:layout_marginLeft="12dp"
                android:layout_marginRight="12dp"
                android:layout_marginTop="-5dp"
                android:layout_marginBottom="5dp"
                android:lineSpacingExtra="4dp"
                android:singleLine="false"
                android:maxLines="6"
                android:text="STATUS: ok"/>

        </LinearLayout>

        <!-- Middle section: fills the space between v_input_info and
             v_output_info and centres the image inside it. -->
        <RelativeLayout
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:layout_above="@+id/v_output_info"
            android:layout_below="@+id/v_input_info">

            <ImageView
                android:id="@+id/iv_input_image"
                android:layout_width="400dp"
                android:layout_height="400dp"
                android:layout_centerHorizontal="true"
                android:layout_centerVertical="true"
                android:layout_marginLeft="12dp"
                android:layout_marginRight="12dp"
                android:layout_marginTop="5dp"
                android:layout_marginBottom="5dp"
                android:adjustViewBounds="true"
                android:scaleType="fitCenter"/>
        </RelativeLayout>


        <!-- Bottom section, pinned to the bottom and centred horizontally. -->
        <RelativeLayout
            android:id="@+id/v_output_info"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_alignParentBottom="true"
            android:layout_centerHorizontal="true">

            <!-- Centred multi-line text (at most 5 lines); initially empty. -->
            <TextView
                android:id="@+id/tv_output_result"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_alignParentTop="true"
                android:layout_centerHorizontal="true"
                android:layout_centerVertical="true"
                android:scrollbars="vertical"
                android:layout_marginLeft="12dp"
                android:layout_marginRight="12dp"
                android:layout_marginTop="5dp"
                android:layout_marginBottom="5dp"
                android:textAlignment="center"
                android:lineSpacingExtra="5dp"
                android:singleLine="false"
                android:maxLines="5"
                android:text=""/>

            <!-- Placed below tv_output_result; initially empty. -->
            <TextView
                android:id="@+id/tv_inference_time"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_below="@+id/tv_output_result"
                android:layout_centerHorizontal="true"
                android:layout_centerVertical="true"
                android:textAlignment="center"
                android:layout_marginLeft="12dp"
                android:layout_marginRight="12dp"
                android:layout_marginTop="5dp"
                android:layout_marginBottom="10dp"
                android:text=""/>

        </RelativeLayout>

    </RelativeLayout>
</androidx.constraintlayout.widget.ConstraintLayout>
|
|
@ -0,0 +1,46 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!-- For MiniActivity use only. -->
<!--
  Simple ConstraintLayout with an image at the top, a text view directly
  below it, and a button anchored to the bottom of the screen.
  NOTE(review): the root element sets both layout_constraintLeft_toLeftOf and
  layout_constraintLeft_toRightOf to "parent"; both anchor the same edge.
  Confirm whether either is needed on the root.
-->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    app:layout_constraintLeft_toLeftOf="parent"
    app:layout_constraintLeft_toRightOf="parent"
    tools:context=".MainActivity">

    <!-- Text spanning the parent width, positioned under the image. -->
    <TextView
        android:id="@+id/sample_text"
        android:layout_width="0dp"
        android:layout_height="wrap_content"
        android:text="Hello World!"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/imageView"
        android:scrollbars="vertical"
        />

    <!-- Image anchored to the top of the parent and centred horizontally.
         The previous bottom constraint pointed at this view's own id, which
         constrains the view against itself; it has been removed. The text
         view above already positions itself relative to this image. -->
    <ImageView
        android:id="@+id/imageView"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:paddingTop="20dp"
        android:paddingBottom="20dp"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        tools:srcCompat="@tools:sample/avatars" />

    <!-- Button centred horizontally, 4dp above the bottom of the parent. -->
    <Button
        android:id="@+id/button"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginBottom="4dp"
        android:text="Button"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        tools:layout_editor_absoluteX="161dp" />

</androidx.constraintlayout.widget.ConstraintLayout>
|
|
@ -0,0 +1,10 @@
|
|||
<!-- Menu resource containing a single "Settings..." entry. -->
<menu xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto">

    <group>
        <!-- showAsAction="withText" asks for the title text to be shown with
             the item. -->
        <item
            android:title="Settings..."
            android:id="@+id/settings"
            app:showAsAction="withText"/>
    </group>
</menu>
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!-- Adaptive icon assembled from the ic_launcher_background and
     ic_launcher_foreground drawables. -->
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background
        android:drawable="@drawable/ic_launcher_background" />
    <foreground
        android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!-- Adaptive icon assembled from the ic_launcher_background and
     ic_launcher_foreground drawables. -->
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background
        android:drawable="@drawable/ic_launcher_background" />
    <foreground
        android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
|
After Width: | Height: | Size: 2.9 KiB |
After Width: | Height: | Size: 4.8 KiB |
After Width: | Height: | Size: 2.0 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 4.4 KiB |
After Width: | Height: | Size: 6.7 KiB |
After Width: | Height: | Size: 6.2 KiB |
After Width: | Height: | Size: 10 KiB |
After Width: | Height: | Size: 8.9 KiB |
After Width: | Height: | Size: 15 KiB |
|
@ -0,0 +1,51 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!--
  String arrays declared in pairs: each "*_entries" array holds display labels
  and the matching "*_values" array holds the corresponding values, in the
  same order.
-->
<resources>
    <!-- Image file labels and their paths under images/. -->
    <string-array name="image_name_entries">
        <item>0.jpg</item>
        <item>90.jpg</item>
        <item>180.jpg</item>
        <item>270.jpg</item>
    </string-array>
    <string-array name="image_name_values">
        <item>images/0.jpg</item>
        <item>images/90.jpg</item>
        <item>images/180.jpg</item>
        <item>images/270.jpg</item>
    </string-array>

    <!-- CPU thread count choices. The first label is singular ("1 thread");
         the values themselves are unchanged. -->
    <string-array name="cpu_thread_num_entries">
        <item>1 thread</item>
        <item>2 threads</item>
        <item>4 threads</item>
        <item>8 threads</item>
    </string-array>
    <string-array name="cpu_thread_num_values">
        <item>1</item>
        <item>2</item>
        <item>4</item>
        <item>8</item>
    </string-array>

    <!-- CPU power mode labels and their LITE_POWER_* values. -->
    <string-array name="cpu_power_mode_entries">
        <item>HIGH(only big cores)</item>
        <item>LOW(only LITTLE cores)</item>
        <item>FULL(all cores)</item>
        <item>NO_BIND(depends on system)</item>
        <item>RAND_HIGH</item>
        <item>RAND_LOW</item>
    </string-array>
    <string-array name="cpu_power_mode_values">
        <item>LITE_POWER_HIGH</item>
        <item>LITE_POWER_LOW</item>
        <item>LITE_POWER_FULL</item>
        <item>LITE_POWER_NO_BIND</item>
        <item>LITE_POWER_RAND_HIGH</item>
        <item>LITE_POWER_RAND_LOW</item>
    </string-array>

    <!-- Input colour channel order choices. -->
    <string-array name="input_color_format_entries">
        <item>BGR color format</item>
        <item>RGB color format</item>
    </string-array>
    <string-array name="input_color_format_values">
        <item>BGR</item>
        <item>RGB</item>
    </string-array>
</resources>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
<!-- Named colour resources: primary, dark primary, and accent. -->
<resources>
    <color name="colorPrimary">#008577</color>
    <color name="colorPrimaryDark">#00574B</color>
    <color name="colorAccent">#D81B60</color>
</resources>
|
|
@ -0,0 +1,26 @@
|
|||
<!--
  String resources: the application name, a set of "*_KEY" strings whose
  value equals their own name, and a set of "*_DEFAULT" strings holding
  default setting values.
-->
<resources>
    <string name="app_name">OCR Chinese</string>

    <!-- Key strings; each value is identical to the resource name. -->
    <string name="CHOOSE_PRE_INSTALLED_MODEL_KEY">CHOOSE_PRE_INSTALLED_MODEL_KEY</string>
    <string name="ENABLE_CUSTOM_SETTINGS_KEY">ENABLE_CUSTOM_SETTINGS_KEY</string>
    <string name="MODEL_PATH_KEY">MODEL_PATH_KEY</string>
    <string name="LABEL_PATH_KEY">LABEL_PATH_KEY</string>
    <string name="IMAGE_PATH_KEY">IMAGE_PATH_KEY</string>
    <string name="CPU_THREAD_NUM_KEY">CPU_THREAD_NUM_KEY</string>
    <string name="CPU_POWER_MODE_KEY">CPU_POWER_MODE_KEY</string>
    <string name="INPUT_COLOR_FORMAT_KEY">INPUT_COLOR_FORMAT_KEY</string>
    <string name="INPUT_SHAPE_KEY">INPUT_SHAPE_KEY</string>
    <string name="INPUT_MEAN_KEY">INPUT_MEAN_KEY</string>
    <string name="INPUT_STD_KEY">INPUT_STD_KEY</string>
    <string name="SCORE_THRESHOLD_KEY">SCORE_THRESHOLD_KEY</string>

    <!-- Default values. List-like defaults are comma-separated; note that
         INPUT_MEAN_DEFAULT has a space after each comma while the other
         lists do not. -->
    <string name="MODEL_PATH_DEFAULT">models/ocr_v2_for_cpu</string>
    <string name="LABEL_PATH_DEFAULT">labels/ppocr_keys_v1.txt</string>
    <string name="IMAGE_PATH_DEFAULT">images/0.jpg</string>
    <string name="CPU_THREAD_NUM_DEFAULT">4</string>
    <string name="CPU_POWER_MODE_DEFAULT">LITE_POWER_HIGH</string>
    <string name="INPUT_COLOR_FORMAT_DEFAULT">BGR</string>
    <string name="INPUT_SHAPE_DEFAULT">1,3,960</string>
    <string name="INPUT_MEAN_DEFAULT">0.485, 0.456, 0.406</string>
    <string name="INPUT_STD_DEFAULT">0.229,0.224,0.225</string>
    <string name="SCORE_THRESHOLD_DEFAULT">0.1</string>
</resources>
|
||||
|